In [None]:
"""
(1) This script implements a BERT-based FAQ chatbot:

1. Loads a pre-trained BERT tokenizer and model.
2. Preprocesses user input and FAQ questions.
3. Computes embeddings for FAQ questions and stores them.
4. Matches user queries to the most similar FAQ answer using cosine similarity.
5. Provides a command-line interface for interactive querying.
"""

In [None]:
import json
import re

import numpy as np
import torch
from sklearn.metrics.pairwise import cosine_similarity
from transformers import BertTokenizer, BertModel

In [None]:
# The tokenizer and the encoder are loaded from the same pre-trained checkpoint
_BERT_CHECKPOINT = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(_BERT_CHECKPOINT)
model = BertModel.from_pretrained(_BERT_CHECKPOINT)

# Normalize text before embedding: drop punctuation, then lowercase
def preprocess_text(text):
    """Return *text* with non-word runs collapsed to one space, lowercased.

    Word characters (letters, digits, underscore) are kept. Each run of any
    other characters becomes a single space, so leading or trailing
    punctuation leaves a leading or trailing space in the result.
    """
    # Substitute first, lowercase second: the order matters for characters
    # whose lowercase form contains non-word code points.
    without_punctuation = re.sub(r'\W+', ' ', text)
    return without_punctuation.lower()

# Generate BERT embeddings
def get_bert_embedding(text):
    """Embed *text* with BERT by averaging the final hidden states.

    Args:
        text: Input handed to the module-level ``tokenizer``. Anything
            longer than 512 tokens is truncated.

    Returns:
        A NumPy array with the mean of ``last_hidden_state`` taken over
        dim 1 (presumably the token axis, giving one vector per input --
        confirm against the model's output layout).
    """
    inputs = tokenizer(text, return_tensors='pt', truncation=True, padding=True, max_length=512)
    # Inference only: disable autograd so no computation graph is built and
    # kept alive for a forward pass whose gradients are never used.
    with torch.no_grad():
        outputs = model(**inputs)
        embeddings = outputs.last_hidden_state.mean(dim=1)
    return embeddings.detach().numpy()

# Pick the FAQ answer whose question is closest to the user's query
def get_best_answer(user_query, faqs, faq_embeddings):
    """Return the answer of the FAQ entry most similar to *user_query*.

    Args:
        user_query: Raw query text; it is preprocessed and embedded here.
        faqs: Indexable collection of entries, each with an ``'answer'`` key.
        faq_embeddings: Embeddings to compare against; the best-scoring
            position is used to index *faqs*, so the two are assumed to be
            in the same order.

    Returns:
        The ``'answer'`` value of the entry with the highest cosine similarity.
    """
    # Flatten the query embedding into a single row for cosine_similarity
    query_vector = get_bert_embedding(preprocess_text(user_query)).reshape(1, -1)
    scores = cosine_similarity(query_vector, faq_embeddings)
    top_entry = faqs[scores.argmax()]
    return top_entry['answer']

In [None]:
# Load the FAQs from the JSON file.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's default encoding (which is not UTF-8 everywhere).
with open('data/keelworks_info.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

# Entries are read elsewhere in this file through their 'question' and 'answer' keys
faqs = data['questions_and_answers']

# Embed every FAQ question once up front, then stack the per-question
# arrays row-wise so the result is a single 2D array
faq_embeddings = np.vstack(
    [get_bert_embedding(preprocess_text(entry['question'])) for entry in faqs]
)

In [None]:
# Command-Line Interface
def chatbot():
    """Run the interactive question/answer loop on stdin/stdout.

    Prompts repeatedly and prints the best-matching FAQ answer for each
    query. The loop ends when the user types ``exit``, ``quit`` or ``bye``
    (case-insensitive, surrounding whitespace ignored), or when input ends
    (EOF) or is interrupted (Ctrl-C). Blank lines are skipped rather than
    being sent to the model.
    """
    print("Welcome to the KeelWorks Chatbot! Ask me anything about KeelWorks.")
    while True:
        try:
            user_query = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed/exhausted stdin or Ctrl-C: leave cleanly, no traceback
            print("\nGoodbye!")
            break
        if not user_query:
            # Nothing to match against; ask again
            continue
        if user_query.lower() in ('exit', 'quit', 'bye'):
            print("Goodbye!")
            break
        answer = get_best_answer(user_query, faqs, faq_embeddings)
        print(f"Bot: {answer}")

# Start the interactive loop only when this module is executed directly,
# not when it is imported.
if __name__ == '__main__':
    chatbot()