In [1]:
"""
(4) This script executes the SBERT model chatbot from the pickle file:

1. Loads a pre-trained model and embeddings from a pickle file.
2. Preprocesses user queries by tokenizing, stemming, lemmatizing, and lowercasing the text.
3. Computes embeddings for user queries.
4. Finds the most relevant FAQ answer using cosine similarity.
5. Provides a command-line interface for user interaction.
The chatbot responds to user queries based on precomputed FAQ data and embeddings. If no suitable answer is found, it provides a default message.
"""

'\n(4) This script executes the SBERT model chatbot from the pickle file:\n\n1. Loads a pre-trained model and embeddings from a pickle file.\n2. Preprocesses user queries by tokenizing, stemming, lemmatizing, and lowercasing the text.\n3. Computes embeddings for user queries.\n4. Finds the most relevant FAQ answer using cosine similarity.\n5. Provides a command-line interface for user interaction.\nThe chatbot responds to user queries based on precomputed FAQ data and embeddings. If no suitable answer is found, it provides a default message.\n'

In [2]:
import re
import pickle
import nltk
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Load NLTK data
# 'punkt' backs word_tokenize on older NLTK releases, while recent releases
# (3.9+) look up 'punkt_tab' instead; fetching both keeps a fresh environment
# working regardless of the installed NLTK version.
# 'wordnet' and 'omw-1.4' are the corpora used by WordNetLemmatizer.
for _nltk_package in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(_nltk_package)

# Initialize stemmer and lemmatizer (shared by preprocess_text)
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()

[nltk_data] Downloading package punkt to /Users/jjpark987/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/jjpark987/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /Users/jjpark987/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


In [4]:
# Load model and embeddings from pickle file
# NOTE(review): pickle.load can execute arbitrary code while deserializing;
# only load a pickle file that comes from a trusted source.
with open('model/keelworks_model.pkl', 'rb') as f:
    model_data = pickle.load(f)

# Unpack the stored artifacts into the module-level names the helpers below use.
model, faqs, faq_questions, faq_embeddings = (
    model_data[key]
    for key in ('model', 'faqs', 'faq_questions', 'faq_embeddings')
)

  from tqdm.autonotebook import tqdm, trange


In [5]:
# Preprocess text (tokenization, stemming, lemmatization, and lowercasing)
def preprocess_text(text):
    """Normalize raw text into a space-separated string of processed tokens.

    Each run of non-word characters (anything outside letters, digits and
    underscore) is replaced by a single space, the text is lowercased and
    tokenized with NLTK, and every token is Porter-stemmed and then
    WordNet-lemmatized.

    Args:
        text: The raw input string.

    Returns:
        The processed tokens joined by single spaces ('' when there are none).
    """
    cleaned = re.sub(r'\W+', ' ', text).lower()
    normalized_tokens = (
        lemmatizer.lemmatize(stemmer.stem(token))
        for token in word_tokenize(cleaned)
    )
    return ' '.join(normalized_tokens)

# Generate SBERT embeddings
def get_sbert_embedding(text):
    """Return the embedding that the loaded model's ``encode`` produces for ``text``."""
    return model.encode(text)

# Find the best matching answer
def get_best_answer(user_query, faqs, faq_embeddings, threshold=0.5):
    """Return the answer of the FAQ entry most similar to ``user_query``.

    The query is preprocessed, embedded, and compared against every row of
    ``faq_embeddings`` with cosine similarity. The answer of the best-scoring
    entry is returned when its score reaches ``threshold``.

    Args:
        user_query: Raw question text typed by the user.
        faqs: Sequence of FAQ entries; entry ``i`` must expose an ``'answer'``
            key and correspond to row ``i`` of ``faq_embeddings``.
        faq_embeddings: Embeddings of the FAQ entries, one row per entry.
        threshold: Minimum cosine similarity required to accept a match.

    Returns:
        The matched FAQ answer, or a fixed fallback message when there is
        nothing to match against, the query contains no usable text, or the
        best similarity score is below ``threshold``.
    """
    fallback = "Sorry, I don't have the answer. Please email to test@keelworks to get more info."

    # With no FAQ data there is nothing to compare against, so answer with the
    # fallback instead of failing inside the similarity/argmax computation.
    if len(faqs) == 0 or len(faq_embeddings) == 0:
        return fallback

    preprocessed_query = preprocess_text(user_query)
    # A query made only of whitespace/punctuation preprocesses to ''; encoding
    # an empty string could still match some FAQ spuriously, so bail out early.
    if not preprocessed_query:
        return fallback

    # Shape the query as a single-row matrix so the result is (1, n_faqs).
    query_embedding = get_sbert_embedding(preprocessed_query).reshape(1, -1)
    similarities = cosine_similarity(query_embedding, faq_embeddings)

    # argmax over a single-row matrix yields the column (FAQ) index directly.
    best_match_index = int(similarities.argmax())
    best_match_score = similarities[0, best_match_index]

    if best_match_score < threshold:
        return fallback

    return faqs[best_match_index]['answer']

In [6]:
# Command-Line Interface
def chatbot():
    """Run the interactive KeelWorks FAQ chat loop on stdin/stdout.

    Greets the user, asks for a name, then repeatedly reads a question and
    prints the answer returned by ``get_best_answer``. The loop ends when the
    user types ``exit``, ``quit`` or ``bye`` (case-insensitive, surrounding
    whitespace ignored), or when input is closed/interrupted.

    Returns:
        None.
    """
    exit_commands = {'exit', 'quit', 'bye'}

    print("Welcome to the KeelWorks Chatbot!")
    try:
        user_name = input("Please enter your name: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the user interrupted before giving a name.
        print("\nGoodbye!")
        return
    print(f"Hello {user_name}, welcome to the KeelWorks bot. Ask me anything about KeelWorks.")

    while True:
        try:
            user_query = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Treat Ctrl-D / Ctrl-C (or a kernel interrupt) as a request to leave.
            print(f"\nGoodbye, {user_name}!")
            break

        # Ignore blank lines instead of sending an empty query to the model.
        if not user_query:
            continue

        # Input was stripped above, so "exit " or " Quit" also end the chat.
        if user_query.lower() in exit_commands:
            print(f"Goodbye, {user_name}!")
            break

        answer = get_best_answer(user_query, faqs, faq_embeddings)
        print(f"Bot: {answer}")

# Start the chat loop only when this code runs as the main program.
if __name__ == "__main__":
    chatbot()

Welcome to the KeelWorks Chatbot!
Hello Jaosn, welcome to the KeelWorks bot. Ask me anything about KeelWorks.
Bot: KeelWorks is a non-profit, charitable foundation established 13 years ago, focusing on empowering individuals globally, including in places like China, India, Egypt, Nigeria, Ghana, UK, Brazil, Kenya, Canada, and the USA. We help economically disadvantaged individuals gain new competencies, supporting their families and contributing to their communities.
Bot: Sorry, I don't have the answer. Please email to test@keelworks to get more info.
Goodbye, Jaosn!
