In [7]:
!pip install nltk
!pip install sentence-transformers torch


Collecting sentence-transformers
  Downloading sentence_transformers-3.0.1-py3-none-any.whl (227 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m227.1/227.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (23.7 MB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (823 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl (14.1 MB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl (731.7 MB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl (410.6 MB)
Collecting nvidia-cufft-cu12==11.0.2.54 (from torch)
  Using cach

In [2]:
import pickle
import re
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.stem import PorterStemmer
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import word_tokenize


In [3]:

# Load NLTK data.
# 'punkt_tab' is the tokenizer resource word_tokenize looks up on NLTK >= 3.8.2
# (it superseded the pickled 'punkt' models); 'punkt' is still fetched so the
# notebook keeps working on older NLTK releases as well.
for nltk_resource in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(nltk_resource)

# Initialize stemmer and lemmatizer (shared by preprocess_text)
stemmer = PorterStemmer()
lemmatizer = WordNetLemmatizer()


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...


In [4]:

# Normalize raw text before it is embedded
def preprocess_text(text):
    """Return a normalized, space-joined token string for ``text``.

    Steps, in order:
      1. every run of non-word characters (regex ``\\W+``) becomes one space;
      2. the text is lowercased;
      3. the text is split with ``word_tokenize``;
      4. each token is Porter-stemmed and the stem is then passed through the
         WordNet lemmatizer.

    Relies on the module-level ``stemmer`` and ``lemmatizer`` instances.
    """
    cleaned = re.sub(r'\W+', ' ', text).lower()
    normalized_tokens = (
        lemmatizer.lemmatize(stemmer.stem(token))
        for token in word_tokenize(cleaned)
    )
    return ' '.join(normalized_tokens)


In [8]:

# Load model and embeddings from pickle file.
# SECURITY: pickle.load can execute arbitrary code embedded in the file, so
# only point MODEL_PATH at a bundle you created yourself or otherwise trust.
MODEL_PATH = 'model/keelworks_model.pkl'
with open(MODEL_PATH, 'rb') as f:
    model_data = pickle.load(f)

# Fail fast, naming the file and the absent entries, rather than surfacing a
# bare KeyError from one of the lookups below.
missing_keys = [
    key for key in ('model', 'faq_questions', 'faq_embeddings', 'faqs')
    if key not in model_data
]
if missing_keys:
    raise KeyError(f"{MODEL_PATH} is missing expected entries: {missing_keys}")

model = model_data['model']
faq_questions = model_data['faq_questions']
faq_embeddings = model_data['faq_embeddings']
faqs = model_data['faqs']


  from tqdm.autonotebook import tqdm, trange


In [9]:
# Embed a piece of text with the loaded model
def get_sbert_embedding(text):
    """Encode ``text`` with the module-level ``model`` and return the result."""
    return model.encode(text)

# Find the best matching answer
def get_best_answer(user_query, faqs, faq_embeddings, threshold=0.5):
    """Return the answer of the FAQ entry most similar to ``user_query``.

    The query is normalized with ``preprocess_text``, embedded with
    ``get_sbert_embedding`` and compared to every row of ``faq_embeddings``
    by cosine similarity.

    Args:
        user_query: Raw question text typed by the user.
        faqs: Sequence of mappings, each with an ``'answer'`` key, indexed in
            the same order as the rows of ``faq_embeddings``.
        faq_embeddings: Embeddings of the FAQ questions, one row per entry.
        threshold: Minimum cosine similarity for a match to be accepted.

    Returns:
        The ``'answer'`` of the best-matching FAQ, or a fixed fallback message
        when there is nothing to match against, the query has no usable
        content after preprocessing, or the best score is below ``threshold``.
    """
    fallback = "Sorry, I don't have the answer. Please email to test@keelworks to get more info."

    # With no FAQ entries the similarity search below cannot run at all.
    if len(faqs) == 0 or len(faq_embeddings) == 0:
        return fallback

    preprocessed_query = preprocess_text(user_query)
    # Empty or punctuation-only input carries no meaning; embedding it could
    # only produce an arbitrary match.
    if not preprocessed_query.strip():
        return fallback

    query_embedding = get_sbert_embedding(preprocessed_query).reshape(1, -1)

    # One query row in, so take the single row of scores out.
    similarities = cosine_similarity(query_embedding, faq_embeddings)[0]
    best_match_index = int(similarities.argmax())  # plain int: valid index for any sequence
    best_match_score = similarities[best_match_index]

    if best_match_score < threshold:
        return fallback
    return faqs[best_match_index]['answer']


In [13]:

# Command-Line Interface
def chatbot():
    """Run an interactive question/answer loop on stdin/stdout.

    Asks for the user's name, then repeatedly reads a question and prints the
    answer from ``get_best_answer`` using the module-level ``faqs`` and
    ``faq_embeddings``. The loop ends when the user types ``exit``, ``quit``
    or ``bye`` (case-insensitive, surrounding whitespace ignored), or when the
    question prompt hits end-of-input or is interrupted.
    """
    print("Welcome to the KeelWorks Chatbot!")
    user_name = input("Please enter your name: ")
    print(f"Hello {user_name}, welcome to the KeelWorks bot. Ask me anything about KeelWorks.")

    exit_commands = {'exit', 'quit', 'bye'}
    while True:
        try:
            # Strip so that "exit " or " quit" are still recognised as commands.
            user_query = input("\nYou: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or interrupted kernel: end the session
            # cleanly instead of leaving a traceback in the output.
            print(f"\nGoodbye, {user_name}!")
            break

        if not user_query:
            # Nothing was asked; prompt again rather than querying the model.
            continue
        if user_query.lower() in exit_commands:
            print(f"Goodbye, {user_name}!")
            break

        answer = get_best_answer(user_query, faqs, faq_embeddings)
        print(f"Bot: {answer}")

# Start the chat session only when this code runs as the main program
# (i.e. __name__ is '__main__'), not when it is imported from elsewhere.
if __name__ == '__main__':
    chatbot()

Welcome to the KeelWorks Chatbot!
Please enter your name: test
Hello test, welcome to the KeelWorks bot. Ask me anything about KeelWorks.

You: what is this org
Bot: Sorry, I don't have the answer. Please email to test@keelworks to get more info.

You: exit
Goodbye, test!
