In [13]:
import json
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


# Download necessary NLTK data (uncomment and run once on a fresh environment)
#nltk.download('punkt')
#nltk.download('stopwords')

 

In [14]:
# Load the conversation dataset. JSON text is UTF-8 by specification, so the
# encoding is passed explicitly instead of relying on the platform default.
with open('data.json', encoding='utf-8') as json_file:
    conversations = json.load(json_file)

In [15]:
# Stop words are loaded lazily on first use and then reused by every call,
# instead of re-reading the NLTK corpus for each piece of text.
_STOP_WORDS_CACHE = {}


def _english_stop_words():
    """Return the English stop-word set, reading the NLTK corpus only once."""
    if 'english' not in _STOP_WORDS_CACHE:
        _STOP_WORDS_CACHE['english'] = frozenset(nltk.corpus.stopwords.words('english'))
    return _STOP_WORDS_CACHE['english']


# Define a function to preprocess the text
def preprocess_text(text):
    """Normalise raw text into a space-separated string of content words.

    The text is lowercased, tokenized with ``nltk.word_tokenize``, and every
    token that is not purely alphanumeric or that is an English stop word is
    dropped.

    Args:
        text: The string to clean.

    Returns:
        The surviving tokens joined by single spaces. This is an empty string
        when no token survives the filtering.

    Note:
        The NLTK tokenizer models and stop-word corpus must already be
        installed; the ``nltk.download`` calls in the import cell are
        commented out.
    """
    stop_words = _english_stop_words()

    # Tokenize the text and convert to lowercase
    words = nltk.word_tokenize(text.lower())

    # Filter out non-alphanumeric words (punctuation, contraction fragments
    # such as "'s") and stop words
    words = [word for word in words if word.isalnum() and word not in stop_words]

    # Join the filtered words back into a string
    return ' '.join(words)

In [16]:
# Split the training conversations into cleaned prompts and their replies;
# both lists share the same ordering, so index i of one matches index i of the other
preprocessed_inputs = list(
    map(preprocess_text, (entry['input'] for entry in conversations['conversations']))
)
responses = [entry['response'] for entry in conversations['conversations']]

# Fit a TF-IDF vectorizer on the cleaned prompts and keep the resulting matrix
# so later queries can be compared against it
vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(preprocessed_inputs)

In [17]:
# Define a function to classify user input using cosine similarity
def classify_text(text, min_similarity=0.0):
    """Return the stored response whose training input best matches ``text``.

    The text is preprocessed with ``preprocess_text``, projected into the
    fitted TF-IDF space, and compared against every training input by cosine
    similarity.

    Args:
        text: The user message to answer.
        min_similarity: Best-match scores at or below this value are treated
            as "no match". The default of 0.0 rejects only inputs that share
            no vocabulary term with any training input.

    Returns:
        The response paired with the most similar training input, or an empty
        string when the best score does not exceed ``min_similarity``.
    """
    preprocessed_input = preprocess_text(text)
    input_vector = vectorizer.transform([preprocessed_input])

    # Only one query vector is passed in, so the result has a single row;
    # take it once instead of converting the whole matrix to a Python list.
    similarity_scores = cosine_similarity(input_vector, tfidf_matrix)[0]
    max_index = int(np.argmax(similarity_scores))

    if similarity_scores[max_index] <= min_similarity:
        return ""
    return responses[max_index]

# Smoke-test the classifier with a few example inputs, one reply per line
for sample_message in ("Hi!", "what is your name?", "What's the capital of France?"):
    print(classify_text(sample_message))


Hello!
You can call me ChatBot.
The capital of France is Paris.


In [19]:
# Phrases (compared case-insensitively) that end the chat session
exit_list=['exit', 'see you later','bye','quit','stop']

print("Bot: Hello, I'm Chatbot. How can I help you today?")

# Interactive loop: read a message, answer it, and repeat until the user
# enters one of the exit phrases.
while True:
    try:
        user_message = input("you: ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted kernel ends the chat cleanly instead
        # of leaving a traceback in the cell output.
        print('Bot: see you later !')
        break

    # Echo the message so the transcript stays in the cell output
    print("you : "+user_message)

    # Ignore surrounding whitespace as well as letter case, so that input
    # such as "bye " still ends the session.
    if user_message.strip().lower() in exit_list:
        print('Bot: see you later !')
        break

    res=classify_text(user_message)

    # classify_text returns an empty string when nothing in the training data matches
    if res=="":
        print("Bot: I'm sorry, I didn't catch that Please try to write something more clear.")
        continue

    print(f"Bot: {res}")

Bot: Hello, I'm Chatbot. How can I help you today?
you : hi
Bot: Hello!
you : what is your name?
Bot: You can call me ChatBot.
you : i want to know your name
Bot: You can call me ChatBot.
you : kk
Bot: I'm sorry, I didn't catch that Please try to write something more clear.
you : how old are you
Bot: I am an AI language model, so I don't have an age.
you : what is the capital of france
Bot: The capital of France is Paris.
you : bye
Bot: see you later !
