In [7]:
import json
import nltk
import numpy as np
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [8]:
with open('data.json') as json_file:
    conversations = json.load(json_file)

df = pd.DataFrame(conversations['conversations'])
df

Unnamed: 0,input,response
0,hi,Hello!
1,hello,"Hello, how are you"
2,Fine thanks,Happy to hear this
3,how are you,"I'm doing well, thank you!"
4,bye,Goodbye!
5,what's your name,You can call me ChatBot.
6,What is the weather like today?,"I'm sorry, I don't have access to real-time da..."
7,Tell me a joke.,Sure! Why don't scientists trust atoms? Becaus...
8,What's the capital of France?,The capital of France is Paris.
9,How old are you?,"I am an AI language model, so I don't have an ..."


In [9]:
#function to preprocess the text
def preprocess_text(text):
    stop_words = set(nltk.corpus.stopwords.words('english'))

    words = nltk.word_tokenize(text.lower())

    words = [word for word in words if word.isalnum() and word not in stop_words]

    return ' '.join(words)

In [10]:

preprocessed_inputs = [preprocess_text(conversation['input']) for conversation in conversations['conversations']]

print(preprocessed_inputs)

responses = [conversation['response'] for conversation in conversations['conversations']]


vectorizer = TfidfVectorizer()
tfidf_matrix = vectorizer.fit_transform(preprocessed_inputs)

['hi', 'hello', 'fine thanks', '', 'bye', 'name', 'weather like today', 'tell joke', 'capital france', 'old', 'current president united states', 'meaning life', 'recommend good movie', 'mood comedy', 'tallest mountain world', 'say spanish', 'pets', 'best way learn programming', 'population china', 'favorite book', 'help math homework', 'largest ocean world', 'take care houseplant', 'distance earth moon', 'tell fun fact', 'long take learn new language', 'primary colors']


In [11]:
def classify_text(text):
    preprocessed_input = preprocess_text(text)
    input_vector = vectorizer.transform([preprocessed_input])
    similarity_scores = cosine_similarity(input_vector, tfidf_matrix)
    max_index = np.argmax(similarity_scores)
    if max(similarity_scores.tolist()[0])==0.0:
        return ""
    return responses[max_index]

In [12]:
Bye_list=['exit', 'see you later','bye','quit','stop','see you later', 'have a nice day']

print("Bot: Hello, I'm Chatbot. How can I help you today?")

while True:
    user_message = input("you: ")
    print("you : "+user_message)

    if user_message.lower() in Bye_list:
        print('Bot: see you later !')
        break

    res=classify_text(user_message) 

    if res=="":
        print("Bot: I'm sorry, I didn't catch that Please try again.")
        continue

    print(f"Bot: {res}") 

Bot: Hello, I'm Chatbot. How can I help you today?
you : hello
Bot: Hello, how are you
you : fine thanks 
Bot: Happy to hear this 
you : how are you
Bot: I'm sorry, I didn't catch that Please try again.
you : what is your name 
Bot: You can call me ChatBot.
you : 
Bot: I'm sorry, I didn't catch that Please try again.
you : 
Bot: I'm sorry, I didn't catch that Please try again.
you : 
Bot: I'm sorry, I didn't catch that Please try again.
you : 
Bot: I'm sorry, I didn't catch that Please try again.
you : 
Bot: I'm sorry, I didn't catch that Please try again.
you : 
Bot: I'm sorry, I didn't catch that Please try again.
you : bye
Bot: see you later !
