In [None]:
import nltk
import numpy as np
import random
import string

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# fetch the NLTK data packages used by the tokenizers and the WordNet lemmatizer below
for _pkg in ('punkt', 'wordnet'):
    nltk.download(_pkg)

# read the whole corpus; bytes that are not valid UTF-8 are silently dropped
with open('data.txt', 'r', encoding='utf8', errors='ignore') as f:
    raw_data = f.read()
raw_data = raw_data.lower()

# split the lower-cased corpus into sentences and words
sent_tokens = nltk.sent_tokenize(raw_data)
word_tokens = nltk.word_tokenize(raw_data)

# shared lemmatizer instance used by LemTokens
lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    """Return a list holding the lemma of every token in ``tokens``.

    Each token is passed to the module-level ``lemmer.lemmatize``.
    """
    return list(map(lemmer.lemmatize, tokens))

# translation table for str.translate: every punctuation code point maps to
# None, i.e. the character is deleted
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}

def LemNormalize(text):
    """Lower-case ``text``, strip punctuation, tokenize it and lemmatize the tokens.

    Returns the list produced by ``LemTokens`` for the cleaned-up text.
    """
    without_punct = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(without_punct)
    return LemTokens(words)

# greeting messages
# inputs recognised as a greeting (matched against the lower-cased user input)
GREETING_INPUTS = [
    "hi",
    "hello",
    "hey",
    "what's up",
    "yo",
]
# replies the bot picks from at random when greeted
GREETING_RESPONSES = [
    "hi",
    "hello",
    "hey",
    "what's up",
    "yo",
]

# generate response to user input
def generate_response(user_input):
    """Return the corpus sentence most similar to ``user_input``.

    The query is appended to the module-level ``sent_tokens`` list, all
    entries are vectorized with TF-IDF (tokenized by ``LemNormalize``, English
    stop words removed) and the query is compared to every corpus sentence by
    cosine similarity.

    Args:
        user_input: The user's message as a string.

    Returns:
        The best-matching sentence from ``sent_tokens``, or an apology string
        when there is no corpus sentence to compare against or when the best
        similarity is zero.

    Side effects:
        ``user_input`` is left as the last element of ``sent_tokens``; the
        caller is responsible for removing it afterwards.
    """
    fallback = "I am sorry! I don't understand you."

    # Kept for backward compatibility: the caller removes the query again
    # after this function returns.
    sent_tokens.append(user_input)

    # With an empty corpus the list holds only the query, so there is nothing
    # to rank (the former second-best lookup raised IndexError here).
    if len(sent_tokens) < 2:
        return fallback

    vectorizer = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = vectorizer.fit_transform(sent_tokens)

    # Similarity of the query (last row) to every row. The final column is the
    # query compared with itself, so it is dropped instead of relying on it
    # sorting last; on ties that assumption could echo the query back.
    scores = cosine_similarity(tfidf[-1], tfidf).flatten()[:-1]
    best_idx = int(scores.argmax())

    if scores[best_idx] == 0:
        return fallback
    return sent_tokens[best_idx]

# chat with user
flag = True
print("Bot: My name is Bot. I am an AI-based chatbot. If you want to exit, type 'bye'.")
while flag:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # stdin was closed or the user pressed Ctrl-C: end the session the
        # same way an explicit 'bye' does instead of dying with a traceback
        print()
        user_input = 'bye'
    # surrounding whitespace must not prevent 'bye' or a greeting from matching
    user_input = user_input.strip().lower()
    if user_input != 'bye':
        if user_input in GREETING_INPUTS:
            bot_response = random.choice(GREETING_RESPONSES)
            print("Bot: " + bot_response)
        else:
            print("Bot: ", end="")
            print(generate_response(user_input))
            # generate_response leaves the query as the last entry of
            # sent_tokens. Pop that entry rather than calling remove(), which
            # deletes the first equal element and could therefore take out a
            # corpus sentence identical to the query.
            if sent_tokens and sent_tokens[-1] == user_input:
                sent_tokens.pop()
    else:
        flag = False
        print("Bot: Bye! Have a nice day.")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Bot: My name is Bot. I am an AI-based chatbot. If you want to exit, type 'bye'.
You: hi
Bot: hi
You: hello
Bot: hey
You: yo
Bot: yo
You: ok
Bot: I am sorry! I don't understand you.
You: what are you doing
Bot: I am sorry! I don't understand you.
You: bhaag gadhe
Bot: I am sorry! I don't understand you.
