<a href="https://colab.research.google.com/github/pranjalraj28/nlp_chatbot/blob/main/chat.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import numpy as np
import nltk
import string
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Load the corpus and lowercase it in one step.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's default locale; errors='ignore' still drops undecodable bytes
# instead of raising.
with open('/content/fastfood.txt', 'r', encoding='utf-8', errors='ignore') as f:
    raw_doc = f.read().lower()

# Download the NLTK resources used below.
# 'punkt_tab' is requested in addition to 'punkt' because recent NLTK
# releases load the sentence tokenizer models from 'punkt_tab'; without it
# sent_tokenize()/word_tokenize() raise LookupError on a fresh environment.
for resource in ('punkt', 'punkt_tab', 'wordnet', 'omw-1.4'):
    nltk.download(resource)

# Tokenize the corpus into sentences (the retrieval candidates) and words
sentence_tokens = nltk.sent_tokenize(raw_doc)
word_tokens = nltk.word_tokenize(raw_doc)

# Initialize the WordNet lemmatizer shared by LemTokens
lemmer = nltk.stem.WordNetLemmatizer()

# Define a function to lemmatize tokens
def LemTokens(tokens):
    """Return a list with the lemma of every token in *tokens*.

    Each token is passed to the module-level WordNet lemmatizer ``lemmer``;
    the order of the input is preserved.
    """
    return list(map(lemmer.lemmatize, tokens))

# Translation table for str.translate(): every punctuation code point maps
# to None, which deletes that character from the text.
remove_punc_dict = {ord(symbol): None for symbol in string.punctuation}

# Define a function to lemmatize and normalize text
def LemNormalize(text):
    """Lowercase *text*, strip punctuation, tokenize it and lemmatize it.

    Returns the list of lemmas produced by ``LemTokens``. Used as the
    ``tokenizer`` callable of the TF-IDF vectorizer.
    """
    lowered = text.lower()
    without_punct = lowered.translate(remove_punc_dict)
    words = nltk.word_tokenize(without_punct)
    return LemTokens(words)

# Greeting inputs and responses
greet_inputs = ('hello', 'hi', 'whassup', 'how are you?')
greet_responses = ('hi', 'Hey', 'Hey There!', 'There there!')

# Define a greeting function
def greet(sentence):
    """Return a random greeting if *sentence* contains a known greeting.

    Matching is case-insensitive and ignores punctuation attached to the
    sentence or to individual words, so ``"Hello!"`` and ``"hi, there"``
    are recognised. The whole sentence is compared first because a
    multi-word entry such as ``'how are you?'`` can never equal a single
    whitespace-separated word.

    Args:
        sentence: The user's input text.

    Returns:
        One of ``greet_responses`` chosen at random, or ``None`` when no
        greeting is found.
    """
    edge_chars = string.punctuation + string.whitespace
    # Compare punctuation-free forms on both sides ('how are you?' -> 'how are you').
    known = {phrase.strip(edge_chars) for phrase in greet_inputs}

    cleaned = sentence.lower().strip(edge_chars)
    if cleaned in known:
        return random.choice(greet_responses)

    for word in cleaned.split():
        if word.strip(string.punctuation) in known:
            return random.choice(greet_responses)
    return None

# Main chat loop. `flag` stays True until the user says goodbye or thanks the bot.
flag = True

print('Hello, I am the Retrieval Learning Bot. Start typing your text after greeting to talk to me. For ending convo, type "bye!"')

while flag:
    user_response = input().lower()
    # Commands are compared without surrounding punctuation/whitespace so the
    # advertised "bye!" (and e.g. "thanks!") ends the chat, not only "bye".
    command = user_response.strip(string.punctuation + string.whitespace)

    if command == 'bye':
        flag = False
        print('Bot: Goodbye!')
        continue

    if command in ('thank you', 'thanks'):
        flag = False
        print('Bot: You are Welcome')
        continue

    greeting = greet(user_response)
    if greeting is not None:
        print('Bot: ' + greeting)
        continue

    # Keep the running word list up to date (extended in place).
    word_tokens.extend(nltk.word_tokenize(user_response))

    if not sentence_tokens:
        # Nothing to retrieve from; avoid indexing into an empty corpus.
        print("Bot: I am sorry. Unable to understand you!")
        continue

    # Get the response using TF-IDF and cosine similarity.
    # The query is vectorised together with the corpus as the last row, but
    # the corpus list itself is not mutated, so no clean-up is needed and an
    # identical corpus sentence can never be removed by mistake.
    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sentence_tokens + [user_response])

    # Compare the query only against the corpus rows; excluding the query row
    # means the best score is always a corpus sentence, even on ties.
    vals = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    idx = int(vals.argmax())
    req_tfidf = vals[idx]

    if req_tfidf == 0:
        # No term overlap with any corpus sentence.
        print("Bot: I am sorry. Unable to understand you!")
    else:
        robo1_response = sentence_tokens[idx]
        print('Bot: ' + robo1_response)


[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to /root/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Hello, I am the Retrieval Learning Bot. Start typing your text after greeting to talk to me. For ending convo, type "bye!"
hello
Bot: Hey There!
tell me about burger king
Bot: "burger king".
what is fast food
Bot: fast food nation.
tell me about frying food
Bot: frying foods in vats of searing oil proved as dangerous as it was expensive.
bye
Bot: Goodbye!
