## Chatbot: NLTK

In [1]:
import numpy as np
import nltk
import string
import random

In [2]:
# Read the whole restaurant corpus into memory. The context manager closes
# the file handle as soon as the read finishes instead of leaving it open
# for the lifetime of the kernel. `f` stays bound (to the closed file) so any
# later reference to the name still resolves.
with open("corpus_restaurant.txt", "r") as f:
    corpus = f.read()

In [9]:
corpus[:200]

"Welcome to Gourmet Haven, the best dining destination in the heart of the city. Our restaurant offers an exquisite menu featuring a wide range of dishes that cater to all taste buds. Whether you're a "

### Text Preprocessing Step

#### Lowercase

In [11]:
# Case-fold the corpus once so later matching is case-insensitive.
raw_doc = corpus.lower()

# Resources needed further down: Punkt models for sent_tokenize/word_tokenize
# and WordNet (+ Open Multilingual WordNet) for the lemmatizer.
nltk.download('punkt')
# NLTK 3.8.2+ loads the Punkt models from the 'punkt_tab' package; without it
# the tokenizers raise LookupError on a fresh environment. Older releases
# simply ignore the extra package.
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download("omw-1.4")

[nltk_data] Downloading package punkt to C:\Users\acer/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\acer/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to C:\Users\acer/nltk_data...


True

#### Tokenizing

In [12]:
# Split the lower-cased corpus two ways:
#   word_tokens     -> flat list of word / punctuation tokens
#   sentence_tokens -> list of sentences
word_tokens = nltk.word_tokenize(raw_doc)
sentence_tokens = nltk.sent_tokenize(raw_doc)

In [13]:
sentence_tokens[:10]

['welcome to gourmet haven, the best dining destination in the heart of the city.',
 'our restaurant offers an exquisite menu featuring a wide range of dishes that cater to all taste buds.',
 "whether you're a fan of italian cuisine, love asian flavors, or prefer classic american dishes, we have something for everyone!",
 'our chefs use only the freshest ingredients to create mouthwatering meals.',
 'from appetizers to desserts, each dish is crafted with care and precision.',
 'our vegan options are not only healthy but also delicious!',
 'we have gluten-free dishes to accommodate our guests with dietary restrictions.',
 'be sure to try our specials today; they are always a hit.',
 'looking to book a table?',
 'you can make a reservation for two or more with ease.']

In [14]:
word_tokens[:10]

['welcome',
 'to',
 'gourmet',
 'haven',
 ',',
 'the',
 'best',
 'dining',
 'destination',
 'in']

#### Lemmatizing and Normalizing text:

In [16]:
# Single shared WordNet lemmatizer instance used by LemTokens.
lemmer = nltk.WordNetLemmatizer()


def LemTokens(tokens):
    """Lemmatize each token with the shared WordNet lemmatizer.

    Args:
        tokens: iterable of string tokens.

    Returns:
        A list with one lemmatized string per input token, in input order.
    """
    return list(map(lemmer.lemmatize, tokens))

# Translation table for str.translate: every character in string.punctuation
# maps to None, i.e. is deleted.
remove_punc_dict = str.maketrans("", "", string.punctuation)


def LemNormalize(text):
    """Lower-case `text`, delete punctuation, tokenize, then lemmatize.

    Args:
        text: raw input string.

    Returns:
        The list produced by LemTokens for the word tokens of the cleaned text.
    """
    cleaned = text.lower().translate(remove_punc_dict)
    tokens = nltk.word_tokenize(cleaned)
    return LemTokens(tokens)

### Greet User

In [17]:
# Greetings the bot recognises (compared in lower case) and the replies it
# picks from at random.
greet_inputs = ("hello", "hi", "greetings", "sup", "what's up", "hey",)
greet_responses = ["hi", "hey", "*nods*", "hi there", "hello", "I am glad! You are talking to me"]

def greet(sentence):
    """Return a random greeting if `sentence` contains a known greeting.

    Matching is case-insensitive and works on whole words: punctuation
    attached to the edges of a word is ignored ("Hello!" matches "hello"),
    and multi-word entries such as "what's up" are matched as phrases.

    Args:
        sentence: the user's message.

    Returns:
        One element of `greet_responses`, or None when no greeting is found.
    """
    # Trim leading/trailing punctuation from each word but keep inner
    # characters, so the apostrophe in "what's" survives.
    words = [word.strip(string.punctuation).lower() for word in sentence.split()]
    # Pad with spaces so the `in` test below only matches on word boundaries
    # ("hi" must not match inside "this" or "hit").
    padded = " " + " ".join(word for word in words if word) + " "

    for greeting in greet_inputs:
        if " " + greeting + " " in padded:
            return random.choice(greet_responses)
    return None

### Response from bot

In [18]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [19]:
def response(user_response):
    """Return the corpus sentence most similar to the user's query.

    The query is compared against every sentence in the module-level
    `sentence_tokens` list using TF-IDF vectors (tokenized by `LemNormalize`,
    English stop words removed) and cosine similarity.

    The caller may already have appended the query as the last element of
    `sentence_tokens`; in that case it is used as-is. Otherwise the query is
    added to a local copy, so `sentence_tokens` itself is never modified here.

    Args:
        user_response: the user's query text.

    Returns:
        The best-matching sentence, or "I am sorry! I don't understand you"
        when no sentence has a non-zero similarity to the query.
    """
    fallback = "I am sorry! I don't understand you"

    # Work on a copy whose last entry is guaranteed to be the query.
    documents = list(sentence_tokens)
    if not documents or documents[-1] != user_response:
        documents.append(user_response)

    # Only the query itself is present: nothing to compare against.
    if len(documents) < 2:
        return fallback

    # Convert the sentences (query last) into a matrix of TF-IDF features.
    TfidfVector = TfidfVectorizer(tokenizer=LemNormalize, stop_words="english")
    tfidf = TfidfVector.fit_transform(documents)

    # Similarity of the query row against every other row. Leaving the query
    # row out of the comparison means the bot can never pick the user's own
    # text, even when a corpus sentence ties with the self-similarity score.
    similarities = cosine_similarity(tfidf[-1], tfidf[:-1]).flatten()
    best_idx = similarities.argmax()

    # A best score of 0 means no sentence shares any term with the query.
    if similarities[best_idx] == 0:
        return fallback
    return documents[best_idx]

### ChatFlow:

In [22]:
# Flag true to keep conversation going
flag = True

# Initial bot message
print("Restaurant Bot: I am a Restaurant Bot. I will answer your queries about restaurants. If you want to exit, type exit!")

while flag:
    # Read the user's message and echo it back.
    user_response = input()
    print("You: ", user_response)
    # Normalise so that e.g. " Exit " is still recognised as the exit command.
    user_response = user_response.strip().lower()

    if user_response == "exit":
        flag = False
        print("Restaurant Bot: Bye! Take care!")
    elif user_response in ("thanks", "thank you"):
        # Thanking the bot also ends the conversation.
        flag = False
        print("Restaurant Bot: You are welcome!")
    else:
        # Call greet() once: it picks its reply at random, so the value that
        # is tested must be the same value that is printed.
        greeting = greet(user_response)
        if greeting is not None:
            print("Restaurant Bot: " + greeting)
        else:
            # response() treats the last entry of sentence_tokens as the query.
            sentence_tokens.append(user_response)

            # Also extend the word-level tokens and the unique vocabulary.
            # NOTE(review): neither word_tokens nor final_words is read by
            # response(); kept in case later cells rely on them.
            word_tokens = word_tokens + nltk.word_tokenize(user_response)
            final_words = list(set(word_tokens))

            try:
                # Print the most similar corpus sentence.
                print("Restaurant Bot: ", end="")
                print(response(user_response))
            finally:
                # Drop exactly the entry appended above. list.remove() would
                # delete the first equal element, which is a corpus sentence
                # when the query duplicates one; the finally also restores the
                # corpus if response() raises.
                sentence_tokens.pop()

Restaurant Bot: I am a Restaurant Bot. I will answer your queries about restaurants. If you want to exit, type exit!
You:  Hello
Restaurant Bot: *nods*




You:  where is the location
Restaurant Bot: our location is easily accessible, with ample parking available.
You:  what about safety
Restaurant Bot: safety is a top priority at gourmet haven.
You:  who is the chef
Restaurant Bot: our chefs' specials change daily, ensuring there's always something new to try.
You:  what are special items
Restaurant Bot: for special occasions, our private dining area is perfect.
You:  bye
Restaurant Bot: I am sorry! I don't understand you
You:  exit
Restaurant Bot: Bye! Take care!
