# Building a Simple Chatbot from Scratch in Python using NLTK

<img src="covid chatbot.gif" width="1000" align="left">

## Import packages

In [1]:
import io
import random
import pandas as pd
import string # to process standard python strings
import warnings
import numpy as np

In [2]:
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.stem import WordNetLemmatizer
# Fetch NLTK's 'popular' resource collection; quiet=True suppresses the download log.
nltk.download('popular', quiet=True)

# Suppress every Python warning raised from this point on.
warnings.filterwarnings('ignore')

## Reading from Corona Virus Wikipedia Page

In [3]:
# Load the corpus text. The context manager closes the file handle even if the
# read fails; errors='ignore' silently drops bytes that cannot be decoded.
with open('covid chatbot.txt', 'r', errors='ignore') as f:
    raw = f.read()
raw = raw.lower() # converts to lowercase

## Tokenization

In [4]:
# Sentence-level split of the lowercased corpus; `response` searches this list
# for the sentence closest to the user's query.
sent_tokens = nltk.sent_tokenize(raw) # converts to list of sentences 
# Word-level split of the same text (not referenced again in the visible cells).
word_tokens = nltk.word_tokenize(raw) # converts to list of words

In [5]:
# Shared lemmatizer instance, created once and reused for every call.
# WordNet is a semantically-oriented dictionary of English included in NLTK.
lemmer = nltk.stem.WordNetLemmatizer()

def LemmaTokens(tokens):
    """Return a new list holding the lemma of each token in ``tokens``, in order."""
    lemmas = []
    for tok in tokens:
        lemmas.append(lemmer.lemmatize(tok))
    return lemmas
# Translation table for str.translate(): every character in string.punctuation
# maps to None, which deletes it from the text.
remove_punct_dict = {ord(symbol): None for symbol in string.punctuation}

def LemmaNormalize(text):
    """Lowercase ``text``, delete punctuation, word-tokenize, and lemmatize.

    Returns the list of lemmas produced by ``LemmaTokens``.
    """
    cleaned = text.lower().translate(remove_punct_dict)
    words = nltk.word_tokenize(cleaned)
    return LemmaTokens(words)

## Greetings

In [6]:
inputs = ("hello", "hi", "sup", "what's up", "hey", "hi there")
replies = ("hi", "hey", "hi there", "hello", "all good")

def greeting(sentence):
    """Return a random reply if ``sentence`` contains a known greeting, else None.

    Matching is case-insensitive and ignores punctuation attached to the edges
    of a word, so "Hello!" is recognised. Multi-word entries of ``inputs`` such
    as "what's up" are matched as whole phrases; a plain word-by-word
    comparison can never match them.

    Parameters:
        sentence: the raw text typed by the user.

    Returns:
        A random element of ``replies`` when a greeting is found, otherwise None.
    """
    # Trim punctuation from the edges of each word ("hello!" -> "hello") while
    # leaving inner apostrophes alone so "what's" survives intact.
    words = [word.strip(string.punctuation).lower() for word in sentence.split()]
    # Pad with spaces so a phrase only matches on whole-word boundaries
    # (e.g. "hi" must not match inside "this").
    padded = " " + " ".join(word for word in words if word) + " "
    for phrase in inputs:
        if " " + phrase + " " in padded:
            return random.choice(replies)
    return None

The `response` function below finds the corpus sentence most similar to what the user typed; if nothing in the corpus matches, it returns a generic apology message.

In [7]:
def response(user_response):
    """Return the corpus sentence most similar to ``user_response``.

    The query is appended to the module-level ``sent_tokens`` list and is NOT
    removed here; the caller is expected to drop it afterwards. A fresh TF-IDF
    model is fitted over the corpus plus the query on every call, and the query
    row is compared with every row by cosine similarity.

    Returns:
        The second-highest-ranked sentence, or a fixed apology string when
        that sentence's similarity score is 0.
    """
    sent_tokens.append(user_response)
    vectorizer = TfidfVectorizer(tokenizer=LemmaNormalize, stop_words='english')
    matrix = vectorizer.fit_transform(sent_tokens)
    # The last row is the query that was just appended.
    similarities = cosine_similarity(matrix[-1], matrix)
    # Take the runner-up: the top-ranked entry is normally the query matching itself.
    best_idx = similarities.argsort()[0][-2]
    best_score = similarities[0][best_idx]
    if best_score == 0:
        return "I am sorry! I don't understand you"
    return sent_tokens[best_idx]

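Adding the opening and closing lines: the loop below greets the user, answers each query, and ends the conversation when the user says bye or thanks.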

In [None]:
# Interactive chat loop: read a line, answer it, and stop on "bye" or a thank-you.
flag = True
print("Bot: Hi, I'm Maulik Bot. Ask me about corona virus. I'll answer your queries about Chatbots. If you want to exit, type Bye!")
while flag:
    user_response = input().lower()
    if user_response == 'bye':
        flag = False
        print("Bot: Bye! take care..")
    elif user_response in ('thanks', 'thank you', 'ty'):
        flag = False
        print("Bot: You are welcome..")
    else:
        # Call greeting() once and reuse the result instead of drawing a second
        # random reply just to print it.
        greeting_reply = greeting(user_response)
        if greeting_reply is not None:
            print("Bot: " + greeting_reply)
        else:
            print("Bot: ", end="")
            try:
                print(response(user_response))
            finally:
                # response() appends the query to sent_tokens. Drop that trailing
                # entry even if response() raised. list.remove() would delete the
                # FIRST equal element, which can be a genuine corpus sentence
                # when the user types one verbatim.
                if sent_tokens and sent_tokens[-1] == user_response:
                    sent_tokens.pop()

Bot: Hi, I'm Maulik Bot. Ask me about corona virus. I'll answer your queries about Chatbots. If you want to exit, type Bye!
hi
Bot: hello
covid 19
Bot: there is no medication known to be effective at preventing covid 19.
what are the signs of covid 19
Bot: there is no medication known to be effective at preventing covid 19.
what is the cause of covid 19
Bot: cause
transmission

covid 19 is a new disease, and many of the details of its spread are still under investigation.
how to prevent covid 19
Bot: there is no medication known to be effective at preventing covid 19.
