### NLTK Chatbot

In [1]:
# Import libraries
import numpy as np
import nltk
import string
import random
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Reading the corpus of text
f = open('text.txt', 'r', errors='ignore')
raw_doc = f.read()
f.close()

In [4]:
raw_doc = raw_doc.lower()  # lowercase
nltk.download('punkt')  # punk tokenizer
nltk.download('wordnet')  # wordnet dictionary
nltk.download('omw-1.4')


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\PC\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\PC\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     C:\Users\PC\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

### Text Preprocessing

In [5]:
sentence_tokens = nltk.sent_tokenize(raw_doc)
word_tokens = nltk.word_tokenize(raw_doc)

In [6]:
sentence_tokens[:5]

['\nmain menu\n\nwikipediathe free encyclopedia\nsearch wikipedia\nsearch\ncreate account\nlog in\n\npersonal tools\ncontents hide\n(top)\nbackground\ndevelopment\napplication\ntoggle application subsection\nmessaging apps\nas part of company apps and websites\nchatbot sequences\ncompany internal platforms\ncustomer service\nhealthcare\npolitics\ngovernment\ntoys\nmalicious use\ndata security\nlimitations of chatbots\nchatbots and jobs\nsee also\nreferences\nfurther reading\nexternal links\nchatbot\n\narticle\ntalk\nread\nedit\nview history\n\ntools\nappearance hide\ntext\n\nsmall\n\nstandard\n\nlarge\nwidth\n\nstandard\n\nwide\ncolor (beta)\n\nautomatic\n\nlight\n\ndark\nreport an issue with dark mode\nfrom wikipedia, the free encyclopedia\nfor the bot-creation software, see chatbot.',
 'for bots on internet relay chat, see irc bot.',
 'parts of this article (those related to everything, particularly sections after the intro) need to be updated.',
 'the reason given is: this article i

In [7]:
word_tokens[:10]

['main',
 'menu',
 'wikipediathe',
 'free',
 'encyclopedia',
 'search',
 'wikipedia',
 'search',
 'create',
 'account']

In [8]:
lemmer = nltk.stem.WordNetLemmatizer()
def LemTokens(tokens):
    return [lemmer.lemmatize(token) for token in tokens]

remove_punc_dict = dict((ord(punct), None) for punct in string.punctuation)

def LemNormalize(text):
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punc_dict)))

### Greeting functions

In [9]:
greet_inputs = ("hello", "hi", "whatsapp", "how are you?")
greet_responses = ("hi", "hey", "hey there!", "there there!!")

def greet(sentence):
    for word in sentence.split():
        if word.lower() in greet_inputs:
            return random.choice(greet_responses)
    return None

### Machine Learning

In [10]:
def response(user_response):
    robo1_response = ''
    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    all_sentences = sentence_tokens + [user_response]
    tfidf = TfidfVec.fit_transform(all_sentences)
    vals = cosine_similarity(tfidf[-1], tfidf[:-1])
    
    # Find the most similar sentence
    idx = vals.argsort()[0][-1]
    flat = vals.flatten()
    flat.sort()
    req_tfidf = flat[-1]
    
    # Check if the similarity score is above a threshold
    if req_tfidf == 0:
        robo1_response = "I am sorry. Unable to understand you!"
    else:
        robo1_response = sentence_tokens[idx]
    
    return robo1_response


### Defining the chatflow

In [11]:
flag = True
print('Hello! I am a Learning Bot. Start typing your text. To end the conversation, type "bye"!')

while flag:
    user_response = input()
    user_response = user_response.lower()
    
    if user_response != 'bye':
        if user_response in ['thank you', 'thanks']:
            flag = False
            print('Bot: You are welcome...')
        elif greet(user_response) is not None:
            print('Bot:', greet(user_response))
        else:
            sentence_tokens.append(user_response)
            word_tokens += nltk.word_tokenize(user_response)
            print('Bot:', response(user_response))
            sentence_tokens.remove(user_response)
    else:
        flag = False
        print('Bot: Goodbye!')






        

Hello! I am a Learning Bot. Start typing your text. To end the conversation, type "bye"!
Bot: there there!!




Bot: what is a chatbot
Bot: chatbot
Bot: ai
Bot: what is ai
Bot: I am sorry. Unable to understand you!
Bot: I am sorry. Unable to understand you!
Bot: I am sorry. Unable to understand you!
Bot: I am sorry. Unable to understand you!
Bot: I am sorry. Unable to understand you!
Bot: I am sorry. Unable to understand you!
Bot: I am sorry. Unable to understand you!
Bot: goodbye
Bot: dscsc
Bot: I am sorry. Unable to understand you!
Bot: I am sorry. Unable to understand you!
Bot: I am sorry. Unable to understand you!
Bot: I am sorry. Unable to understand you!
