In [1]:
import numpy as np
import pandas as pd
import nltk
import string
import random

In [2]:
raw_data = pd.read_fwf("Data.txt", header=None)
raw_data

Unnamed: 0,0
0,Conversation 1:
1,User: Help! There's a fire in the building!
2,Orby: Stay calm. Please evacuate the building ...
3,Conversation 2:
4,User: There's been a chemical spill in the lab!
...,...
145,User: There's a suspicious package in the mail...
146,Orby: Alert! Clear the area and notify buildin...
147,Conversation 50:
148,User: There's a mold infestation in the office...


In [3]:
type(raw_data)

pandas.core.frame.DataFrame

In [4]:
raw_data = raw_data[0].str.lower() # converting the entire text into lower case format
raw_data

0                                        conversation 1:
1            user: help! there's a fire in the building!
2      orby: stay calm. please evacuate the building ...
3                                        conversation 2:
4        user: there's been a chemical spill in the lab!
                             ...                        
145    user: there's a suspicious package in the mail...
146    orby: alert! clear the area and notify buildin...
147                                     conversation 50:
148    user: there's a mold infestation in the office...
149    orby: caution! close off the affected area and...
Name: 0, Length: 150, dtype: object

In [5]:
type(raw_data)

pandas.core.series.Series

In [6]:
nltk.download('punkt') # using the Punkt Tokenizer
nltk.download('wordnet') # using the Wordnet dictionary
nltk.download('omw-1.4') # using the Open Multilingual Wordnet package

[nltk_data] Downloading package punkt to C:\Users\adity/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\adity/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to C:\Users\adity/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [7]:
raw_data = raw_data.to_string() # converting series to string object
raw_data

"0                                        conversation 1:\n1            user: help! there's a fire in the building!\n2      orby: stay calm. please evacuate the building ...\n3                                        conversation 2:\n4        user: there's been a chemical spill in the lab!\n5      orby: alert! stay away from the affected area....\n6                                        conversation 3:\n7                 user: there's a flood in the basement!\n8      orby: urgent! turn off all electrical applianc...\n9                                        conversation 4:\n10                     user: i'm trapped in an elevator!\n11     orby: remain calm. press the emergency button ...\n12                                       conversation 5:\n13              user: there's a gas leak in the kitchen!\n14     orby: danger! open all windows and doors to ve...\n15                                       conversation 6:\n16       user: help! there's a severe storm approaching!\n17     orby: 

In [8]:
sentence_tokens = nltk.sent_tokenize(raw_data)
word_tokens = nltk.word_tokenize(raw_data)

In [9]:
sentence_tokens[:5]

['0                                        conversation 1:\n1            user: help!',
 "there's a fire in the building!",
 '2      orby: stay calm.',
 "please evacuate the building ...\n3                                        conversation 2:\n4        user: there's been a chemical spill in the lab!",
 '5      orby: alert!']

In [10]:
word_tokens[:5]

['0', 'conversation', '1', ':', '1']

### Performing Text-Preprocessing Steps:

In [11]:
lemmer = nltk.stem.WordNetLemmatizer()

def LemTokens(tokens):
    return [lemmer.lemmatize(token) for token in tokens]
remove_punc_dict = dict((ord(punct), None) for punct in string.punctuation)

def LemNormalize(text):
    return LemTokens(nltk.word_tokenize(text.lower().translate(remove_punc_dict)))

### Define Greeting Functions:

In [12]:
greet_inputs = ('hello', 'hey', 'hi', 'wassup', 'you there?', 'how are you?')
greet_responses = ('Hi', 'Hello', 'Hey', 'Hey there!', 'There there!!')

def greet(sentence):
    for word in sentence.split():
        if word.lower() in greet_inputs:
            return random.choice(greet_responses)

### Response Generation by the Bot:

In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [14]:
def response(user_response):
    orby_response = ''
    TfidfVec = TfidfVectorizer(tokenizer=LemNormalize, stop_words='english')
    tfidf = TfidfVec.fit_transform(sentence_tokens)
    vals = cosine_similarity(tfidf[-1], tfidf)
    idx = vals.argsort()[0][-2]
    flat = vals.flatten()
    flat.sort()
    req_tfidf = flat[-2]
    if (req_tfidf == 0):
        orby_response = orby_response + "I am sorry. I am unable to understand you!"
        return orby_response
    else:
        orby_response = orby_response + sentence_tokens[idx]
        return orby_response

### Defining the ChatFlow:

In [15]:
flag = True
print('Hello! I am Orby-The Retrieval Learning Bot. Start typing your text after getting to talk to me. For ending the conversation type bye!')
while(flag == True):
    user_response = input()
    user_response = user_response.lower()
    if(user_response != 'bye'):
        if(user_response == 'thank you' or user_response == 'thanks'):
            flag = False
            print('Orby: You are welcome!')
        else:
            if(greet(user_response) != None):
                print('Orby: '+ greet(user_response))
            else:
                sentence_tokens.append(user_response)
                word_tokens = word_tokens + nltk.word_tokenize(user_response)
                final_words = list(set(word_tokens))
                print("Orby: ", end='')
                print(response(user_response))
                sentence_tokens.remove(user_response)
    else:
        flag = False
        print("Orby: Goodbye!")

Hello! I am Orby-The Retrieval Learning Bot. Start typing your text after getting to talk to me. For ending the conversation type bye!
hi
Orby: Hey
there is a chemical spill
Orby: 



please evacuate the building ...
3                                        conversation 2:
4        user: there's been a chemical spill in the lab!
okay
Orby: I am sorry. I am unable to understand you!
thanks
Orby: You are welcome!
