## ChatBot Using Natural Language Processing in Python

In [None]:
import json
import string
import random
import nltk
import numpy as num
from nltk.stem import WordNetLemmatizer
import tensorflow as tensorF
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Download the necessary NLTK data packages
nltk.download("punkt")
nltk.download("wordnet")
nltk.download('punkt_tab')

In [None]:
#3 Loading the Dataset: intents.json

data_file = open('/content/Data.json').read()
data = json.loads(data_file)

data

### Processing data

In [None]:
lm = WordNetLemmatizer() #reducing words to their base or dictionary form

ourClasses = []
newWords = []
documentX = []
documentY = []
# Each intent is tokenized into words and the patterns and their associated tags are added to their respective lists.
for intent in data["ourIntents"]:
    for pattern in intent["patterns"]:
        ournewTkns = nltk.word_tokenize(pattern)
        newWords.extend(ournewTkns)
        documentX.append(pattern)
        documentY.append(intent['tag'])
    if intent["tag"] not in ourClasses:
        ourClasses.append(intent["tag"])

newWords = [lm.lemmatize(word.lower()) for word in newWords if word not in string.punctuation]
newWords = sorted(set(newWords))
ourClasses = sorted(set(ourClasses))

In [None]:
ourClasses

In [8]:
newWords

["'m",
 "'s",
 '10',
 '19',
 'a',
 'age',
 'am',
 'anyone',
 'are',
 'ask',
 'awesome',
 'bad',
 'bbye',
 'be',
 'best',
 'bye',
 'can',
 'contact',
 'could',
 'covid',
 'creator',
 'cricket',
 'current',
 'date',
 'day',
 'designed',
 'developer',
 'do',
 'doing',
 'dumb',
 'e',
 'exit',
 'fine',
 'for',
 'funny',
 'get',
 'good',
 'goodbye',
 'great',
 'haha',
 'he',
 'hello',
 'help',
 'helpful',
 'helping',
 'hey',
 'hi',
 'hola',
 'hot',
 'how',
 'i',
 'idiot',
 'india',
 'inspiration',
 'inspires',
 'is',
 'it',
 'joke',
 'know',
 'later',
 'latest',
 'laugh',
 'lmao',
 'lol',
 'lost',
 'made',
 'make',
 'match',
 'me',
 'motivates',
 'namaste',
 'name',
 'news',
 'next',
 'nice',
 'no',
 'nope',
 'offered',
 'ok',
 'old',
 'praveena',
 'programmed',
 'programmer',
 'provide',
 'question',
 'quit',
 'riddle',
 'rofl',
 'score',
 'see',
 'set',
 'she',
 'shut',
 'song',
 'suggest',
 'suggestion',
 'sup',
 'support',
 'talk',
 'talking',
 'tell',
 'temperature',
 'ten',
 'thank',
 

In [9]:
documentX

['how old are you?',
 'Hi',
 'Hello',
 'Hey',
 'bye',
 'later',
 "what's your name?",
 'who are you?',
 'quit',
 'exit',
 'Hi there',
 'How are you',
 'Is anyone there?',
 'Hey',
 'Hola',
 'Hello',
 'Good day',
 'Namaste',
 'yo',
 'Bye',
 'See you later',
 'Goodbye',
 'Get lost',
 'Till next time',
 'bbye',
 'Thanks',
 'Thank you',
 "That's helpful",
 'Awesome, thanks',
 'Thanks for helping me',
 'How you could help me?',
 'What you can do?',
 'What help you provide?',
 'How you can be helpful?',
 'What support is offered',
 'Tell me a joke',
 'Joke',
 'Make me laugh',
 'Who are you',
 'what are you',
 'What is the time',
 'what is the date',
 'date',
 'time',
 'tell me the date',
 'day',
 'what day is is today',
 'Whats up',
 'Wazzup',
 'How are you',
 'sup',
 'How you doing',
 'haha',
 'lol',
 'rofl',
 'lmao',
 'thats funny',
 'Who made you',
 'who designed you',
 'who programmed you',
 'you are dumb',
 'shut up',
 'idiot',
 'what are you doing',
 'what are you upto',
 'Awesome',
 'G

In [10]:
documentY

['age',
 'greeting',
 'greeting',
 'greeting',
 'goodbye',
 'goodbye',
 'name',
 'name',
 'exit',
 'exit',
 'greeting',
 'greeting',
 'greeting',
 'greeting',
 'greeting',
 'greeting',
 'greeting',
 'greeting',
 'greeting',
 'goodbye',
 'goodbye',
 'goodbye',
 'goodbye',
 'goodbye',
 'goodbye',
 'thanks',
 'thanks',
 'thanks',
 'thanks',
 'thanks',
 'options',
 'options',
 'options',
 'options',
 'options',
 'jokes',
 'jokes',
 'jokes',
 'Identity',
 'Identity',
 'datetime',
 'datetime',
 'datetime',
 'datetime',
 'datetime',
 'datetime',
 'datetime',
 'whatsup',
 'whatsup',
 'whatsup',
 'whatsup',
 'whatsup',
 'haha',
 'haha',
 'haha',
 'haha',
 'haha',
 'programmer',
 'programmer',
 'programmer',
 'insult',
 'insult',
 'insult',
 'activity',
 'activity',
 'exclaim',
 'exclaim',
 'exclaim',
 'exclaim',
 'exclaim',
 'weather',
 'weather',
 'weather',
 'Praveena',
 'Praveena',
 'Praveena',
 'Praveena',
 'Praveena',
 'contact',
 'contact',
 'contact',
 'contact',
 'appreciate',
 'appreci

In [11]:
for idx, doc in enumerate(documentX):
  print(idx, "  ", doc)

0    how old are you?
1    Hi
2    Hello
3    Hey
4    bye
5    later
6    what's your name?
7    who are you?
8    quit
9    exit
10    Hi there
11    How are you
12    Is anyone there?
13    Hey
14    Hola
15    Hello
16    Good day
17    Namaste
18    yo
19    Bye
20    See you later
21    Goodbye
22    Get lost
23    Till next time
24    bbye
25    Thanks
26    Thank you
27    That's helpful
28    Awesome, thanks
29    Thanks for helping me
30    How you could help me?
31    What you can do?
32    What help you provide?
33    How you can be helpful?
34    What support is offered
35    Tell me a joke
36    Joke
37    Make me laugh
38    Who are you
39    what are you
40    What is the time
41    what is the date
42    date
43    time
44    tell me the date
45    day
46    what day is is today
47    Whats up
48    Wazzup
49    How are you
50    sup
51    How you doing
52    haha
53    lol
54    rofl
55    lmao
56    thats funny
57    Who made you
58    who designed you
59    who prog

### Designing a neural network model

In [12]:
trainingData  = []
outEmpty = [0] * len(ourClasses)

for idx, doc in enumerate(documentX):
    bag0words = []
    text = lm.lemmatize(doc.lower())
    for word in newWords :
        bag0words.append(1) if word in text else bag0words.append(0)

    outputRow = list(outEmpty)
    outputRow[ourClasses.index(documentY[idx])] = 1
    trainingData.append([bag0words, outputRow])

random.shuffle(trainingData)
trainingData = num.array(trainingData, dtype=object)

x = num.array(list(trainingData[:,0]))
y = num.array(list(trainingData[:,1]))

In [13]:
x
y

array([[0, 0, 0, ..., 0, 1, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

#### Defining and training a neural network model


In [14]:
iShape = (len(x[0]),)
oShape = len(y[0])

Model = Sequential()

Model.add(Dense(128, activation="relu" , input_shape=iShape))

Model.add(Dropout(0.5))

Model.add(Dense(64, activation="relu"))

Model.add(Dropout(0.3))

Model.add(Dense(oShape, activation='softmax'))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [15]:
md = tensorF.keras.optimizers.Adam(learning_rate= 0.01)

Model.compile(optimizer=md, loss='categorical_crossentropy', metrics=['accuracy'])

print(Model.summary())

None


In [16]:
Model.fit(x,y, epochs=200, verbose=1)

Epoch 1/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 549ms/step - accuracy: 0.0320 - loss: 3.4603
Epoch 2/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.1409 - loss: 3.2390 
Epoch 3/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.2429 - loss: 2.9690 
Epoch 4/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.2876 - loss: 2.6851 
Epoch 5/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.4540 - loss: 2.3341 
Epoch 6/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.3455 - loss: 2.2477 
Epoch 7/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.5178 - loss: 1.8543 
Epoch 8/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - accuracy: 0.6245 - loss: 1.4999 
Epoch 9/200
[1m4/4[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

<keras.src.callbacks.history.History at 0x7a4a907668a0>

In [17]:
# # Save the model
# Model.save('chatbot_model.keras')

# # Save the tokenizer (newWords)
# import pickle
# with open('tokenizer.pickle', 'wb') as handle:
#     pickle.dump(newWords, handle, protocol=pickle.HIGHEST_PROTOCOL)

# # Save the classes (ourClasses)
# with open('classes.pickle', 'wb') as handle:
#     pickle.dump(ourClasses, handle, protocol=pickle.HIGHEST_PROTOCOL)

#### Building useful features

In [18]:
def ourText(text):
  newtkns = nltk.word_tokenize(text)
  newtkns = [lm.lemmatize(word) for word in newtkns]
  return newtkns

def wordBag(text, vocab):
  newtkns = ourText(text)
  bagOwords = [0] * len(vocab)
  for w in newtkns:
    for idx, word in enumerate(vocab):
      if word == w:
        bagOwords[idx] = 1
  return num.array(bagOwords)

def Pclass(text, vocab, labels):
  bagOwords = wordBag(text, vocab)
  ourResult = Model.predict(num.array([bagOwords]))[0]
  newThresh = 0.2
  yp = [[idx, res] for idx, res in enumerate(ourResult) if res > newThresh]

  yp.sort(key=lambda x: x[1], reverse=True)
  newList = []
  for r in yp:
    newList.append(labels[r[0]])
  return newList

def getRes(firstlist, fJson):
  tag = firstlist[0]
  listOfIntents = fJson["ourIntents"]
  for i in listOfIntents:
    if i["tag"] == tag:
      ourResult = random.choice(i["responses"])
      break
  return ourResult

In [19]:
while True:
    newMessage = input("")
    intents = Pclass(newMessage, newWords, ourClasses)
    ourResult = getRes(intents, data)
    print(ourResult)

hii
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 285ms/step
Glad I could make you laugh !
??
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Glad I could make you laugh !
i didnt laugh
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Okay, Good bye!!
byee
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step
Glad I could make you laugh !
stop
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step
Glad I could make you laugh !
exit
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 30ms/step
Bye, Have a nice day 
exit
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step
Okay, Good bye!!


KeyboardInterrupt: Interrupted by user