<a href="https://colab.research.google.com/github/kristophersmo/Chatbot_Lenny/blob/main/ChatBot_Lenny.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [26]:
# Mounting the Google Drive
# Colab-only step: exposes the Drive contents under /content/drive so the
# intents file can be opened with ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')
# Directory the notebook reads intents.json from.
data_root = '/content/drive/My Drive/ChatBot'

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [27]:
import json
import string
import random
import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Dropout
# Resources needed by nltk.word_tokenize and WordNetLemmatizer.
# Recent NLTK releases load the "punkt_tab" tables for word_tokenize, while
# older releases load the pickled "punkt" models, so both are fetched to keep
# the notebook runnable on a fresh runtime regardless of the installed version.
nltk.download("punkt")
nltk.download("punkt_tab")
nltk.download("wordnet")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [28]:
# Read the intents definition. The context manager closes the file handle
# (the bare open(...).read() left it open), and the explicit encoding keeps
# non-ASCII responses intact regardless of the runtime's default locale.
with open(data_root + '/intents.json', encoding='utf-8') as intents_fp:
    data_file = intents_fp.read()
data = json.loads(data_file)

In [29]:
# Creating data_X and data_y, plus the word and tag vocabularies

words = []    # every token seen in any pattern; reduced to a sorted vocabulary below
classes = []  # distinct tags that have at least one pattern
data_X = []   # raw pattern strings
data_y = []   # data_y[i] is the tag belonging to data_X[i]

# Iterating over all the intents
for intent in data["intents"]:
  tag = intent["tag"]
  for pattern in intent["patterns"]:
    tokens = nltk.word_tokenize(pattern)  # tokenize each pattern
    words.extend(tokens)                  # collect its tokens for the vocabulary
    data_X.append(pattern)
    data_y.append(tag)

    # Register the tag the first time one of its patterns is seen, so tags
    # without any pattern never become an (untrainable) output class.
    if tag not in classes:
      classes.append(tag)

# Initializing lemmatizer to reduce words to their dictionary form
lemmatizer = WordNetLemmatizer()

# Lemmatize all the words in the vocabulary and convert them to lowercase.
# A token is dropped only when every character is punctuation; the previous
# substring test against string.punctuation let multi-character punctuation
# tokens such as "``", "''" or "..." through into the vocabulary.
words = [
    lemmatizer.lemmatize(word.lower())
    for word in words
    if not all(ch in string.punctuation for ch in word)
]
# Sorting the vocabulary and classes alphabetically; set() removes duplicates
words = sorted(set(words))
classes = sorted(set(classes))

In [30]:
# Text to numbers
training = []
out_empty = [0] * len(classes)
# Creating the Bag of Words (BoW) model
for idx, doc in enumerate(data_X):
  # Tokenise and lemmatise the pattern word by word. Lemmatising the whole
  # sentence as a single string does nothing useful, and testing
  # `word in sentence` is a substring match (e.g. "a" fires for "capital"),
  # which disagrees with the exact-token matching bag_of_words() applies to
  # user input at prediction time.
  doc_tokens = {lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)}
  bow = [1 if word in doc_tokens else 0 for word in words]
  # Mark the index of the class that the current pattern is associated with
  output_row = list(out_empty)
  output_row[classes.index(data_y[idx])] = 1
  # Add the BoW vector and its one-hot encoded class to training
  training.append([bow, output_row])
# Shuffle the data and convert it to an array
random.shuffle(training)
training = np.array(training, dtype=object)
# Split the features and target labels
train_X = np.array(list(training[:, 0]))
train_Y = np.array(list(training[:, 1]))

In [31]:
# The neural network model: a feed-forward classifier that maps a BoW vector
# (one input per vocabulary word) to a softmax over the intent classes.
model = Sequential()
model.add(Dense(128, input_shape=(len(train_X[0]),), activation="relu"))
model.add(Dropout(0.4))
model.add(Dense(64, activation="relu"))
model.add(Dropout(0.4))
model.add(Dense(len(train_Y[0]), activation="softmax"))
adam = tf.keras.optimizers.legacy.Adam(learning_rate=0.01, decay=1e-6)
model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=["accuracy"])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" line to the cell output.
model.summary()
model.fit(x=train_X, y=train_Y, epochs=200, verbose=1)

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_6 (Dense)             (None, 128)               18688     
                                                                 
 dropout_4 (Dropout)         (None, 128)               0         
                                                                 
 dense_7 (Dense)             (None, 64)                8256      
                                                                 
 dropout_5 (Dropout)         (None, 64)                0         
                                                                 
 dense_8 (Dense)             (None, 121)               7865      
                                                                 
Total params: 34809 (135.97 KB)
Trainable params: 34809 (135.97 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None
Epoch 1/200

<keras.src.callbacks.History at 0x7fb4397a0a30>

In [32]:
  # Preprocessing the user input (to take strings)
def clean_text(text):
    """Tokenise user input and normalise each token like the vocabulary.

    The vocabulary is built from lower-cased, lemmatised tokens, so the same
    normalisation is applied here; without the lower-casing, capitalised
    input such as "Hello" could never match a vocabulary entry.

    Args:
        text: Raw message string typed by the user.

    Returns:
        List of lower-cased, lemmatised tokens, in input order.
    """
    tokens = nltk.word_tokenize(text)
    return [lemmatizer.lemmatize(token.lower()) for token in tokens]

def bag_of_words(text, vocab):
    """Encode ``text`` as a 0/1 vector aligned with ``vocab``.

    Position ``i`` of the result is 1 when ``vocab[i]`` equals one of the
    tokens produced by ``clean_text(text)``, otherwise 0.

    Args:
        text: Raw message string.
        vocab: Sequence of vocabulary words; its order fixes the vector layout.

    Returns:
        NumPy array with one entry per vocabulary word.
    """
    present = set(clean_text(text))
    flags = [1 if entry in present else 0 for entry in vocab]
    return np.array(flags)

def pred_class(text, vocab, labels, thresh=0.50):
    """Predict the intent labels for ``text``, most probable first.

    Args:
        text: Raw message string.
        vocab: Vocabulary used to build the bag-of-words input vector.
        labels: Class labels, indexed like the model's output units.
        thresh: Minimum probability (exclusive) a class needs to be returned.
            Defaults to 0.50, the value that was previously hard-coded.

    Returns:
        List of labels whose predicted probability exceeds ``thresh``, sorted
        by decreasing probability. Empty when no class clears the threshold.
    """
    bow = bag_of_words(text, vocab)
    result = model.predict(np.array([bow]))[0]  # probabilities for the single input row
    y_pred = [(idx, prob) for idx, prob in enumerate(result) if prob > thresh]
    y_pred.sort(key=lambda pair: pair[1], reverse=True)
    return [labels[idx] for idx, _ in y_pred]

def get_response(intents_list, intents_json):
    """Pick a reply for the top predicted intent.

    Args:
        intents_list: Predicted tags, best first; may be empty.
        intents_json: Parsed intents data with an "intents" list whose items
            carry a "tag" and a list of "responses".

    Returns:
        A randomly chosen response of the first intent whose tag equals
        ``intents_list[0]``. The fallback message is returned when nothing was
        predicted, when no intent carries that tag, or when the matching
        intent has no responses (previously the latter two cases raised
        UnboundLocalError / IndexError).
    """
    fallback = "Sorry! I don't understand."
    if not intents_list:
        return fallback

    tag = intents_list[0]
    for intent in intents_json["intents"]:
        if intent["tag"] == tag:
            responses = intent.get("responses")
            return random.choice(responses) if responses else fallback
    return fallback

# Interacting with the ChatBot: read a line, answer it, repeat until the
# user enters the sentinel "0".
print("Press 0 if you don't want to interact with the chatbot.")
message = input("")
while message != "0":
    intents = pred_class(message, words, classes)
    result = get_response(intents, data)
    print(result)
    message = input("")

Press 0 if you don't want to interact with the chatbot.
hey there
Hello! I'm your United States geographical helper.
what is your name
My name is Lenny! Who are you?
i'm kris
It's a pleasure to meet who I'm serving!
are you a man
Yes, I'm a man
are you a woman
No, I'm a man
where are you located
I live in a server located in the United States.
what do you know
All mammals get goosebumps.
random fact
Animals can be allergic to humans.
that's funny
Thanks! I try to be as entertaining as I am knowledgeable.
are you happy
I'm doing fine, and yourself?
i'm good thanks for asking
That's perfect!
what is the capital of wisconsin
Madison, WI
what is the population of wisconsin
As of 2022, the population of Wisconsin is 5.893 million.
goodbye
Goodbye!
0
