Building a chatbot with NLP and TensorFlow

In [None]:
# Intent definitions for the chatbot. Each intent carries:
#   "tag"       - the class label associated with the intent,
#   "patterns"  - example user utterances for that intent,
#   "responses" - candidate replies for that intent.
data={"intents":[
    {"tag":"greeting",
     "patterns":["Hello","How are you?","Hi There","Hi","What's up"],
     "responses":["Howdy Partner!","Hello","How are you doing?","Greetings!","How do you do"]
        },
    {"tag":"age",
     "patterns":["how old are you","when is your birthday","when was you born"],
     # The birth year is 1996 in every response so the bot never contradicts itself.
     "responses":["I am 24 years old","I was born in 1996","My birthday is July 3rd and I was born in 1996","03/07/1996"]
        },
    {"tag":"date",
     "patterns":["what are you doing this weekend",
                "do you want to hangout sometime?","what are your plans for this week"],
     "responses":["I am available this week","I don't have any plans","I am not busy"]
        },
    {"tag":"name",
     "patterns":["what's your name","what are you called","who are you"],
     "responses":["My name is Kippi","I'm Kippi","Kippi"]
        },
    {"tag":"goodbye",
     "patterns":["bye","g2g","see ya","adios","cya"],
     "responses":["It was nice speaking to you","See you later","Speak Soon"]
        },
]}

NLP Libraries for Chatbot Development
Setting Up the NLP Environment for Your Chatbot
Essential Python Libraries for Chatbot NLP Tasks

In [None]:
import json
import string
import random

import nltk
import numpy as np
from nltk.stem import WordNetLemmatizer


In [None]:
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense,Dropout

# Fetch the NLTK resources used below: the Punkt tokenizer models for
# nltk.word_tokenize and the WordNet corpus for WordNetLemmatizer.
nltk.download("punkt")
# Recent NLTK releases load the tokenizer models from "punkt_tab" instead of
# "punkt"; requesting both keeps the notebook working on old and new versions.
nltk.download("punkt_tab")
nltk.download("wordnet")

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

Create a vocabulary of all the input_words used in the patterns

Create a list of all classes (the tag of each intent)

Create a list of all the patterns within the intents file

Create a list of the associated tag for each pattern in the intents file.

In [None]:
# Shared lemmatizer used for both vocabulary building and inference.
lemmatizer=WordNetLemmatizer()

# Accumulators filled by the following cells:
#   words   - vocabulary tokens
#   classes - intent tags
#   doc_x   - raw patterns
#   doc_y   - tag for the pattern at the same index in doc_x
words, classes, doc_x, doc_y = [], [], [], []

Processes chatbot training data, building vocabulary and mapping patterns to intents.

In [None]:
# Start from empty containers so that re-running this cell does not append
# the same patterns a second time (duplicates would skew the training set).
words, classes, doc_x, doc_y = [], [], [], []

for intent in data["intents"]:
    tag=intent["tag"]
    for pattern in intent["patterns"]:
        # Every token feeds the vocabulary; the raw pattern and its tag are
        # stored side by side so doc_x[i] is labelled by doc_y[i].
        tokens=nltk.word_tokenize(pattern)
        words.extend(tokens)
        doc_x.append(pattern)
        doc_y.append(tag)
    # Record each tag once, in first-seen order.
    if tag not in classes:
        classes.append(tag)

Creates a lemmatized word list, filtering out punctuation from the original word list.

In [None]:
# Normalize the vocabulary: skip tokens found in string.punctuation, then
# lowercase and lemmatize everything that remains.
words=[
    lemmatizer.lemmatize(token.lower())
    for token in words
    if token not in string.punctuation
]

Sorts and removes duplicates to create unique and ordered word and intent lists.

In [None]:
# De-duplicate through a set, then sort so that the position of each
# vocabulary word / intent tag is stable from run to run.
words=list(set(words))
words.sort()
classes=list(set(classes))
classes.sort()

PRINTING WORDS AND CLASSES

In [None]:
# Show the final vocabulary (sorted, de-duplicated lemmas).
print(words)

["'s", 'adios', 'are', 'birthday', 'born', 'bye', 'called', 'cya', 'do', 'doing', 'for', 'g2g', 'hangout', 'hello', 'hi', 'how', 'is', 'name', 'old', 'plan', 'see', 'sometime', 'there', 'this', 'to', 'up', 'wa', 'want', 'week', 'weekend', 'what', 'when', 'who', 'ya', 'you', 'your']


In [None]:
# Show the raw training patterns in the order they were collected.
print(doc_x)

['Hello', 'How are you?', 'Hi There', 'Hi', "What's up", 'how old are you', 'when is your birthday', 'when was you born', 'what are you doing this weekend', 'do you want to hangout sometime?', 'what are your plans for this week', "what's your name", 'what are you called', 'who are you', 'bye', 'g2g', 'see ya', 'adios', 'cya']


In [None]:
# Show the intent tag of each pattern (aligned by index with doc_x).
print(doc_y)

['greeting', 'greeting', 'greeting', 'greeting', 'greeting', 'age', 'age', 'age', 'date', 'date', 'date', 'name', 'name', 'name', 'goodbye', 'goodbye', 'goodbye', 'goodbye', 'goodbye']


In [None]:
# Show the sorted list of unique intent tags.
print(classes)

['age', 'date', 'goodbye', 'greeting', 'name']


Builds a Bag-of-Words (BoW) vector and a one-hot encoded intent label for every training pattern, then shuffles the examples and splits them into feature and label arrays.

In [None]:
training=[]
out_empty=[0]*len(classes)

# Build one bag-of-words vector and one one-hot label per training pattern.
for idx, doc in enumerate(doc_x):
    # Tokenize, lowercase and lemmatize each word of the pattern, mirroring how
    # the vocabulary itself was produced. Matching is done per token: a plain
    # substring test on the raw sentence would wrongly flag e.g. 'hi' and 'is'
    # as present in "this", and would disagree with the token-based matching
    # used at inference time.
    doc_tokens={lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(doc)}
    bow=[1 if word in doc_tokens else 0 for word in words]

    # One-hot encode the intent tag of this pattern.
    output_row=list(out_empty)
    output_row[classes.index(doc_y[idx])]=1

    training.append([bow, output_row])

# Shuffle so the examples are not grouped by intent.
random.shuffle(training)

# dtype=object keeps each [bow, label] pair intact even though the two lists
# have different lengths.
training=np.array(training,dtype=object)

train_X=np.array(list(training[:,0]))
train_y=np.array(list(training[:,1]))

In [None]:
# Peek at the first two bag-of-words rows (one column per vocabulary word).
train_X[:2]

array([[0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0],
       [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]])

In [None]:
# Peek at the first two one-hot label rows (one column per intent class).
train_y[:2]

array([[1, 0, 0, 0, 0],
       [0, 0, 1, 0, 0]])

Defines model parameters: input shape based on training data features, output shape based on number of intent classes, and training epochs.

In [None]:
# Layer sizes are read from the training arrays so they always follow the data:
# one input feature per vocabulary word, one output unit per intent class.
input_shape=(train_X.shape[1],)
output_shape=train_y.shape[1]

# Number of passes over the training set.
epochs=500

Creates and compiles a deep learning model using TensorFlow.keras for intent classification.

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Feed-forward intent classifier: two ReLU hidden layers, each followed by
# dropout, and a softmax output with one unit per intent class.
model = Sequential()
model.add(Dense(128, input_shape=input_shape, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(output_shape, activation='softmax'))

# Create the Adam optimizer with a specified learning rate
adam = tf.keras.optimizers.Adam(learning_rate=0.01)

# Compile the model using the Adam optimizer; categorical cross-entropy
# matches the one-hot encoded labels in train_y.
model.compile(loss='categorical_crossentropy',
              optimizer=adam,
              metrics=['accuracy'])

# summary() prints the table itself and returns None, so wrapping it in
# print() would only add a stray "None" line to the output.
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_9 (Dense)             (None, 128)               4736      
                                                                 
 dropout_6 (Dropout)         (None, 128)               0         
                                                                 
 dense_10 (Dense)            (None, 64)                8256      
                                                                 
 dropout_7 (Dropout)         (None, 64)                0         
                                                                 
 dense_11 (Dense)            (None, 5)                 325       
                                                                 
Total params: 13317 (52.02 KB)
Trainable params: 13317 (52.02 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
None


Trains the deep learning model for intent classification using prepared training data (BoW features and encoded labels) for 500 epochs with progress logging.

In [None]:
# Train on the bag-of-words features and one-hot labels. The epoch count comes
# from the `epochs` parameter defined with the other model settings, so there
# is a single place to change it.
model.fit(x=train_X, y=train_y, epochs=epochs, verbose=1)

Epoch 1/500
Epoch 2/500
Epoch 3/500
Epoch 4/500
Epoch 5/500
Epoch 6/500
Epoch 7/500
Epoch 8/500
Epoch 9/500
Epoch 10/500
Epoch 11/500
Epoch 12/500
Epoch 13/500
Epoch 14/500
Epoch 15/500
Epoch 16/500
Epoch 17/500
Epoch 18/500
Epoch 19/500
Epoch 20/500
Epoch 21/500
Epoch 22/500
Epoch 23/500
Epoch 24/500
Epoch 25/500
Epoch 26/500
Epoch 27/500
Epoch 28/500
Epoch 29/500
Epoch 30/500
Epoch 31/500
Epoch 32/500
Epoch 33/500
Epoch 34/500
Epoch 35/500
Epoch 36/500
Epoch 37/500
Epoch 38/500
Epoch 39/500
Epoch 40/500
Epoch 41/500
Epoch 42/500
Epoch 43/500
Epoch 44/500
Epoch 45/500
Epoch 46/500
Epoch 47/500
Epoch 48/500
Epoch 49/500
Epoch 50/500
Epoch 51/500
Epoch 52/500
Epoch 53/500
Epoch 54/500
Epoch 55/500
Epoch 56/500
Epoch 57/500
Epoch 58/500
Epoch 59/500
Epoch 60/500
Epoch 61/500
Epoch 62/500
Epoch 63/500
Epoch 64/500
Epoch 65/500
Epoch 66/500
Epoch 67/500
Epoch 68/500
Epoch 69/500
Epoch 70/500
Epoch 71/500
Epoch 72/500
Epoch 73/500
Epoch 74/500
Epoch 75/500
Epoch 76/500
Epoch 77/500
Epoch 78

<keras.src.callbacks.History at 0x7f79c3237040>

Creates a Bag-of-Words (BoW) representation of a new text input, considering lemmatization and vocabulary matching.

In [None]:
def clean_text(text):
    """Tokenize ``text`` and return its lowercased, lemmatized tokens.

    The vocabulary is built from lowercased lemmas, so tokens are lowercased
    here as well; otherwise capitalized input such as "Hello" would never
    match a vocabulary entry.

    Args:
        text: Raw user input string.

    Returns:
        list[str]: One normalized token per token produced by
        ``nltk.word_tokenize``.
    """
    return [lemmatizer.lemmatize(token.lower()) for token in nltk.word_tokenize(text)]

def bag_of_words(text,vocab):
    """Encode ``text`` as a binary bag-of-words vector over ``vocab``.

    Args:
        text: Raw input string; it is normalized with ``clean_text``.
        vocab: Sequence of vocabulary words; position i of the result
            corresponds to ``vocab[i]``.

    Returns:
        numpy.ndarray: 1 where the vocabulary word equals one of the input
        tokens, 0 elsewhere.
    """
    present=set(clean_text(text))
    return np.array([1 if entry in present else 0 for entry in vocab])

Creates a BoW representation for the text using the bag_of_words function (defined in the previous cell).
Predicts probabilities for each intent class using the trained model (model.predict).
Applies a threshold (default 0.2) to filter out low-confidence predictions.
Sorts the remaining predictions by their probability (descending order).
Creates a list containing the corresponding intent labels (from labels) based on the sorted predictions.
Returns the list of predicted intent labels, potentially indicating multiple intents exceeding the threshold.

In [None]:
def pred_class(text, vocab, labels, thresh=0.2):
    """Predict the intent labels for ``text``, most probable first.

    Args:
        text: Raw user input string.
        vocab: Vocabulary used to build the bag-of-words vector.
        labels: Intent labels, indexed like the model's output units.
        thresh: Minimum predicted probability (exclusive) for a label to be
            returned. Defaults to 0.2.

    Returns:
        list: Labels whose probability exceeds ``thresh``, sorted by
        descending probability. May be empty if nothing exceeds it.
    """
    bow=bag_of_words(text, vocab)
    # The model expects a batch, so wrap the single vector and unwrap the result.
    result=model.predict(np.array([bow]))[0]

    scored=[(idx,prob) for idx, prob in enumerate(result) if prob>thresh]
    scored.sort(key=lambda pair:pair[1], reverse=True)
    return [labels[idx] for idx, _ in scored]

def get_response(intents_list, intents_json, fallback="Sorry, I didn't understand that."):
    """Pick a random response for the top predicted intent.

    Args:
        intents_list: Predicted intent tags, most probable first. Only the
            first entry is used.
        intents_json: Dict with an "intents" list; each item has a "tag" and a
            list of "responses".
        fallback: Reply returned when no intent was predicted, the tag is not
            present in ``intents_json``, or the matching intent has no
            responses.

    Returns:
        str: A randomly chosen response of the first intent whose tag matches,
        or ``fallback``.
    """
    # No prediction cleared the threshold: indexing [0] would raise IndexError.
    if not intents_list:
        return fallback

    tag=intents_list[0]
    for intent in intents_json["intents"]:
        if intent["tag"]==tag:
            responses=intent["responses"]
            return random.choice(responses) if responses else fallback

    # Unknown tag: previously this path hit an unbound local variable.
    return fallback

Continuously prompts the user for input, predicts intents using the trained model, and retrieves a response from the chatbot's data.

In [None]:
# Chat loop: read a message, predict its intent and print a response.
# Typing "quit" or "exit", sending EOF, or interrupting the cell ends the loop,
# so the notebook can finish instead of blocking forever.
while True:
    try:
        message=input("")
    except (EOFError, KeyboardInterrupt):
        break
    if message.strip().lower() in ("quit","exit"):
        break
    intents=pred_class(message, words, classes)
    result=get_response(intents,data)
    print(result)

Hello
How are you doing?
very nice
Greetings!
