CHATBOT

In [1]:
# Importing necessary libraries
import json
import nltk
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import load_model
import pickle

In [2]:
# Download the NLTK 'punkt' package. Presumably this is the tokenizer data
# that nltk.word_tokenize relies on later in this notebook -- TODO confirm
# against the installed NLTK version.
nltk.download('punkt')

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\riyag\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!


True

In [3]:
# Read the intent definitions from disk and create the empty containers
# that the following cells fill in. (No model is loaded in this cell.)
with open("intents.json", mode="r") as file:
    intents = json.loads(file.read())

# words: raw vocabulary tokens; classes: intent tags;
# documents: (tokens, tag) pairs, one per pattern sentence.
words, classes, documents = [], [], []
# Tokens listed here are dropped when the vocabulary is stemmed.
ignore_words = ['?']


In [4]:
print("Looping through the Intents to Convert them to words, classes, documents, and ignore_words.")
for intent in intents['intents']:
    for pattern in intent['patterns']:
        # Split the pattern sentence into individual tokens.
        w = nltk.word_tokenize(pattern)
        # Grow the raw vocabulary and record the (tokens, tag) pair.
        words += w
        documents += [(w, intent['tag'])]
        # Register the tag only the first time one of its patterns is seen.
        if intent['tag'] in classes:
            continue
        classes.append(intent['tag'])

Looping through the Intents to Convert them to words, classes, documents, and ignore_words.


In [5]:
from nltk.stem.lancaster import LancasterStemmer
stemmer = LancasterStemmer()
print("Stemming, Lowering and Removing Duplicates.")
# Lower-case and stem every token that is not in ignore_words; the set
# removes duplicates and sorted() gives the vocabulary a stable order.
words = sorted({stemmer.stem(w.lower()) for w in words if w not in ignore_words})

Stemming, Lowering and Removing Duplicates.


In [6]:
# `words` was already lower-cased and stemmed by the previous cell. Stemming
# is not guaranteed to be idempotent, so stemming the vocabulary a second
# time can produce entries that no longer match the once-stemmed pattern and
# user tokens that are compared against it later. This cell therefore only
# de-duplicates and sorts, which makes it safe to re-run.
print("Stemming, Lowering and Removing Duplicates.")
words = sorted(set(words))

Stemming, Lowering and Removing Duplicates.


In [7]:
# Collapse repeated tags and put them in a fixed (sorted) order.
classes = sorted(set(classes))

# Summary of what has been collected so far.
print(len(documents), "documents")
print(len(classes), "classes", classes)
print(len(words), "unique stemmed words", words)

print("Creating the Data for our Model.")
training, output = [], []
print("Creating a List (Empty) for Output.")
# Template label row: one zero per class, copied for every document later.
output_empty = [0 for _ in classes]

425 documents
140 classes ['None', 'advanced_coding_practices', 'advanced_data_science_techniques', 'advanced_learning_strategies', 'agile_project_management', 'appointment', 'book_genre_recommendation', 'book_recommendation', 'building_customer_relationships', 'building_personal_brand', 'building_resilience_skills', 'cancel_order', 'career_advice', 'career_opportunities', 'coding_help', 'coding_resources', 'community_involvement', 'company_values', 'contact', 'cooking_tutorials', 'creative_problem_solving_approaches', 'creative_urban_planning', 'creative_writing_tips', 'cultural_intelligence_development', 'customer_feedback', 'customer_privacy', 'customization', 'digital_marketing_strategies', 'discount', 'education', 'effective_business_networking', 'effective_communication_skills', 'effective_conflict_resolution', 'effective_crisis_management', 'effective_health_and_wellness_routines', 'effective_interpersonal_communication', 'effective_learning_management', 'effective_online_learni

In [8]:
print("Creating Training Set, Bag of Words for our Model.")
for doc in documents:
    # doc is a (tokens, tag) pair; lower-case and stem the tokens.
    pattern_words = [stemmer.stem(word.lower()) for word in doc[0]]

    # One slot per vocabulary entry: 1 if it occurs in this pattern, else 0.
    bag = [1 if w in pattern_words else 0 for w in words]

    # Label row: copy of the all-zero template with a 1 at the tag's index.
    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1

    training.append([bag, output_row])

Creating Training Set, Bag of Words for our Model.


In [9]:
import random
print("Shuffling Randomly and Converting into Numpy Array for Faster Processing.")
random.shuffle(training)

# Transpose the [bag, output_row] pairs into a tuple of bags and a tuple of label rows.
bags, output_rows = zip(*training)

# Build numpy arrays from the two tuples.
train_x, train_y = np.array(bags), np.array(output_rows)

print("Creating Train and Test Lists.")
# Turn each array back into a plain list of its rows.
train_x, train_y = list(train_x), list(train_y)


print("Building Neural Network for Our Chatbot to be Contextual.")
print("Resetting graph data.")

Shuffling Randomly and Converting into Numpy Array for Faster Processing.
Creating Train and Test Lists.
Building Neural Network for Our Chatbot to be Contextual.
Resetting graph data.


In [10]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import SGD

# Feed-forward classifier: two hidden ReLU layers of 8 units each, followed by
# a softmax layer with one output per entry of a label row. The input width is
# the length of a bag-of-words vector.
model = Sequential()
model.add(Dense(8, input_shape=(len(train_x[0]),), activation='relu'))
model.add(Dense(8, activation='relu'))
model.add(Dense(len(train_y[0]), activation='softmax'))

sgd = tf.keras.optimizers.legacy.SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)

model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

print("Training.")
model.fit(np.array(train_x), np.array(train_y), epochs=150, batch_size=8, verbose=1)

print("Saving the Model.")
model.save('model_keras.h5')

print("Pickle is also Saved.")
# Use a context manager so the file is flushed and closed before it is read back.
with open("training_data", "wb") as pickle_file:
    pickle.dump({'words': words, 'classes': classes, 'train_x': train_x, 'train_y': train_y}, pickle_file)

print("Loading Pickle.")
# NOTE: unpickling can execute arbitrary code; only load a "training_data"
# file that was written by this notebook.
with open("training_data", "rb") as pickle_file:
    data = pickle.load(pickle_file)
words = data['words']
classes = data['classes']
train_x = data['train_x']
train_y = data['train_y']

print("Loading the Model.")
# Replace the in-memory model with the copy that was just saved to disk.
model = tf.keras.models.load_model('model_keras.h5')




Training.
Epoch 1/150
Epoch 2/150
Epoch 3/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 7/150
Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 12/150
Epoch 13/150
Epoch 14/150
Epoch 15/150
Epoch 16/150
Epoch 17/150
Epoch 18/150
Epoch 19/150
Epoch 20/150
Epoch 21/150
Epoch 22/150
Epoch 23/150
Epoch 24/150
Epoch 25/150
Epoch 26/150
Epoch 27/150
Epoch 28/150
Epoch 29/150
Epoch 30/150
Epoch 31/150
Epoch 32/150
Epoch 33/150
Epoch 34/150
Epoch 35/150
Epoch 36/150
Epoch 37/150
Epoch 38/150
Epoch 39/150
Epoch 40/150
Epoch 41/150
Epoch 42/150
Epoch 43/150
Epoch 44/150
Epoch 45/150
Epoch 46/150
Epoch 47/150
Epoch 48/150
Epoch 49/150
Epoch 50/150
Epoch 51/150
Epoch 52/150
Epoch 53/150
Epoch 54/150
Epoch 55/150
Epoch 56/150
Epoch 57/150
Epoch 58/150
Epoch 59/150
Epoch 60/150
Epoch 61/150
Epoch 62/150
Epoch 63/150
Epoch 64/150
Epoch 65/150
Epoch 66/150
Epoch 67/150
Epoch 68/150
Epoch 69/150
Epoch 70/150
Epoch 71/150
Epoch 72/150
Epoch 73/150
Epoch 74/150
Epoch 75/150
Epoch 76/150
Epoch 77/15

  saving_api.save_model(


In [11]:
# Score the reloaded model on the same data it was trained on, so the
# reported figure is a training accuracy rather than a held-out one.
eval_features = np.array(train_x)
eval_labels = np.array(train_y)
loaded_model_loss, loaded_model_accuracy = model.evaluate(eval_features, eval_labels)
print(f"Loaded Model Accuracy: {loaded_model_accuracy}")

Loaded Model Accuracy: 0.9905882477760315


In [12]:
def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its tokens lower-cased and stemmed.

    Uses nltk.word_tokenize for splitting and the module-level `stemmer`
    for stemming. Returns a list with one entry per token, in order.
    """
    return [stemmer.stem(token.lower()) for token in nltk.word_tokenize(sentence)]

In [13]:
def bow(sentence, words, show_details=True):
    """Encode `sentence` as a 0/1 bag-of-words vector over `words`.

    Args:
        sentence: raw text; it is passed through clean_up_sentence first.
        words: the vocabulary; position i of the result corresponds to words[i].
        show_details: when true, print a line for every vocabulary match.

    Returns:
        A numpy array of len(words) entries, 1 where the vocabulary entry
        equals one of the sentence's processed tokens and 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            bag[position] = 1
            if show_details:
                print("Found in bag: %s" % vocab_word)
    return np.array(bag)

In [14]:
def predict_class(sentence, model, words, classes):
    """Return the intents the model predicts for `sentence`, most likely first.

    The sentence is encoded with bow() (without match printing) and passed to
    model.predict as a batch of one. Only scores strictly above
    ERROR_THRESHOLD are kept.

    Returns:
        A list of {"intent": <class name>, "probability": <score as str>}
        dicts, sorted by descending score; empty if nothing passes the threshold.
    """
    ERROR_THRESHOLD = 0.25
    features = bow(sentence, words, show_details=False)
    scores = model.predict(np.array([features]))[0]
    ranked = sorted(
        ([index, score] for index, score in enumerate(scores) if score > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )
    return [
        {"intent": classes[index], "probability": str(score)}
        for index, score in ranked
    ]

In [15]:
def classify(sentence):
    """Classify `sentence` with the module-level model, vocabulary and classes.

    The sentence is encoded with bow() using its default show_details, so
    vocabulary matches are printed as a side effect. Scores at or below
    ERROR_THRESHOLD are discarded.

    Returns:
        A list of (intent, probability) tuples ordered from highest to
        lowest probability; empty if nothing passes the threshold.
    """
    ERROR_THRESHOLD = 0.25
    # Predict on a batch of one and take that single row of scores.
    probabilities = model.predict(np.array([bow(sentence, words)]))[0]
    # Keep (index, score) for every class above the threshold.
    kept = [(index, prob) for index, prob in enumerate(probabilities) if prob > ERROR_THRESHOLD]
    # Highest-confidence class first.
    kept.sort(key=lambda pair: pair[1], reverse=True)
    return [(classes[index], prob) for index, prob in kept]

In [16]:
def response(sentence,userid='123',show_details=True):
    """Print a randomly chosen reply for the best-matching intent of `sentence`.

    Candidate intents come from classify(), highest probability first. The
    first candidate whose tag exists in intents['intents'] has one of its
    'responses' printed. Candidates with no matching tag are dropped and the
    next one is tried. `userid` and `show_details` are accepted but not used
    in this function.

    Returns:
        None in every case: the reply is printed, not returned.
    """
    candidates = classify(sentence)
    while candidates:
        best_tag = candidates[0][0]
        for entry in intents['intents']:
            if entry['tag'] == best_tag:
                # Pick one of this intent's canned responses at random.
                print(random.choice(entry['responses']))
                return None
        # No intent carries this tag; fall back to the next candidate.
        candidates.pop(0)
    return None

In [17]:
# Interactive session: keep prompting until the user types 'quit'
# (in any letter case). Every other line is handed to response().
user_input = input("You: ")
while user_input.lower() != 'quit':
    answer = response(user_input)
    user_input = input("You: ")

Found in bag: hi
Hello, thanks for visiting
Found in bag: hello
Good to see you again
Found in bag: suggest
Found in bag: a
Found in bag: movy
Absolutely! I'd be happy to recommend a movie. What genre or mood are you in the mood for?
Found in bag: tel
Found in bag: a
Found in bag: jok
What do you call fake spaghetti? An impasta.
Found in bag: bye
Bye! Come back again soon.
