In [1]:
import json
import random
import numpy as np

import nltk
# Fetch the NLTK resources this notebook relies on. Presumably 'punkt' backs
# nltk.word_tokenize and 'wordnet' backs WordNetLemmatizer — confirm against
# the installed NLTK version.
nltk.download('punkt')
nltk.download('wordnet')
from nltk.stem import WordNetLemmatizer
# Shared lemmatizer instance, used below when building the vocabulary.
lemmatizer = WordNetLemmatizer()

from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout
from keras.optimizers import SGD

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\saimu\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\saimu\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


In [6]:
import json

# Read the intent definitions from the notebook's working directory instead of
# a machine-specific absolute path, so the notebook runs on any checkout. This
# is the same relative file name the prediction cell further down opens.
INTENTS_PATH = 'intents.json'

with open(INTENTS_PATH, encoding='utf-8') as data_file:
    intents = json.load(data_file)

In [7]:
# Empty accumulators for the vocabulary, the intent tags and the
# (tokens, tag) training documents.
words, classes, documents = [], [], []

# Punctuation tokens that are excluded from the vocabulary.
ignore_words = ['?', '!']

In [8]:
# Rebuild the corpus from scratch on every execution so that re-running this
# cell does not append the same patterns to `documents` a second time.
# Each document is a (tokens, tag) pair for one pattern of one intent.
documents = [
    (nltk.word_tokenize(pattern), intent['tag'])
    for intent in intents['intents']
    for pattern in intent['patterns']
]

# Vocabulary: every token that is not in `ignore_words`, lowercased and
# lemmatized, de-duplicated and sorted.
words = sorted({
    lemmatizer.lemmatize(token.lower())
    for tokens, _tag in documents
    for token in tokens
    if token not in ignore_words
})

# Classes: the distinct tags that own at least one pattern, sorted.
classes = sorted({tag for _tokens, tag in documents})

print(len(documents), "documents")
print(len(words), "unique lemmatized words")
print(len(classes), "classes", classes)

161 documents
107 unique lemmatized words
51 classes ['Adventure stories', 'American fiction', 'Architecture', 'Art', 'Biography & Autobiography', 'Body, Mind & Spirit', 'Business & Economics', "Children's stories", 'Comics & Graphic Novels', 'Computers', 'Cooking', 'Detective and mystery stories', 'Drama', 'Education', 'English fiction', 'Family & Relationships', 'Fantasy fiction', 'Fiction', 'Foreign Language Study', 'Games', 'Health & Fitness', 'History', 'Humor', 'Juvenile Fiction', 'Juvenile Nonfiction', 'Language Arts & Disciplines', 'Law', 'Literary Collections', 'Literary Criticism', 'Medical', 'Music', 'Nature', 'Performing Arts', 'Philosophy', 'Photography', 'Poetry', 'Political Science', 'Psychology', 'Religion', 'Science', 'Science fiction', 'Self-Help', 'Social Science', 'Sports & Recreation', 'Travel', 'True Crime', 'Young Adult Fiction', 'book_search', 'goodbye', 'greeting', 'thanks']


In [11]:
import numpy as np
import random
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()

# Template label row: one zero per class, copied for every training example.
output_empty = [0] * len(classes)
training = []

for tokens, tag in documents:
    # Normalise the pattern's tokens the same way the vocabulary was built.
    lemmas = [lemmatizer.lemmatize(token.lower()) for token in tokens]

    # Bag of words: 1 where the vocabulary word occurs in this pattern, else 0.
    bag = [int(vocab_word in lemmas) for vocab_word in words]

    # One-hot label: a single 1 at the index of this pattern's tag.
    output_row = list(output_empty)
    output_row[classes.index(tag)] = 1

    training.append([np.array(bag, dtype=int), np.array(output_row, dtype=int)])

# Randomise the example order, then wrap the pairs in an object array.
random.shuffle(training)
training = np.array(training, dtype=object)

# Split into feature matrix (X - patterns) and label matrix (Y - intents).
train_x = np.array([features for features, _label in training], dtype=int)
train_y = np.array([label for _features, label in training], dtype=int)

# Report the resulting shapes as a sanity check.
print("train_x shape:", train_x.shape)
print("train_y shape:", train_y.shape)


train_x shape: (161, 107)
train_y shape: (161, 51)


In [13]:
# Feed-forward intent classifier: a 128-unit and a 64-unit ReLU layer, each
# followed by 50% dropout, then a softmax layer with one unit per class.
n_features = len(train_x[0])
n_classes = len(train_y[0])

model = Sequential([
    Dense(128, input_shape=(n_features,), activation='relu'),
    Dropout(0.5),
    Dense(64, activation='relu'),
    Dropout(0.5),
    Dense(n_classes, activation='softmax'),
])

# Optimise with SGD using Nesterov momentum; track accuracy during training.
sgd = SGD(learning_rate=0.01, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

In [14]:
# Train the classifier on the bag-of-words features. train_x / train_y are
# already NumPy arrays, so they are passed through without re-wrapping.
hist = model.fit(train_x, train_y, epochs=500, batch_size=5, verbose=1)

# Persist the trained model. Only the target path is passed: the original also
# passed the training History object positionally, where it was interpreted as
# the `overwrite` flag rather than anything related to the history.
model.save('chatbot_model.h5')

print("model created")

Epoch 1/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 0.0274 - loss: 3.9731    
Epoch 2/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0690 - loss: 3.8729  
Epoch 3/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0834 - loss: 3.8353   
Epoch 4/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0947 - loss: 3.8735
Epoch 5/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0998 - loss: 3.8097
Epoch 6/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.1165 - loss: 3.7119
Epoch 7/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.0734 - loss: 3.6847
Epoch 8/500
[1m33/33[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - accuracy: 0.1158 - loss: 3.5145
Epoch 9/500
[1m33/33[0m [32m━━━━━━━━



model created


Chatbot Prediction

In [15]:
def clean_up_sentence(sentence):
    """Tokenize `sentence` and return its tokens lowercased and lemmatized, as a list."""
    return [
        lemmatizer.lemmatize(token.lower())
        for token in nltk.word_tokenize(sentence)
    ]

Function for Bag of Words

In [16]:
def bow(sentence, words, show_details=True):
    """Encode `sentence` as a bag-of-words vector over the vocabulary `words`.

    Args:
        sentence: Raw text; it is normalised with `clean_up_sentence`.
        words: Vocabulary sequence; position i of the result corresponds to words[i].
        show_details: When true, print a line for every vocabulary match found.

    Returns:
        A NumPy array of len(words) entries holding 1 where the vocabulary
        word occurs in the sentence and 0 elsewhere.
    """
    tokens = clean_up_sentence(sentence)

    # Start from an all-zero vector and switch on every matching position.
    bag = [0] * len(words)
    for token in tokens:
        for position, vocab_word in enumerate(words):
            if vocab_word != token:
                continue
            bag[position] = 1
            if show_details:
                print("found in bag: %s" % vocab_word)

    return np.array(bag)

Function for Class Prediction

In [17]:
def predict_class(sentence, model):
    """Return the intents predicted for `sentence`, strongest first.

    The sentence is encoded with `bow` against the global vocabulary `words`
    and passed to `model.predict`. Only classes whose score is above
    ERROR_THRESHOLD are kept.

    Returns:
        A list of {"intent": <class name>, "probability": <score as str>}
        dicts sorted by descending score; empty when no score passes the
        threshold.
    """
    ERROR_THRESHOLD = 0.25

    features = bow(sentence, words, show_details=False)
    scores = model.predict(np.array([features]))[0]

    # Keep (class index, score) pairs above the threshold, best score first.
    candidates = sorted(
        ([index, score] for index, score in enumerate(scores) if score > ERROR_THRESHOLD),
        key=lambda pair: pair[1],
        reverse=True,
    )

    return [
        {"intent": classes[index], "probability": str(score)}
        for index, score in candidates
    ]

Function to get chatbot response

In [25]:
def chatbot_response(msg):
    """Return the chatbot's reply to `msg`.

    The intents predicted by `predict_class` (using the global `model`) are
    handed, together with the global `intents`, to `getResponse`.
    """
    predicted = predict_class(msg, model)
    return getResponse(predicted, intents)


Chatbot Function

In [26]:
def chatbot_response(msg):
    """Return the chatbot's reply to `msg`.

    Predicts the intents of `msg` with `predict_class` (using the global
    `model`) and passes them, with the global `intents`, to `getResponse`.
    """
    ints = predict_class(msg, model)
    res = getResponse(ints, intents)
    # Return the computed reply; the original returned `re` instead of `res`.
    return res

In [28]:
import json
import numpy as np
from tensorflow.keras.models import load_model

# Reload the trained model from the file name the training cell saved it under.
model = load_model("chatbot_model.h5")

# Re-read the intent definitions from the working directory.
with open("intents.json", mode="r", encoding="utf-8") as intents_file:
    intents = json.load(intents_file)

# Placeholder classifier standing in for the real prediction step.
def predict_class(msg, model):
    """Return a fixed dummy prediction; neither `msg` nor `model` is used.

    NOTE(review): this redefines `predict_class`, replacing the bag-of-words
    implementation defined earlier in the notebook — confirm that is intended.
    """
    dummy_prediction = [0]  # Dummy value for now
    return dummy_prediction

# Response lookup; currently always yields the same recommendation.
def getResponse(ints, intents):
    """Return the default recommendation string; `ints` and `intents` are not consulted."""
    return "I recommend 'Sapiens: A Brief History of Humankind' by Yuval Noah Harari."

# Entry point: message in, reply out.
def chatbot_response(msg):
    """Return the reply for `msg`.

    Calls `predict_class` with the global `model`, then `getResponse` with the
    prediction and the global `intents`.
    """
    predicted = predict_class(msg, model)  # predicted class(es) for the message
    return getResponse(predicted, intents)  # reply chosen for that prediction

# Smoke test: ask for a history recommendation and print the reply.
reply = chatbot_response('Recommend a book in History')
print(reply)




I recommend 'Sapiens: A Brief History of Humankind' by Yuval Noah Harari.
