In [1]:
import nltk
import numpy as np
import random
import json
import pickle
from nltk.stem.lancaster import LancasterStemmer
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

In [6]:
nltk.download("punkt_tab")
stemmer = LancasterStemmer()


[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt_tab.zip.


In [7]:
with open("intents.json") as file:
    intents = json.load(file)

In [8]:
words = []
classes = []
documents = []
ignore = ["?"]

for intent in intents["intents"]:
    for pattern in intent["patterns"]:
        w = nltk.word_tokenize(pattern)
        words.extend(w)
        documents.append((w, intent["tag"]))
        if intent["tag"] not in classes:
            classes.append(intent["tag"])

words = [stemmer.stem(w.lower()) for w in words if w not in ignore]
words = sorted(list(set(words)))
classes = sorted(list(set(classes)))

print(f"{len(documents)} documents")
print(f"{len(classes)} classes")
print(f"{len(words)} unique stemmed words")

106 documents
37 classes
118 unique stemmed words


In [9]:
training = []
output_empty = [0] * len(classes)

for doc in documents:
    bag = []
    pattern_words = [stemmer.stem(word.lower()) for word in doc[0]]
    for w in words:
        bag.append(1 if w in pattern_words else 0)

    output_row = list(output_empty)
    output_row[classes.index(doc[1])] = 1
    training.append([bag, output_row])

random.shuffle(training)
training = np.array(training, dtype=object)

train_x = np.array(list(training[:, 0]))
train_y = np.array(list(training[:, 1]))


In [10]:
model = Sequential([
    Dense(10, input_shape=(len(train_x[0]),), activation="relu"),
    Dense(10, activation="relu"),
    Dense(len(train_y[0]), activation="softmax")
])

model.compile(
    loss="categorical_crossentropy",
    optimizer=Adam(learning_rate=0.001),
    metrics=["accuracy"]
)

model.fit(train_x, train_y, epochs=150, batch_size=8, verbose=1)
model.save("chatbot_model.h5")

pickle.dump({"words": words, "classes": classes, "train_x": train_x, "train_y": train_y}, open("training_data.pkl", "wb"))


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.0239 - loss: 3.5910    
Epoch 2/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0494 - loss: 3.5775 
Epoch 3/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.0870 - loss: 3.5671 
Epoch 4/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0833 - loss: 3.5531 
Epoch 5/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0803 - loss: 3.5538 
Epoch 6/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0636 - loss: 3.5300     
Epoch 7/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.0671 - loss: 3.5101 
Epoch 8/150
[1m14/14[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.1064 - loss: 3.5061 
Epoch 9/150
[1m14/14[0m [32m━━



In [11]:
def clean_up_sentence(sentence):
    sentence_words = nltk.word_tokenize(sentence)
    sentence_words = [stemmer.stem(word.lower()) for word in sentence_words]
    return sentence_words

def bow(sentence, words):
    sentence_words = clean_up_sentence(sentence)
    bag = [0] * len(words)
    for s in sentence_words:
        for i, w in enumerate(words):
            if w == s:
                bag[i] = 1
    return np.array(bag)

def classify(sentence):
    ERROR_THRESHOLD = 0.25
    input_data = np.array([bow(sentence, words)])
    results = model.predict(input_data)[0]
    results = [[i, r] for i, r in enumerate(results) if r > ERROR_THRESHOLD]
    results.sort(key=lambda x: x[1], reverse=True)
    return [(classes[r[0]], r[1]) for r in results]

def response(sentence):
    results = classify(sentence)
    if results:
        tag = results[0][0]
        for intent in intents["intents"]:
            if intent["tag"] == tag:
                print("Bot:", random.choice(intent["responses"]))
                return
    print("Bot: Sorry, I didn't understand that.")


In [None]:
print("Chatbot is ready! Type 'quit' to exit.\n")
while True:
    msg = input("You: ")
    if msg.lower() in ["quit", "exit"]:
        print("Bot: Goodbye!")
        break
    response(msg)

Chatbot is ready! Type 'quit' to exit.

You: Hi
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 64ms/step
Bot: Hi there, how can I help?
You: Where are you located?
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 36ms/step
Bot: Our location is BH-5 LPU
You: Do you accept Mastercard?
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
Bot: We accept most major credit cards
You: Do you provide industrial training
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step
Bot: Yes, we conduct webinars on cybersecurity
