In [None]:
import json
import numpy as np
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences


In [None]:
# Load the intent definitions that every later cell reads through `data`.
# JSON text is UTF-8 by specification, so the encoding is stated explicitly
# instead of relying on the platform's default locale encoding (which would
# mis-decode or reject non-ASCII patterns on some systems).
with open("intents.json", encoding="utf-8") as file:
    data = json.load(file)



In [None]:
# Flatten the nested intent structure into two parallel lists:
# patterns[i] is one example utterance and labels[i] is the tag of the
# intent that utterance was listed under.
patterns = [
    pattern
    for intent in data["intents"]
    for pattern in intent["patterns"]
]
labels = [
    intent["tag"]
    for intent in data["intents"]
    for _ in intent["patterns"]  # repeat the tag once per pattern
]


In [None]:
# Build the vocabulary from the training patterns; "<OOV>" is the token the
# tokenizer is configured to use for out-of-vocabulary words.
tokenizer = Tokenizer(oov_token="<OOV>")
tokenizer.fit_on_texts(patterns)

# Convert each pattern to a sequence of word indices and pad the sequences
# into a single 2-D array; X.shape[1] is reused later as the inference width.
X = pad_sequences(tokenizer.texts_to_sequences(patterns))


In [None]:
# Fit a LabelEncoder on the intent tags and encode them in one step.
# `y` is the training target passed to model.fit (compiled with the
# sparse categorical cross-entropy loss), and `lbl_encoder` is kept so
# predicted class indices can be mapped back to tags with inverse_transform.
lbl_encoder = LabelEncoder()
y = lbl_encoder.fit_transform(labels)


In [None]:
# Sizes derived from the fitted preprocessing objects.
vocab_size = len(tokenizer.word_index) + 1  # one extra slot beyond the largest word index
num_classes = len(set(labels))              # one output unit per distinct intent tag

# Small bag-of-embeddings classifier: embed each token, average over the
# sequence, then two dense layers ending in a softmax over the intent tags.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(vocab_size, 16))
model.add(tf.keras.layers.GlobalAveragePooling1D())
model.add(tf.keras.layers.Dense(16, activation='relu'))
model.add(tf.keras.layers.Dense(num_classes, activation='softmax'))

# Sparse loss because the targets are integer class ids, not one-hot rows.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()


In [None]:
# Train on the full set of padded patterns for 300 epochs; the object
# returned by fit() is kept in `history` for later inspection.
history = model.fit(x=X, y=y, epochs=300)


In [None]:
import pickle

# Persist the trained network.
model.save("model.h5")

# Pickle the preprocessing objects and the raw training patterns next to
# the model, one file per object.
for filename, artifact in (
    ("tokenizer.pkl", tokenizer),
    ("label_encoder.pkl", lbl_encoder),
    ("texts.pkl", patterns),
):
    with open(filename, "wb") as f:
        pickle.dump(artifact, f)


In [None]:
def test_chatbot(text):
    """Predict the intent tag for a single user message.

    Relies on the notebook-level ``tokenizer``, ``model`` and
    ``lbl_encoder`` objects created in earlier cells, and on ``X`` for the
    padded sequence width used during training.

    Args:
        text: One raw message string.

    Returns:
        The intent tag whose class index has the highest predicted score.
    """
    seq = tokenizer.texts_to_sequences([text])
    # Pad to the same width as the training matrix.
    padded = pad_sequences(seq, maxlen=X.shape[1])
    pred = model.predict(padded)
    # The batch holds exactly one message, so score row 0 is the one to rank.
    tag = lbl_encoder.inverse_transform([np.argmax(pred[0])])
    return tag[0]


# Smoke test: the cell's last expression is displayed as the predicted tag.
test_chatbot("halo")
