In [5]:
!pip install nltk



In [8]:
import nltk
import json
import pickle
import random
import numpy as np
from nltk.stem import WordNetLemmatizer
from keras import models, layers, optimizers
from keras.callbacks import EarlyStopping


In [14]:
# Download necessary NLTK resources
nltk.download('punkt')
nltk.download('wordnet')

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package wordnet to /root/nltk_data...


True

In [9]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Function to preprocess data

In [10]:

# Function to preprocess data
def preprocess_data():
    words = []
    classes = []
    documents = []
    ignore_words = ['?', '!', '@', '$']

    with open('intents.json', 'r', encoding='utf-8') as data_file:
        intents = json.load(data_file)

    lemmatizer = WordNetLemmatizer()

    for intent in intents['intents']:
        for pattern in intent['patterns']:
            if isinstance(pattern, str):
                w = nltk.word_tokenize(pattern)

                words.extend(w)
                documents.append((w, intent['tag']))

                if intent['tag'] not in classes:
                    classes.append(intent['tag'])
            else:
                print("Skipping non-string pattern:", pattern)
    words = [lemmatizer.lemmatize(w.lower()) for w in words if w not in ignore_words]
    words = sorted(list(set(words)))
    classes = sorted(list(set(classes)))


    print(len(words))

    pickle.dump(words, open('words.pkl', 'wb'))
    pickle.dump(classes, open('classes.pkl', 'wb'))

    training_data = []
    output_empty = [0] * len(classes)

    for doc in documents:
        bag = []
        pattern_words = doc[0]
        pattern_words = [lemmatizer.lemmatize(word.lower()) for word in pattern_words]

        for w in words:
            bag.append(1) if w in pattern_words else bag.append(0)

        output_row = list(output_empty)
        output_row[classes.index(doc[1])] = 1

        training_data.append([bag, output_row])

    random.shuffle(training_data)
    training_data = np.asarray(training_data, dtype="object")
    train_x = list(training_data[:, 0])
    train_y = list(training_data[:, 1])

    return train_x, train_y, classes, words

# Function to create and train model

In [16]:

# Function to create and train model
def create_and_train_model(train_x, train_y, words):
    model = models.Sequential([
        layers.Dense(len(words), input_shape=(len(train_x[0]),), activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(1024, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(512, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(256 , activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.3),
        layers.Dense(len(train_y[0]), activation='softmax')
    ])

    model.compile(loss="categorical_crossentropy",
                  optimizer=optimizers.Adam(learning_rate=0.001),
                  metrics=["accuracy"])

    early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    hist = model.fit(np.array(train_x), np.array(train_y),
                     epochs=100,
                     batch_size=24,
                     verbose=1,
                     callbacks=[early_stopping])

    model.save('chatbot_model_5.h5', hist)

# Evaluate model and main function

In [17]:

# Function to evaluate model
def evaluate_model(model, test_x, test_y):
    loss, accuracy = model.evaluate(np.array(test_x), np.array(test_y))
    print("Loss:", loss)
    print("Accuracy:", accuracy)

# Main function
def main():
    train_x, train_y, classes,words = preprocess_data()

    # Splitting data into test and train
    test_size = int(len(train_x) * 0.2)
    test_x = train_x[-test_size:]
    test_y = train_y[-test_size:]
    train_x = train_x[:-test_size]
    train_y = train_y[:-test_size]

    create_and_train_model(train_x, train_y,words)
    model = models.load_model('chatbot_model_5.h5')

    evaluate_model(model, test_x, test_y)


main()

Skipping non-string pattern: {'tag': 'strengthen leg muscles', 'patterns': ['How can I strengthen my leg muscles?', 'Can you recommend leg exercises?', 'What exercises are good for building leg strength?'], 'responses': ['To strengthen your leg muscles, you can include exercises such as squats, lunges, deadlifts, leg presses, and calf raises in your workout routine. These exercises target different muscles in the legs, including the quadriceps, hamstrings, glutes, and calves, and help improve overall leg strength and stability.'], 'context': ['']}
2667
Epoch 1/100



Epoch 2/100



Epoch 3/100



Epoch 4/100



Epoch 5/100



Epoch 6/100



Epoch 7/100



Epoch 8/100



Epoch 9/100



Epoch 10/100



Epoch 11/100



Epoch 12/100



Epoch 13/100



Epoch 14/100



Epoch 15/100



Epoch 16/100



Epoch 17/100



Epoch 18/100



Epoch 19/100



Epoch 20/100



Epoch 21/100



Epoch 22/100



Epoch 23/100



Epoch 24/100



Epoch 25/100



Epoch 26/100



Epoch 27/100



Epoch 28/100



Epoch 29/100



Epoch 30/100



Epoch 31/100



Epoch 32/100



Epoch 33/100



Epoch 34/100



Epoch 35/100



Epoch 36/100



Epoch 37/100



Epoch 38/100



Epoch 39/100



Epoch 40/100



Epoch 41/100



Epoch 42/100



Epoch 43/100



Epoch 44/100



Epoch 45/100



Epoch 46/100



Epoch 47/100



Epoch 48/100



Epoch 49/100



Epoch 50/100



Epoch 51/100



Epoch 52/100



Epoch 53/100



Epoch 54/100



Epoch 55/100



Epoch 56/100



Epoch 57/100



Epoch 58/100



Epoch 59/100



Epoch 60/100



Epoch 61/100



Epoch 62/100



Epoch 63/100



Epoch 64/100



Epoch 65/100



Epoch 66/100



Epoch 67/100



Epoch 68/100



Epoch 69/100



Epoch 70/100



Epoch 71/100



Epoch 72/100



Epoch 73/100



Epoch 74/100



Epoch 75/100



Epoch 76/100



Epoch 77/100



Epoch 78/100



Epoch 79/100



Epoch 80/100



Epoch 81/100



Epoch 82/100



Epoch 83/100



Epoch 84/100



Epoch 85/100



Epoch 86/100



Epoch 87/100



Epoch 88/100



Epoch 89/100



Epoch 90/100



Epoch 91/100



Epoch 92/100



Epoch 93/100



Epoch 94/100



Epoch 95/100



Epoch 96/100



Epoch 97/100



Epoch 98/100



Epoch 99/100



Epoch 100/100





  saving_api.save_model(


Loss: 1.8482376337051392
Accuracy: 0.8092660307884216
