In [None]:
import json
import pickle
import random

import matplotlib.pyplot as plt
import nltk
import numpy as np
from keras.callbacks import EarlyStopping
from keras.layers import Dense, Dropout, Embedding, Bidirectional, LSTM, Input, Attention, Flatten, Concatenate
from keras.models import Sequential, Model
from keras.optimizers import SGD
from nltk.stem import WordNetLemmatizer

In [None]:
# Load data
lemmatizer = WordNetLemmatizer()

# Parse the intents definition inside a context manager so the file handle is
# closed as soon as parsing finishes instead of lingering in the kernel.
# The encoding is pinned to UTF-8 so the result does not depend on the
# platform's locale default.
with open('../data/input/intents.json', encoding='utf-8') as intents_file:
    intents = json.load(intents_file)

In [None]:
# Preprocess data
#
# Produces:
#   words     - sorted, de-duplicated, lower-cased lemmas of every pattern token
#               (punctuation listed in ignore_letters is excluded)
#   classes   - sorted, de-duplicated intent tags
#   documents - (token list, tag) pair for every pattern
#   training  - shuffled list of [bag-of-words row, one-hot tag row] pairs
words = []
classes = []
documents = []
ignore_letters = ['?', '!', '.', ',']

# Tokenize each pattern once; keep the raw tokens for the vocabulary and the
# (tokens, tag) pair for building the training rows further down.
for intent in intents['intents']:
    for pattern in intent['patterns']:
        word_list = nltk.word_tokenize(pattern)
        words.extend(word_list)
        documents.append((word_list, intent['tag']))
        if intent['tag'] not in classes:
            classes.append(intent['tag'])

# The punctuation filter is applied to the raw token, before lower-casing and
# lemmatizing; the set comprehension removes duplicates before sorting.
words = sorted({lemmatizer.lemmatize(word.lower()) for word in words if word not in ignore_letters})
classes = sorted(set(classes))

# Persist the vocabulary and the label list. The context managers guarantee the
# pickles are flushed and the handles closed before the cell finishes.
with open('../data/output/Attention/words.pkl', 'wb') as words_file:
    pickle.dump(words, words_file)
with open('../data/output/Attention/classes.pkl', 'wb') as classes_file:
    pickle.dump(classes, classes_file)

training = []
output_empty = [0] * len(classes)

# classes is unique after the set() above, so a tag -> position map gives the
# same index as classes.index(tag) without a linear scan per document.
class_position = {tag: position for position, tag in enumerate(classes)}

for word_patterns, tag in documents:
    # A set makes each vocabulary membership test O(1) instead of scanning the
    # document's token list once per vocabulary word.
    document_lemmas = {lemmatizer.lemmatize(word.lower()) for word in word_patterns}
    bag = [1 if word in document_lemmas else 0 for word in words]

    output_row = list(output_empty)
    output_row[class_position[tag]] = 1
    training.append([bag, output_row])

# Shuffle in place so the rows are not grouped by intent.
random.shuffle(training)

In [None]:
# Split each [bag, one-hot] pair of `training` into two parallel arrays:
# train_x stacks the bag-of-words rows, train_y stacks the one-hot label rows.
train_x = np.array([bag_row for bag_row, _label_row in training])
train_y = np.array([label_row for _bag_row, label_row in training])

In [None]:
# Model front end: input -> embedding -> two stacked bidirectional LSTMs.
# The input width equals the length of one row of train_x.
# NOTE(review): the preprocessing cell fills train_x with 0/1 bag-of-words
# flags, so the Embedding lookup only ever receives indices 0 and 1 - confirm
# this is intended rather than feeding sequences of token ids.
feature_width = train_x.shape[1]
input_layer = Input(shape=(feature_width,))
embedding_layer = Embedding(input_dim=len(words), output_dim=100)(input_layer)

# Both recurrent layers return the full sequence (return_sequences=True), so
# the tensor handed to the next cell still has a per-position axis.
lstm_layer = embedding_layer
for _lstm_depth in range(2):
    lstm_layer = Bidirectional(LSTM(256, return_sequences=True))(lstm_layer)


In [None]:
# Regularization: apply dropout with rate 0.5 to the output of the stacked
# LSTM layers.
dropout = Dropout(rate=0.5)
dropout_layer = dropout(lstm_layer)

In [None]:
# Collapse the per-position sequence output into a single flat feature vector
# per sample so it can feed the Dense layers.
flatten = Flatten()
flatten_layer = flatten(dropout_layer)


In [None]:
# Fully connected head: two ReLU layers, 128 units followed by 64 units,
# chained on top of the flattened features.
dense_layer = flatten_layer
for hidden_units in (128, 64):
    dense_layer = Dense(hidden_units, activation='relu')(dense_layer)

In [None]:
# Classification layer: one softmax unit per column of the one-hot train_y.
num_outputs = train_y.shape[1]
output_layer = Dense(num_outputs, activation='softmax')(dense_layer)


In [None]:
# Tie the functional graph together: everything between input_layer and
# output_layer becomes a single trainable Model.
model = Model(
    inputs=[input_layer],
    outputs=[output_layer],
)

In [None]:
# Optimizer: SGD with Nesterov momentum (learning rate 0.01, momentum 0.9).
sgd = SGD(
    learning_rate=0.01,
    momentum=0.9,
    nesterov=True,
)

# Categorical cross-entropy pairs with the one-hot labels and softmax output;
# accuracy is tracked so it is available in the training history.
model.compile(
    optimizer=sgd,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


In [31]:
# Train the model
#
# `epochs` is kept as an upper bound only. Without a stopping criterion the run
# has to be interrupted by hand, which leaves `history` unassigned and breaks
# the plotting cell. EarlyStopping ends training once the validation loss has
# not improved for `patience` consecutive epochs and restores the weights from
# the best epoch.
# NOTE(review): patience=50 is a starting point, not a tuned value - adjust it
# to the size of the validation split.
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=50,
    restore_best_weights=True,
)

# The last 10% of the (already shuffled) rows are held out for validation.
history = model.fit(
    train_x,
    train_y,
    epochs=7700,
    batch_size=100,
    validation_split=0.1,
    callbacks=[early_stopping],
)


KeyboardInterrupt: 

In [None]:
# Draw one figure per tracked metric (loss first, then accuracy), overlaying
# the training curve and the validation curve stored in `history.history`.
for metric_key, metric_label in (('loss', 'Loss'), ('accuracy', 'Accuracy')):
    plt.plot(history.history[metric_key], label=f'Training {metric_label}')
    plt.plot(history.history[f'val_{metric_key}'], label=f'Validation {metric_label}')
    plt.title(f'Training and Validation {metric_label}')
    plt.xlabel('Epochs')
    plt.ylabel(metric_label)
    plt.legend()
    # Show inside the loop so each metric gets its own figure.
    plt.show()

In [None]:
# Persist the trained model next to the pickled vocabulary and class list.
# NOTE(review): some Keras releases only accept a `.keras` or `.h5` suffix in
# Model.save - confirm `.model` works with the installed version before
# relying on this cell.
model_path = '../data/output/Attention/Rasika.model'
model.save(model_path)

# Signal that the save step completed.
print("Done")