In [10]:
import random
import json
import pickle
import nltk
from nltk.stem import WordNetLemmatizer
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Conv1D, GlobalMaxPooling1D
from tensorflow.keras.optimizers import SGD

import numpy as np

In [2]:
lemmatizer = WordNetLemmatizer()

intents = json.loads(open("data.json").read())

words = []
classes = []
documents = []

ignore_letters = ["?", "!", ".", ","]

for intent in intents["intents"]:
    for symptom in intent["symptoms"]:
        word_list = nltk.word_tokenize(symptom)
        words.extend(word_list)
        documents.append((word_list, intent["name"]))

        if intent["name"] not in classes:
            classes.append(intent["name"])
words = [lemmatizer.lemmatize(word)
        for word in words if word not in ignore_letters]

words = sorted(set(words))
classes = sorted(set(classes))

In [3]:
pickle.dump(words, open('words.pkl', 'wb'))
pickle.dump(classes, open('classes.pkl', 'wb'))

In [4]:
dataset = []
template = [0] * len(classes)

for document in documents:
    bag = [0] * len(words)  # Initialize bag with zeros

    word_patterns = document[0]
    word_patterns = [lemmatizer.lemmatize(word.lower()) for word in word_patterns]

    for i, word in enumerate(words):
        if word in word_patterns:
            bag[i] = 1

    output_row = list(template)
    output_row[classes.index(document[1])] = 1
    dataset.append((bag, output_row))

random.shuffle(dataset)

train_x = np.array([data[0] for data in dataset])
train_y = np.array([data[1] for data in dataset])


In [11]:
# Define parameters
input_shape = (len(train_x[0]), 1)  # Input shape would be (length of our input vector, 1)
output_shape = len(train_y[0])  # Output shape would be the number of intent classes

# Reshape the training data to fit the model
train_x = np.array(train_x).reshape(len(train_x), len(train_x[0]), 1)

# Define model
model = Sequential()
model.add(Conv1D(256, 5, activation='relu', input_shape=input_shape))
model.add(GlobalMaxPooling1D())
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(64, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(output_shape, activation='softmax'))

# Compile model
sgd = SGD(learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])

# Training model
model.fit(train_x, np.array(train_y), epochs=200, batch_size=5, verbose=2)

# Save model
model.save('medical_chatbot_model.h5')

Epoch 1/200
314/314 - 2s - loss: 5.6093 - accuracy: 0.0038 - 2s/epoch - 7ms/step
Epoch 2/200
314/314 - 2s - loss: 5.5985 - accuracy: 0.0089 - 2s/epoch - 6ms/step
Epoch 3/200
314/314 - 2s - loss: 5.5819 - accuracy: 0.0102 - 2s/epoch - 6ms/step
Epoch 4/200
314/314 - 2s - loss: 5.5751 - accuracy: 0.0109 - 2s/epoch - 6ms/step
Epoch 5/200
314/314 - 2s - loss: 5.5680 - accuracy: 0.0115 - 2s/epoch - 6ms/step
Epoch 6/200
314/314 - 2s - loss: 5.5652 - accuracy: 0.0115 - 2s/epoch - 6ms/step
Epoch 7/200
314/314 - 2s - loss: 5.5639 - accuracy: 0.0115 - 2s/epoch - 6ms/step
Epoch 8/200
314/314 - 2s - loss: 5.5583 - accuracy: 0.0115 - 2s/epoch - 6ms/step
Epoch 9/200
314/314 - 2s - loss: 5.5565 - accuracy: 0.0115 - 2s/epoch - 6ms/step
Epoch 10/200
314/314 - 2s - loss: 5.5557 - accuracy: 0.0109 - 2s/epoch - 6ms/step
Epoch 11/200
314/314 - 2s - loss: 5.5559 - accuracy: 0.0109 - 2s/epoch - 6ms/step
Epoch 12/200
314/314 - 2s - loss: 5.5518 - accuracy: 0.0115 - 2s/epoch - 6ms/step
Epoch 13/200
314/314 - 2s

314/314 - 2s - loss: 5.4910 - accuracy: 0.0115 - 2s/epoch - 6ms/step
Epoch 102/200
314/314 - 2s - loss: 5.4944 - accuracy: 0.0115 - 2s/epoch - 6ms/step
Epoch 103/200
314/314 - 2s - loss: 5.4866 - accuracy: 0.0128 - 2s/epoch - 6ms/step
Epoch 104/200
314/314 - 2s - loss: 5.4857 - accuracy: 0.0109 - 2s/epoch - 7ms/step
Epoch 105/200
314/314 - 2s - loss: 5.4859 - accuracy: 0.0134 - 2s/epoch - 7ms/step
Epoch 106/200
314/314 - 2s - loss: 5.4876 - accuracy: 0.0115 - 2s/epoch - 6ms/step
Epoch 107/200
314/314 - 2s - loss: 5.4910 - accuracy: 0.0121 - 2s/epoch - 6ms/step
Epoch 108/200
314/314 - 2s - loss: 5.4961 - accuracy: 0.0121 - 2s/epoch - 6ms/step
Epoch 109/200
314/314 - 2s - loss: 5.4880 - accuracy: 0.0121 - 2s/epoch - 6ms/step
Epoch 110/200
314/314 - 2s - loss: 5.4878 - accuracy: 0.0115 - 2s/epoch - 6ms/step
Epoch 111/200
314/314 - 2s - loss: 5.4866 - accuracy: 0.0121 - 2s/epoch - 6ms/step
Epoch 112/200
314/314 - 2s - loss: 5.4847 - accuracy: 0.0115 - 2s/epoch - 6ms/step
Epoch 113/200
314/

Epoch 200/200
314/314 - 2s - loss: 5.4826 - accuracy: 0.0128 - 2s/epoch - 6ms/step
