## Import dependencies

In [1]:
# Filepaths, numpy, and Tensorflow
import pandas as pd
import os
import numpy as np
import sqlite3
import tensorflow as tf

In [2]:
# sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [3]:
# Keras
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense

## Load and pre-process data

In [4]:
# Open a connection to the SQLite database file
DB_PATH = 'diseases.sqlite'
conn = sqlite3.connect(DB_PATH)

# Read every row and column of the diseases table into a pandas DataFrame
DISEASES_QUERY = "SELECT * FROM diseases"
encoded_data = pd.read_sql_query(DISEASES_QUERY, conn)

# Preview the first rows of the DataFrame
encoded_data.head()

Unnamed: 0,acidity,palpitations,anxiety,bladder_discomfort,blurred_and_distorted_vision,breathlessness,burning_micturition,chest_pain,chills,congestion,...,spotting_ urination,stiff_neck,stomach_pain,sweating,swelled_lymph_nodes,throat_irritation,visual_disturbances,vomiting,watering_from_eyes,Disease
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6


In [5]:
# Separate the input features from the output (target) column.
# Every column except 'Disease' is treated as a feature.
feature_frame = encoded_data.drop(columns='Disease')
X = feature_frame.values

# The 'Disease' column holds the label to predict
y = encoded_data['Disease'].values

In [6]:
# Partition the data into training and testing sets.
# stratify=y keeps the class proportions of y in both partitions, and the
# fixed random_state makes the partitioning repeatable between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=42,
    stratify=y,
)

In [7]:
# Convert the integer target labels (expected values) to one-hot categorical
# vectors, as required by the categorical cross-entropy loss used below.
num_classes = 10

# This cell rebinds y_train / y_test, so guard on dimensionality: the labels
# are 1-D before encoding and 2-D afterwards. Without the guard, re-running
# the cell would one-hot encode the already-encoded arrays and produce 3-D
# targets that no longer match the model's output shape.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes)

# Show the first encoded training label as a sanity check
y_train[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.], dtype=float32)

In [8]:
# Report the dimensions of each training/testing array
for shape_label, split_array in (
    ("X Training Shape:", X_train),
    ("X Testing Shape:", X_test),
    ("Y Training Shape:", y_train),
    ("Y Testing Shape:", y_test),
):
    print(shape_label, split_array.shape)

X Training Shape: (900, 52)
X Testing Shape: (300, 52)
Y Training Shape: (900, 10)
Y Testing Shape: (300, 10)


## Build the Initial Model

In [9]:
# Seed Python's `random`, NumPy and TensorFlow in one call so that weight
# initialisation and batch shuffling are repeatable from run to run.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility is required.
tf.keras.utils.set_random_seed(42)

# Create an empty sequential model; layers are added in the following cells
model = Sequential()

In [10]:
# Add the first hidden layer: 10 nodes with relu activation.
# The input dimension is taken from the training data (one input per feature column).
n_features = X_train.shape[1]
model.add(Dense(units=10, activation='relu', input_dim=n_features))

In [11]:
# Add a second hidden layer: 10 nodes with relu activation
model.add(Dense(units=10, activation='relu'))

In [12]:
# Add the final output layer: one node per y label (num_classes), with
# softmax activation so the outputs form a probability distribution over classes
model.add(Dense(units=num_classes, activation='softmax'))

## Model Summary

In [13]:
# Print a layer-by-layer summary of the model: layer types, output shapes,
# and the number of parameters in each layer
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                530       
                                                                 
 dense_1 (Dense)             (None, 10)                110       
                                                                 
 dense_2 (Dense)             (None, 10)                110       
                                                                 
Total params: 750 (2.93 KB)
Trainable params: 750 (2.93 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


##  Compile and Train Model

In [14]:
# Configure training: categorical cross-entropy suits the one-hot encoded
# class labels (mean squared error would be the choice for regression).
# Accuracy is tracked as the reported metric.
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [15]:
 # Fit (train) the model
model.fit(
    x=X_train,       # training features
    y=y_train,       # one-hot encoded training labels
    epochs=10,       # number of passes over the training data
    shuffle=True,    # reshuffle the training data before each epoch
    verbose=2,       # one log line per epoch
)

Epoch 1/10
29/29 - 2s - loss: 2.3189 - accuracy: 0.1189 - 2s/epoch - 54ms/step
Epoch 2/10
29/29 - 0s - loss: 2.1287 - accuracy: 0.2556 - 92ms/epoch - 3ms/step
Epoch 3/10
29/29 - 0s - loss: 1.9687 - accuracy: 0.2956 - 123ms/epoch - 4ms/step
Epoch 4/10
29/29 - 0s - loss: 1.8226 - accuracy: 0.3111 - 91ms/epoch - 3ms/step
Epoch 5/10
29/29 - 0s - loss: 1.6618 - accuracy: 0.3367 - 79ms/epoch - 3ms/step
Epoch 6/10
29/29 - 0s - loss: 1.5113 - accuracy: 0.4867 - 84ms/epoch - 3ms/step
Epoch 7/10
29/29 - 0s - loss: 1.3657 - accuracy: 0.5556 - 125ms/epoch - 4ms/step
Epoch 8/10
29/29 - 0s - loss: 1.2176 - accuracy: 0.6456 - 82ms/epoch - 3ms/step
Epoch 9/10
29/29 - 0s - loss: 1.0672 - accuracy: 0.6900 - 72ms/epoch - 2ms/step
Epoch 10/10
29/29 - 0s - loss: 0.9242 - accuracy: 0.7656 - 48ms/epoch - 2ms/step


<keras.src.callbacks.History at 0x7fc4baa30d90>

## Save and Load Model

In [16]:
# Write the trained first model to disk
model_1_path = "diseases_trained_1.h5"
model.save(model_1_path)

  saving_api.save_model(


In [17]:
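# Optional: reload a previously saved model instead of retraining (uncomment to use).
# NOTE: the save cell above writes "diseases_trained_1.h5"; confirm the filename below before uncommenting.
# from tensorflow.keras.models import load_model
# model = load_model("diseases_trained.h5")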

## Evaluate the Model

In [18]:
# Evaluate the model on the held-out test data (not the training data)
model_loss, model_accuracy = model.evaluate(
    x=X_test,
    y=y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

10/10 - 0s - loss: 0.8549 - accuracy: 0.7700 - 206ms/epoch - 21ms/step
Loss: 0.8549064993858337, Accuracy: 0.7699999809265137


## Model Optimization

In [19]:
# Seed Python's `random`, NumPy and TensorFlow in one call so that the
# optimized model's weight initialisation and batch shuffling are repeatable.
# NOTE(review): some GPU kernels may still be non-deterministic - confirm if
# bit-exact reproducibility is required.
tf.keras.utils.set_random_seed(42)

# Create an empty sequential model
model2 = Sequential()

# Add the first hidden layer with relu activation; the input dimension is
# taken from the training data (one input per feature column).
# Optimization step: increase nodes from 10 to 20
model2.add(Dense(20, activation='relu', input_dim=X_train.shape[1]))

# Add a second hidden layer
model2.add(Dense(10, activation='relu'))

# Add our final output layer where the number of nodes
# corresponds to the number of y labels
model2.add(Dense(num_classes, activation='softmax'))

In [20]:
# Print a layer-by-layer summary of the optimized model: layer types,
# output shapes, and the number of parameters in each layer
model2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 20)                1060      
                                                                 
 dense_4 (Dense)             (None, 10)                210       
                                                                 
 dense_5 (Dense)             (None, 10)                110       
                                                                 
Total params: 1380 (5.39 KB)
Trainable params: 1380 (5.39 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [21]:
# Configure training for the optimized model: categorical cross-entropy suits
# the one-hot encoded class labels (mean squared error would be the choice for
# regression). Accuracy is tracked as the reported metric.
model2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [22]:
 # Fit (train) the model
model2.fit(
    x=X_train,       # training features
    y=y_train,       # one-hot encoded training labels
    epochs=10,       # number of passes over the training data
    shuffle=True,    # reshuffle the training data before each epoch
    verbose=2,       # one log line per epoch
)

Epoch 1/10
29/29 - 1s - loss: 2.2242 - accuracy: 0.2356 - 797ms/epoch - 27ms/step
Epoch 2/10
29/29 - 0s - loss: 1.9333 - accuracy: 0.4400 - 51ms/epoch - 2ms/step
Epoch 3/10
29/29 - 0s - loss: 1.6307 - accuracy: 0.6556 - 53ms/epoch - 2ms/step
Epoch 4/10
29/29 - 0s - loss: 1.3254 - accuracy: 0.8178 - 50ms/epoch - 2ms/step
Epoch 5/10
29/29 - 0s - loss: 1.0336 - accuracy: 0.8911 - 51ms/epoch - 2ms/step
Epoch 6/10
29/29 - 0s - loss: 0.7710 - accuracy: 0.9000 - 56ms/epoch - 2ms/step
Epoch 7/10
29/29 - 0s - loss: 0.5484 - accuracy: 0.9178 - 65ms/epoch - 2ms/step
Epoch 8/10
29/29 - 0s - loss: 0.3833 - accuracy: 0.9811 - 57ms/epoch - 2ms/step
Epoch 9/10
29/29 - 0s - loss: 0.2644 - accuracy: 1.0000 - 51ms/epoch - 2ms/step
Epoch 10/10
29/29 - 0s - loss: 0.1830 - accuracy: 1.0000 - 53ms/epoch - 2ms/step


<keras.src.callbacks.History at 0x7fc4b81a9b10>

In [25]:
# Evaluate the second model on the held-out test data (not the training data)
model_loss, model_accuracy = model2.evaluate(
    x=X_test,
    y=y_test,
    verbose=2,
)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

10/10 - 1s - loss: 0.1514 - accuracy: 1.0000 - 543ms/epoch - 54ms/step
Loss: 0.15144091844558716, Accuracy: 1.0


In [23]:
# Save the optimized second model. This must be `model2`: saving `model` here
# would write the first (unoptimized) model under the second model's filename.
model2.save("diseases_trained_2.h5")

## Close out SQLite session

In [24]:
# Close the SQLite connection that was opened to load the diseases table,
# releasing the database file handle
conn.close()