## Import dependencies

In [1]:
# Filepaths, numpy, and Tensorflow
import pandas as pd
import os
import numpy as np
import sqlite3
import tensorflow as tf

In [2]:
# sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [3]:
# Keras
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense

## Load and pre-process data

In [4]:
# Open a connection to the SQLite database file
conn = sqlite3.connect('diseases.sqlite')

# Read every row of the `diseases` table into a pandas DataFrame
encoded_data = pd.read_sql_query(sql="SELECT * FROM diseases", con=conn)

# Preview the first five rows
encoded_data.head()

Unnamed: 0,acidity,anxiety,bladder_discomfort,blurred_and_distorted_vision,breathlessness,burning_micturition,chest_pain,chills,congestion,continuous_feel_of_urine,...,spotting_ urination,stiff_neck,stomach_pain,sweating,swelled_lymph_nodes,throat_irritation,visual_disturbances,vomiting,watering_from_eyes,Disease
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6


In [5]:
# Split the features from the output data.
# The table carries an "Unnamed: 0" column (see the preview above) that appears
# to be a leftover row index rather than a symptom. Keeping it would feed row
# position to the model as a feature, so it is excluded from X.
# errors="ignore" makes the second drop a no-op if the column is absent.
X = (
    encoded_data
    .drop(columns="Disease")
    .drop(columns="Unnamed: 0", errors="ignore")
    .values
)

# Target vector: the integer-coded disease label for each row
y = encoded_data["Disease"].values

In [6]:
# Partition the data into training and test sets. No test_size is given, so
# train_test_split's default proportions apply; stratify=y keeps the label
# distribution of y in both partitions and the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [7]:
# One-hot encode the integer class labels so they match the softmax output
# layer and the categorical cross-entropy loss used below.
num_classes = 10

# Encode only while the labels are still 1-D vectors of class ids. Because this
# cell reassigns y_train / y_test, re-running it without the guard would
# one-hot encode the already-encoded arrays a second time.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes)

# Show the encoding of the first training label
y_train[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.], dtype=float32)

In [8]:
# Report the dimensions of each training/testing array
for caption, array in (
    ("X Training Shape:", X_train),
    ("X Testing Shape:", X_test),
    ("Y Training Shape:", y_train),
    ("Y Testing Shape:", y_test),
):
    print(caption, array.shape)

X Training Shape: (900, 51)
X Testing Shape: (300, 51)
Y Training Shape: (900, 10)
Y Testing Shape: (300, 10)


## Build the Initial Model

In [9]:
# Seed the Python, NumPy and TensorflowRNGs before any layer is created so
# that weight initialisation and batch shuffling are repeatable when the
# notebook is re-run from a fresh kernel.
keras.utils.set_random_seed(42)

# Create an empty sequential model; layers are appended in the cells below
model = Sequential()

In [10]:
# First hidden layer: 10 ReLU units, with one input per feature column of X_train
model.add(
    Dense(
        units=10,
        activation='relu',
        input_dim=X_train.shape[1],
    )
)

In [11]:
# Second hidden layer: another 10 ReLU units
hidden_layer = Dense(units=10, activation='relu')
model.add(hidden_layer)

In [12]:
# Output layer: one softmax unit per y label (num_classes units in total)
output_layer = Dense(units=num_classes, activation='softmax')
model.add(output_layer)

## Model Summary

In [13]:
# Print a layer-by-layer summary of the first model (layer types, output
# shapes and parameter counts)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 10)                520       
                                                                 
 dense_1 (Dense)             (None, 10)                110       
                                                                 
 dense_2 (Dense)             (None, 10)                110       
                                                                 
Total params: 740 (2.89 KB)
Trainable params: 740 (2.89 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## Compile and Train Model

In [14]:
# Configure training: Adam optimizer, categorical cross-entropy loss (the
# targets are one-hot encoded classes; mean squared error would be the choice
# for regression), and accuracy as the reported metric
model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [15]:
 # Fit (train) the model
# Train for 10 epochs on the training set with shuffling enabled;
# verbose=2 logs one line per epoch
model.fit(x=X_train, y=y_train, epochs=10, shuffle=True, verbose=2)

Epoch 1/10
29/29 - 2s - loss: 2.1918 - accuracy: 0.1000 - 2s/epoch - 72ms/step
Epoch 2/10
29/29 - 0s - loss: 2.0297 - accuracy: 0.1700 - 72ms/epoch - 2ms/step
Epoch 3/10
29/29 - 0s - loss: 1.8513 - accuracy: 0.3389 - 75ms/epoch - 3ms/step
Epoch 4/10
29/29 - 0s - loss: 1.6481 - accuracy: 0.5356 - 71ms/epoch - 2ms/step
Epoch 5/10
29/29 - 0s - loss: 1.4268 - accuracy: 0.7478 - 68ms/epoch - 2ms/step
Epoch 6/10
29/29 - 0s - loss: 1.1995 - accuracy: 0.6911 - 121ms/epoch - 4ms/step
Epoch 7/10
29/29 - 0s - loss: 0.9988 - accuracy: 0.6933 - 101ms/epoch - 3ms/step
Epoch 8/10
29/29 - 0s - loss: 0.8317 - accuracy: 0.7422 - 80ms/epoch - 3ms/step
Epoch 9/10
29/29 - 0s - loss: 0.6896 - accuracy: 0.8456 - 75ms/epoch - 3ms/step
Epoch 10/10
29/29 - 0s - loss: 0.5627 - accuracy: 0.9900 - 72ms/epoch - 2ms/step


<keras.src.callbacks.History at 0x7cdd141fa0e0>

## Save and Load Model

In [16]:
# Persist the trained first model to an HDF5 file in the working directory
model.save(filepath="diseases_trained_1.h5")

  saving_api.save_model(


In [17]:
# # Load the model saved above
# from tensorflow.keras.models import load_model
# model = load_model("diseases_trained_1.h5")

## Evaluate the Model

In [18]:
# Score the first model on the held-out test data (X_test / y_test)
test_metrics = model.evaluate(X_test, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

10/10 - 0s - loss: 0.4942 - accuracy: 0.9833 - 337ms/epoch - 34ms/step
Loss: 0.4942113757133484, Accuracy: 0.9833333492279053


## Model Optimization

In [19]:
# Seed the Python, NumPy and TensorflowRNGs before any layer is created so
# that this model's weight initialisation and batch shuffling are repeatable
# when the notebook is re-run from a fresh kernel.
keras.utils.set_random_seed(42)

# Create an empty sequential model for the optimization attempt
model2 = Sequential()

# First hidden layer: one input per feature column of X_train, ReLU activation.
# Width increased from 10 to 20 units compared with the first model.
model2.add(Dense(20, activation='relu', input_dim=X_train.shape[1]))

# Second hidden layer: 10 ReLU units
model2.add(Dense(10, activation='relu'))

# Output layer: one softmax unit per y label (num_classes units in total)
model2.add(Dense(num_classes, activation='softmax'))

In [20]:
# Print a layer-by-layer summary of the second model (layer types, output
# shapes and parameter counts)
model2.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_3 (Dense)             (None, 20)                1040      
                                                                 
 dense_4 (Dense)             (None, 10)                210       
                                                                 
 dense_5 (Dense)             (None, 10)                110       
                                                                 
Total params: 1360 (5.31 KB)
Trainable params: 1360 (5.31 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [21]:
# Configure training for the second model: Adam optimizer, categorical
# cross-entropy loss (the targets are one-hot encoded classes; mean squared
# error would be the choice for regression), and accuracy as the reported metric
model2.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [22]:
 # Fit (train) the model
# Train the second model for 10 epochs on the training set with shuffling
# enabled; verbose=2 logs one line per epoch
model2.fit(x=X_train, y=y_train, epochs=10, shuffle=True, verbose=2)

Epoch 1/10
29/29 - 2s - loss: 2.2576 - accuracy: 0.1133 - 2s/epoch - 57ms/step
Epoch 2/10
29/29 - 0s - loss: 2.0526 - accuracy: 0.3600 - 82ms/epoch - 3ms/step
Epoch 3/10
29/29 - 0s - loss: 1.8075 - accuracy: 0.6156 - 87ms/epoch - 3ms/step
Epoch 4/10
29/29 - 0s - loss: 1.5104 - accuracy: 0.7089 - 84ms/epoch - 3ms/step
Epoch 5/10
29/29 - 0s - loss: 1.2115 - accuracy: 0.8389 - 79ms/epoch - 3ms/step
Epoch 6/10
29/29 - 0s - loss: 0.9545 - accuracy: 0.8622 - 70ms/epoch - 2ms/step
Epoch 7/10
29/29 - 0s - loss: 0.7327 - accuracy: 0.9222 - 46ms/epoch - 2ms/step
Epoch 8/10
29/29 - 0s - loss: 0.5384 - accuracy: 0.9933 - 44ms/epoch - 2ms/step
Epoch 9/10
29/29 - 0s - loss: 0.3819 - accuracy: 0.9989 - 42ms/epoch - 1ms/step
Epoch 10/10
29/29 - 0s - loss: 0.2504 - accuracy: 1.0000 - 54ms/epoch - 2ms/step


<keras.src.callbacks.History at 0x7cdd0768df60>

In [23]:
# Score the second model on the held-out test data (X_test / y_test)
test_metrics = model2.evaluate(X_test, y_test, verbose=2)
model_loss, model_accuracy = test_metrics
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

10/10 - 0s - loss: 0.1936 - accuracy: 1.0000 - 161ms/epoch - 16ms/step
Loss: 0.19358396530151367, Accuracy: 1.0


In [24]:
# Save the second (optimized) model. This must be `model2`: saving `model`
# here would write the first model's weights under the second model's filename.
model2.save("diseases_trained_2.h5")

## Close out SQLite session

In [25]:
# Close the SQLite connection that was opened when the diseases table was loaded
conn.close()