## Import dependencies

In [1]:
# Filepaths, numpy, and Tensorflow
import pandas as pd
import os
import numpy as np
import tensorflow as tf

In [2]:
# sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [3]:
# Keras
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense

## Load and pre-process data

In [4]:
# Load the pre-encoded symptom/disease table (path is relative to the working directory)
encoded_data = pd.read_csv(filepath_or_buffer="encoded_data.csv")
# Show the first five rows as a quick sanity check of the columns
encoded_data.head(n=5)

Unnamed: 0,acidity,palpitations,anxiety,bladder_discomfort,blurred_and_distorted_vision,breathlessness,burning_micturition,chest_pain,chills,congestion,...,spotting_ urination,stiff_neck,stomach_pain,sweating,swelled_lymph_nodes,throat_irritation,visual_disturbances,vomiting,watering_from_eyes,Disease
0,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
1,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
2,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
3,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,6


In [5]:
# Split the features from the output data.
# "Unnamed: 0" is excluded along with the target: it shows up in the loaded frame's
# header but is not a symptom (presumably the row index written when the CSV was
# exported -- TODO confirm). Leaving it in would feed row position to the network as
# a feature, which can leak the label if the file is ordered by disease.
# errors="ignore" keeps this cell working if the CSV is re-exported without that column.
X = (
    encoded_data
    .drop(columns="Disease")
    .drop(columns="Unnamed: 0", errors="ignore")
    .values
)
# Integer-encoded disease label for every row
y = encoded_data["Disease"].values

In [6]:
# Hold out a test set; stratifying on y keeps the disease proportions the same in
# both splits, and the fixed random_state makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [7]:
# One-hot encode the integer disease labels so they match the softmax output layer.
# The class count is derived from the full label vector `y` instead of being
# hard-coded, so it follows the dataset if diseases are added or removed
# (assumes labels are integers 0..K-1 -- TODO confirm against the encoding step).
num_classes = int(y.max()) + 1

# Only encode while the labels are still 1-D integer vectors: this cell overwrites
# y_train / y_test, so without the guard a second run would one-hot encode the
# already encoded arrays and silently change their shape.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes)
    y_test = to_categorical(y_test, num_classes)

# Display the first encoded training label
y_train[0]

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 1.], dtype=float32)

In [8]:
# Report the dimensions of every split so feature and label row counts can be compared
for caption, split in (
    ("X Training Shape:", X_train),
    ("X Testing Shape:", X_test),
    ("Y Training Shape:", y_train),
    ("Y Testing Shape:", y_test),
):
    print(caption, split.shape)

X Training Shape: (900, 52)
X Testing Shape: (300, 52)
Y Training Shape: (900, 10)
Y Testing Shape: (300, 10)


## Build the model

In [9]:
# Seed Python, NumPy and TensorFlow before any layer is created so that weight
# initialisation and the per-epoch shuffling are repeatable across
# Restart & Run All, in line with the seeded train/test split (random_state=42).
tf.keras.utils.set_random_seed(42)

# Create an empty sequential model; layers are appended one at a time below
model = Sequential()

In [10]:
# First hidden layer: 20 ReLU units. The input width is taken from the number of
# feature columns in X_train rather than being written out as a literal.
model.add(
    Dense(
        units=20,
        activation="relu",
        input_dim=X_train.shape[1],
    )
)

In [11]:
# Second hidden layer: another 20 ReLU units stacked on the first
model.add(Dense(units=20, activation="relu"))

In [12]:
# Output layer: one softmax unit per y label, so the layer width is num_classes
# and each prediction is a probability distribution over the classes
model.add(
    Dense(units=num_classes, activation="softmax")
)

## Model Summary

In [13]:
# Print the layer-by-layer summary (layer name/type, output shape, parameter count)
# to check the assembled architecture before compiling
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 20)                1060      
                                                                 
 dense_1 (Dense)             (None, 20)                420       
                                                                 
 dense_2 (Dense)             (None, 10)                210       
                                                                 
Total params: 1690 (6.60 KB)
Trainable params: 1690 (6.60 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## Compile and Train Model

In [14]:
# Categorical cross-entropy pairs with the one-hot labels and softmax output
# (mean squared error would be the choice for a regression target instead)
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [15]:
# Fit (train) the model
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,     # ten full passes over the training split
    shuffle=True,  # reshuffle the training rows before each epoch
    verbose=2,     # one summary line per epoch
)

Epoch 1/10
29/29 - 2s - loss: 2.1854 - accuracy: 0.2567 - 2s/epoch - 74ms/step
Epoch 2/10
29/29 - 0s - loss: 1.8669 - accuracy: 0.5156 - 152ms/epoch - 5ms/step
Epoch 3/10
29/29 - 0s - loss: 1.5068 - accuracy: 0.6344 - 107ms/epoch - 4ms/step
Epoch 4/10
29/29 - 0s - loss: 1.1212 - accuracy: 0.8267 - 70ms/epoch - 2ms/step
Epoch 5/10
29/29 - 0s - loss: 0.7742 - accuracy: 0.9867 - 51ms/epoch - 2ms/step
Epoch 6/10
29/29 - 0s - loss: 0.4867 - accuracy: 1.0000 - 54ms/epoch - 2ms/step
Epoch 7/10
29/29 - 0s - loss: 0.2893 - accuracy: 1.0000 - 52ms/epoch - 2ms/step
Epoch 8/10
29/29 - 0s - loss: 0.1697 - accuracy: 1.0000 - 54ms/epoch - 2ms/step
Epoch 9/10
29/29 - 0s - loss: 0.1044 - accuracy: 1.0000 - 51ms/epoch - 2ms/step
Epoch 10/10
29/29 - 0s - loss: 0.0682 - accuracy: 1.0000 - 46ms/epoch - 2ms/step


<keras.src.callbacks.History at 0x7d79a4dd2e00>

## Save and Load Model

In [16]:
# Persist the trained network to a single HDF5 file next to the notebook
# NOTE(review): the recorded output shows a warning raised from saving_api.save_model,
# presumably the legacy-HDF5 notice -- confirm before switching formats, since the
# load cell reads this exact filename
model.save(filepath="diseases_trained.h5")

  saving_api.save_model(


In [17]:
# Reload the saved file and rebind `model`, so evaluation runs on the persisted
# network rather than the in-memory one (a save/load round-trip check)
# NOTE(review): this import could live with the other Keras imports at the top
from tensorflow.keras.models import load_model

model = load_model(filepath="diseases_trained.h5")

## Evaluate the Model

In [18]:
# Evaluate the reloaded model on the held-out test split (X_test / y_test)
model_loss, model_accuracy = model.evaluate(x=X_test, y=y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

10/10 - 0s - loss: 0.0577 - accuracy: 1.0000 - 214ms/epoch - 21ms/step
Loss: 0.057721544057130814, Accuracy: 1.0
