## Import dependencies

In [1]:
# pandas, filepaths (os), NumPy, and TensorFlow
import pandas as pd
import os
import numpy as np
import tensorflow as tf

In [11]:
# sklearn
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [2]:
# Keras
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense

## Load and pre-process data

In [10]:
# Load the pre-encoded dataset from the CSV file in the working directory
encoded_data = pd.read_csv("encoded_data.csv")

# Peek at the first five rows (head's default) to check the columns
encoded_data.head(n=5)

Unnamed: 0,acidity,continuous_sneezing,itching,chills,fatigue,indigestion,shivering,skin_rash,cough,headache,...,fast_heart_rate,throat_irritation,redness_of_eyes,rusty_sputum,sinus_pressure,runny_nose,congestion,loss_of_smell,muscle_pain,Disease
0,0,1,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,0,0,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,1,0,1,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,0,1,0,0,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,1,0,1,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [12]:
# Separate the target from the model inputs:
#   y -> the "Disease" column
#   X -> every remaining column
y = encoded_data["Disease"].to_numpy()
X = encoded_data.drop(columns=["Disease"]).to_numpy()

In [17]:
# Hold out a test set. No size is given, so the library default applies
# (450 training / 150 testing rows in this run). Stratifying on y keeps the
# class mix the same in both parts, and the fixed random_state makes the
# partition repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    random_state=42,
)

In [18]:
# One-hot encode the integer class labels so they match the softmax output
# layer and the categorical cross-entropy loss used further down.
# The class count is derived from the labels (largest id + 1, the same rule
# to_categorical applies when no count is given) instead of being hard-coded;
# it evaluates to 5 for this dataset.
num_classes = int(np.max(y)) + 1

# Only encode labels that are still 1-D. Without this guard, re-running the
# cell would one-hot encode the already-encoded matrix and silently produce a
# 3-D array, because y_train / y_test are overwritten in place.
if y_train.ndim == 1:
    y_train = to_categorical(y_train, num_classes)
if y_test.ndim == 1:
    y_test = to_categorical(y_test, num_classes)

# Show the first encoded training label as a sanity check
y_train[0]

array([0., 0., 0., 0., 1.], dtype=float32)

In [26]:
# Report the dimensions of every split to confirm that the feature matrices
# and the one-hot label matrices line up row for row
for caption, split in (
    ("X Training Shape:", X_train),
    ("X Testing Shape:", X_test),
    ("Y Training Shape:", y_train),
    ("Y Testing Shape:", y_test),
):
    print(caption, split.shape)

X Training Shape: (450, 36)
X Testing Shape: (150, 36)
Y Training Shape: (450, 5)
Y Testing Shape: (150, 5)


## Build the model

In [27]:
# Seed the Python, NumPy and TensorFlow random generators before any weights
# are created, so initialisation and training are as repeatable as the backend
# allows on Restart & Run All. 42 matches the random_state used for the
# train/test split.
tf.keras.utils.set_random_seed(42)

# Create an empty sequential model; layers are stacked onto it in the next cells
model = Sequential()

In [29]:
# First hidden layer: 100 ReLU units. The input width is read from the training
# matrix (36 feature columns in this run) rather than typed in by hand.
model.add(
    Dense(
        units=100,
        activation="relu",
        input_dim=X_train.shape[1],
    )
)

In [30]:
# Second hidden layer: another 100 ReLU units
model.add(Dense(units=100, activation="relu"))

In [32]:
# Output layer: one softmax unit per class, so each prediction is a
# probability distribution over the num_classes disease labels
model.add(Dense(units=num_classes, activation="softmax"))

## Model Summary

In [33]:
# Print a per-layer table of output shapes and parameter counts for the model
# assembled above, followed by the trainable / non-trainable totals
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 100)               3700      
                                                                 
 dense_1 (Dense)             (None, 100)               10100     
                                                                 
 dense_2 (Dense)             (None, 5)                 505       
                                                                 
Total params: 14305 (55.88 KB)
Trainable params: 14305 (55.88 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


## Compile and Train Model

In [34]:
# Configure training: categorical cross-entropy pairs with the one-hot labels
# and the softmax output, Adam does the optimisation, and accuracy is tracked
# as the reported metric.
model.compile(
    loss="categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)

In [35]:
# Fit (train) the model
# Ten passes over the training set with shuffling enabled; verbose=2 prints one
# summary line per epoch. The call is left as the cell's last expression, so the
# returned History object is echoed as well.
model.fit(
    x=X_train,
    y=y_train,
    epochs=10,
    shuffle=True,
    verbose=2,
)

Epoch 1/10
15/15 - 1s - loss: 1.2431 - accuracy: 0.7978 - 1s/epoch - 70ms/step
Epoch 2/10
15/15 - 0s - loss: 0.5466 - accuracy: 1.0000 - 35ms/epoch - 2ms/step
Epoch 3/10
15/15 - 0s - loss: 0.1800 - accuracy: 1.0000 - 30ms/epoch - 2ms/step
Epoch 4/10
15/15 - 0s - loss: 0.0531 - accuracy: 1.0000 - 33ms/epoch - 2ms/step
Epoch 5/10
15/15 - 0s - loss: 0.0202 - accuracy: 1.0000 - 32ms/epoch - 2ms/step
Epoch 6/10
15/15 - 0s - loss: 0.0108 - accuracy: 1.0000 - 36ms/epoch - 2ms/step
Epoch 7/10
15/15 - 0s - loss: 0.0073 - accuracy: 1.0000 - 35ms/epoch - 2ms/step
Epoch 8/10
15/15 - 0s - loss: 0.0055 - accuracy: 1.0000 - 37ms/epoch - 2ms/step
Epoch 9/10
15/15 - 0s - loss: 0.0043 - accuracy: 1.0000 - 40ms/epoch - 3ms/step
Epoch 10/10
15/15 - 0s - loss: 0.0035 - accuracy: 1.0000 - 31ms/epoch - 2ms/step


<keras.src.callbacks.History at 0x7f96cbe12f50>

## Save and Load Model

In [36]:
# Save the trained model to "diseases_trained.h5" in the working directory.
# NOTE(review): the truncated `saving_api.save_model(` output under this cell
# looks like Keras' warning about the legacy HDF5 format — TODO confirm, and
# consider the native ".keras" format if nothing else depends on the .h5 file.
model.save("diseases_trained.h5")

  saving_api.save_model(


In [37]:
# Load the model back from disk.
# `model` is rebound here, so the evaluation that follows runs on the reloaded
# copy rather than on the in-memory model that was just trained.
# NOTE(review): this import sits mid-notebook, while the other Keras imports
# live in the import cells at the top.
from tensorflow.keras.models import load_model
model = load_model("diseases_trained.h5")

## Evaluate the Model

In [38]:
# Score the reloaded model on the held-out test split (not the training data).
# evaluate() returns the loss followed by the compiled accuracy metric.
model_loss, model_accuracy = model.evaluate(x=X_test, y=y_test, verbose=2)
print(f"Loss: {model_loss}, Accuracy: {model_accuracy}")

5/5 - 0s - loss: 0.0030 - accuracy: 1.0000 - 374ms/epoch - 75ms/step
Loss: 0.0030130676459521055, Accuracy: 1.0
