<a href="https://colab.research.google.com/github/omkar109/Sleep-Disorder-ML-Model/blob/main/Sleep_Disorder_ML_V2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [10]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from keras import layers
import pandas as pd

#Hyperparameters
learning_rate = 0.01
batch_size = 12
epochs = 100
classification_threshold = 0.8
#If the model is more than 80% confident about a sleep disorder diagnosis, has
#sleep disorder will be marked as true

data = pd.read_csv("Sleep_health_and_lifestyle_dataset.csv") #imports dataset
data = data.sample(frac=1).reset_index(drop=True) #Shuffles the dataset

#Converts categorical features to one-hot encoded features
def one_hot_engineering(feature_list, data):
  for feature in feature_list:
    vocab = data[feature].unique().tolist()
    preprocessing_layer = tf.keras.layers.StringLookup(vocabulary=vocab)
    temp_feature = preprocessing_layer(tf.convert_to_tensor(data[feature])).numpy() -1.0
    prelayer_2 = tf.keras.layers.CategoryEncoding(num_tokens=len(vocab),output_mode="one_hot")
    one_hot_array = prelayer_2(tf.convert_to_tensor(temp_feature)).numpy()
    new_df = pd.DataFrame(one_hot_array, columns=[f'{category}' for category in vocab])
    data = pd.concat([data, new_df], axis=1)
  return data

#Creates a binary label and converts it into a floating point value
def label_engineering(data):
  data["Normal"] = data["Normal"] + data["Normal Weight"]
  data = data.drop("Normal Weight", axis=1)
  data["Has Sleep Disorder"] = (data["Sleep Disorder"] != "None").astype(float)
  return data

categorical_feature_list = ["Gender", "Occupation", "BMI Category"]
data = one_hot_engineering(categorical_feature_list, data)
data = label_engineering(data)


#Scale the features using normalization
def normalize_features(data):
  data_normalized = (data - data.mean()) / data.std()
  data_normalized = data_normalized.drop("Has Sleep Disorder", axis=1)
  data_normalized = pd.concat([data_normalized, data["Has Sleep Disorder"]], axis=1)
  return data_normalized

data = normalize_features(data)

#Compiles list of features to send to model and a label
def feed_to_model(delete_features):
  features = {name:np.array(value) for name, value in data.items()}
  label = np.array(features.pop("Has Sleep Disorder"))
  for feature in delete_features:
    del features[feature]
  return features, label

#List of features the model shouldn't use
delete_features = ["Person ID", "Sleep Disorder", "Occupation", "Gender", "Blood Pressure", "BMI Category"]
features, label = feed_to_model(delete_features)

#Creates dictionary assigning each feature to an input tensor
inputs = {}
for feature in features:
  inputs[feature] = tf.keras.Input(shape=(1,),dtype="float64")

#Concatenates all inputs to form one input layer
concatenated_inputs = tf.keras.layers.Concatenate()(inputs.values())
#Defines shape and activation function of the output layer
dense = layers.Dense(units=1, input_shape=(1,), activation=tf.sigmoid)
output = dense(concatenated_inputs)

#Defines the metrics we want to calculate while training
metrics=[tf.keras.metrics.BinaryAccuracy(threshold=classification_threshold),
         tf.keras.metrics.Precision(thresholds=classification_threshold),
         tf.keras.metrics.Recall(thresholds=classification_threshold)]

#Creates the model
model = tf.keras.Model(inputs=inputs, outputs=output)
model.compile(optimizer=tf.keras.optimizers.experimental.RMSprop(learning_rate=learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(),
              metrics=metrics)

#Trains the model
history = model.fit(x=features, y=label, batch_size=batch_size, epochs=epochs, validation_split=0.2)

#Overall results from the model:
#Everytime the model is trained, it gets a slightly different result due to the nature of ML
#but here are averages after training the model 10 different times (rounded to 4 spots)
#Training Loss: 0.2477
#Training Accuracy: 92.01%
#Validation Loss: 0.3518
#Validation Accuracy: 89.87%

  data_normalized = (data - data.mean()) / data.std()
  data_normalized = (data - data.mean()) / data.std()


Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78