<h1>Jason Olefson Project 2 Part 1 Deep Learning</h1>

<h1>Imports</h1>

In [None]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import GRU, Dense, Dropout
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from keras.optimizers import Adam
from keras.regularizers import l2
from keras.models import load_model
import matplotlib.pyplot as plt
import numpy as np
from keras.callbacks import ReduceLROnPlateau

<h1>GPU Check</h1>
<h3>IMPORTANT: In this project, I used my GPU (NVIDIA RTX 4080) to train my model. Because of this, you may need to adjust the first few cells of this notebook to run it on your device. Thank you.</h3>

In [None]:
# Print the list of local devices that TensorFlow's device_lib reports, so the
# reader can confirm which hardware this kernel sees.
#
# Alternative checks that are left disabled in this cell:
#   print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
#   print("Is TensorFlow using GPU?: ", tf.test.is_built_with_cuda())
#
# NOTE(review): tensorflow.python.* does not look like a documented public
# module path - confirm this import works on the installed TensorFlow version.
from tensorflow.python.client import device_lib

local_devices = device_lib.list_local_devices()
print(local_devices)

In [None]:
# Report whether TensorFlow found at least one physical GPU; an empty device
# list is falsy, which selects the CPU message.
gpu_devices = tf.config.list_physical_devices('GPU')
print("GPU is available and being used." if gpu_devices else "No GPU detected, using CPU.")

<h1>Save Model Function</h1>

In [None]:
def save_model(model, model_dir='saved_model', model_name='project2_part1_model.h5'):
    """Write ``model`` to ``<model_dir>/<model_name>`` and print the location.

    Args:
        model: Object exposing a ``save(path)`` method (here, the trained network).
        model_dir: Folder that receives the file; it is created, including any
            missing parents, when it does not exist yet.
        model_name: File name to use inside ``model_dir``.

    Returns:
        None. The destination path is reported on stdout.
    """
    model_path = os.path.join(model_dir, model_name)
    # exist_ok=True makes this a no-op when the folder is already there
    os.makedirs(model_dir, exist_ok=True)
    model.save(model_path)
    print(f"Model saved to: {model_path}")

<h1>Data Prep</h1>

In [None]:
# Root folder of the dataset; every immediate subfolder is scanned for CSV files.
data_dir = "./Data/"

data_frames = []  # one DataFrame per CSV file that is read

# Visit subfolders and files in sorted order. os.listdir() returns entries in
# arbitrary order, and the row order of the combined frame feeds the seeded
# shuffle/split later in the notebook, so an unstable listing order would give
# different splits on different machines.
for subfolder in sorted(os.listdir(data_dir)):
    subfolder_path = os.path.join(data_dir, subfolder)
    if not os.path.isdir(subfolder_path):
        continue  # only directories are scanned
    for file in sorted(os.listdir(subfolder_path)):
        if file.endswith(".csv"):
            file_path = os.path.join(subfolder_path, file)
            # header=None: the first line of each file is read as data
            df = pd.read_csv(file_path, header=None)
            data_frames.append(df)

# pd.concat() would fail on an empty list anyway; raise the same exception type
# with a message that points at the actual cause.
if not data_frames:
    raise ValueError(f"No .csv files found in the subfolders of {data_dir!r}")

# Stack all files into a single frame and give the columns readable names.
combined_data = pd.concat(data_frames, ignore_index=True)
combined_data.columns = ["Baby_ID", "Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation", "Pain_Level"]

# Drop rows whose Pain_Level (the last column) is the "#" marker. The explicit
# .copy() makes the result an independent frame, so the column assignments
# below do not write into a slice of the unfiltered data.
combined_data = combined_data[combined_data["Pain_Level"] != "#"].copy()

# Coerce the three vital-sign columns to numbers; entries that cannot be parsed
# (e.g. typos) become NaN, and rows containing such entries are then removed.
vital_columns = ["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"]
for column in vital_columns:
    combined_data[column] = pd.to_numeric(combined_data[column], errors='coerce')
combined_data = combined_data.dropna(subset=vital_columns)

combined_data.head()  # display the first few rows

<h1>Shuffle/Split Dataset</h1>

In [None]:
feature_columns = ["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"]

# Shuffle into a new name instead of rebinding combined_data: re-running this
# cell then always starts from the same input and reproduces the same splits,
# rather than shuffling the already-shuffled frame again.
shuffled_data = shuffle(combined_data, random_state=42)

# 90% of the rows go to training; the remaining 10% are held out.
train, temp = train_test_split(shuffled_data, test_size=0.1, random_state=42)
# The held-out rows are split in half: validation and test each receive about
# 5% of the total.
validation, test = train_test_split(temp, test_size=0.5, random_state=42)

# Separate features and labels for each split.
X_train = train[feature_columns]
y_train = train["Pain_Level"]
X_val = validation[feature_columns]
y_val = validation["Pain_Level"]
X_test = test[feature_columns]
y_test = test["Pain_Level"]

<h1>Normalization</h1>

In [None]:
feature_names = ["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"]
scaler = StandardScaler()

# Standardize the feature columns. The scaler is fitted on the training split
# only and then applied unchanged to the validation and test splits; each
# scaled array is wrapped back into a DataFrame with the original column names.
X_train = pd.DataFrame(scaler.fit_transform(train[feature_names]), columns=feature_names)
X_val = pd.DataFrame(scaler.transform(validation[feature_names]), columns=feature_names)
X_test = pd.DataFrame(scaler.transform(test[feature_names]), columns=feature_names)

# Labels: take Pain_Level from each split and renumber the index from 0 so it
# lines up with the freshly built feature frames.
y_train, y_val, y_test = (
    split["Pain_Level"].reset_index(drop=True)
    for split in (train, validation, test)
)

X_train.head()  # display the first few rows

In [None]:
def _as_gru_input(features):
    """Return ``features`` as a float32 array shaped (samples, 3, 1).

    np.asarray accepts both a DataFrame and an ndarray, so this cell can be
    re-run after the variables have already been converted (the earlier
    ``.values`` access failed on a second run because ndarrays have no
    ``.values`` attribute).
    """
    return np.asarray(features, dtype='float32').reshape(-1, 3, 1)


# Convert to float32 and reshape to (samples, timesteps, features): each of the
# three feature columns becomes one timestep carrying a single value.
X_train = _as_gru_input(X_train)
X_val = _as_gru_input(X_val)
X_test = _as_gru_input(X_test)

# Labels as int32 class indices.
y_train = y_train.astype('int32')
y_val = y_val.astype('int32')
y_test = y_test.astype('int32')

<h1>Learning Rate Adjuster</h1>

In [None]:
# Callback that lowers the learning rate when the validation loss stops improving.
lr_adjuster = ReduceLROnPlateau(
    monitor='val_loss',  # quantity that is watched
    patience=5,          # epochs without improvement before the rate is cut
    factor=0.05,         # new_lr = lr * factor
    min_lr=1e-6,         # lower bound for the learning rate
    verbose=1,           # print a message whenever the rate is changed
)

<h1>Build GRU Model</h1>

In [None]:
# Model Architecture
model = Sequential([
    GRU(16, return_sequences=True, input_shape=(X_train.shape[1], 1), kernel_regularizer=l2(0.001)),
    Dropout(0.2),
    GRU(16, return_sequences=True, kernel_regularizer=l2(0.001)),
    Dropout(0.2),
    GRU(16, return_sequences=True),
    Dropout(0.2),
    GRU(16, return_sequences=False, kernel_regularizer=l2(0.001)),
    Dropout(0.2),
    Dense(3, activation='softmax') # 3 unit output layer (for 3 classes) / softmax activation
])

# Compile
model.compile(optimizer=Adam(learning_rate=0.01), loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary() # display summary

<h1>Training</h1>

In [None]:
# Training hyperparameters (adjust as needed)
batch_size = 32
epochs = 60

# Fit on the training split with the validation split passed for monitoring and
# lr_adjuster attached as a callback; the returned History object is kept in
# `history`.
history = model.fit(
    x=X_train,
    y=y_train,
    batch_size=batch_size,
    epochs=epochs,
    validation_data=(X_val, y_val),
    callbacks=[lr_adjuster],
    verbose=1,
)

<h1>Save Model</h1>

In [None]:
# Set SAVE_MODEL to True to write the current model to disk (if happy with it).
# It is off by default, so running the whole notebook does not save anything.
SAVE_MODEL = False

if SAVE_MODEL:
    save_model(model)

<h1>Plot Training and Validation Performance</h1>

In [None]:
# Training/Validation Accuracy
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('Model Accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

# Training/Validation Loss
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('Loss Over Epochs')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(['Train', 'Validation'], loc='upper left')
plt.show()

<h1>Report</h1>

<h2>Data Preprocessing and Training Techniques Used</h2>
<ul>
    <li>Normalization</li>
    <li>Dropout</li>
    <li>Regularization</li>
    <li>Train-Validation-Test Splitting</li>
    <li>Shuffling</li>
</ul>
<h2>Below are the Training and Validation Results in Figures</h2>
<img src="Images/Part 1 Model Accuracy.png">
<img src="Images/Part 1 Loss Over Epochs.png">