<h1>Jason Olefson Project 2 Part 1 Deep Learning</h1>

<h1>Imports</h1>

In [1]:
import os
import pandas as pd
import tensorflow as tf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle

<h1>GPU Check</h1>
<h3>IMPORTANT In this project, I used my GPU (NVIDIA RTX 4080) to train my model. Because of this, you may need to configure the first few blocks on this file to run on your device. Thank you.</h3>

In [2]:
# Check for GPU availability
# print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))

# # Check if TensorFlow is using GPU for cuDNN-enabled operations
# print("Is TensorFlow using GPU?: ", tf.test.is_built_with_cuda())

from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 15489345033257107239
xla_global_id: -1
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 14046724096
locality {
  bus_id: 1
  links {
  }
}
incarnation: 14145539842361375606
physical_device_desc: "device: 0, name: NVIDIA GeForce RTX 4080, pci bus id: 0000:01:00.0, compute capability: 8.9"
xla_global_id: 416903419
]


In [3]:
if tf.config.list_physical_devices('GPU'):
    print("GPU is available and being used.")
else:
    print("No GPU detected, using CPU.")

GPU is available and being used.


<h1>Data Prep</h1>

In [4]:
# Define data directory path
data_dir = "./Data/"

data_frames = [] # empty list to hold DataFrames

# Combine data into single DataFrame
# loop over each subfolder
for subfolder in os.listdir(data_dir):
    subfolder_path = os.path.join(data_dir, subfolder)
    # check if directory
    if os.path.isdir(subfolder_path):
        # loop over all csv
        for file in os.listdir(subfolder_path):
            if file.endswith(".csv"):
                file_path = os.path.join(subfolder_path, file)
                df = pd.read_csv(file_path, header=None) #read csv
                data_frames.append(df) # add Datafram to list
combined_data = pd.concat(data_frames, ignore_index=True) #combine into single DataFrame
combined_data.columns = ["Baby_ID", "Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation", "Pain_Level"] # rename columns for clarity
combined_data = combined_data[combined_data.iloc[:, 3] != "#"] # remove rows with "#" in the 4th column
combined_data = combined_data[combined_data.iloc[:, 2] != "--1"] # remove that pesky typo
combined_data.head() # for clarity (display first few columns)

Unnamed: 0,Baby_ID,Heart_Rate,Respiratory_Rate,Oxygen_Saturation,Pain_Level
0,0,142,60,100,0
1,1,142,60,100,0
2,2,142,60,100,0
3,3,142,59,100,0
4,4,143,59,100,0


<h1>Shuffle/Split Dataset</h1>

In [7]:
combined_data = shuffle(combined_data, random_state=42) # shuffle dataset
train, temp = train_test_split(combined_data, test_size=0.1, random_state=42) # split to training/temp sets (90% training, 10% temp)
validation, test = train_test_split(temp, test_size=0.5, random_state=42) # split temp set into validation/test sets (10% of total each)

# separate features/labels for each set
X_train = train[["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"]]
y_train = train["Pain_Level"]
X_val = validation[["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"]]
y_val = validation["Pain_Level"]
X_test = test[["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"]]
y_test = test["Pain_Level"]

<h1>Normalization</h1>

In [8]:
scaler = StandardScaler()

# Normalization for feature columns
X_train = scaler.fit_transform(train[["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"]])
X_val = scaler.transform(validation[["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"]])
X_test = scaler.transform(test[["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"]])
# Convert normalized arrays back to DataFrames
X_train = pd.DataFrame(X_train, columns=["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"])
X_val = pd.DataFrame(X_val, columns=["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"])
X_test = pd.DataFrame(X_test, columns=["Heart_Rate", "Respiratory_Rate", "Oxygen_Saturation"])
# Label extraction
y_train = train["Pain_Level"].reset_index(drop=True)
y_val = validation["Pain_Level"].reset_index(drop=True)
y_test = test["Pain_Level"].reset_index(drop=True)

X_train.head() # for clarity (display first few columns)

ValueError: could not convert string to float: '--1'