In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.utils import resample
from sklearn.metrics import precision_score, recall_score, f1_score
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam

# Load the dataset
df = pd.read_csv('Processed Data/Instances Imputed.csv', delimiter='\t')

# Drop non-feature columns
X = df.drop(columns=['Stress Level', 'Date/Time'])
y = df['Stress Level']

# Encode target labels
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y)

# Combine X and y into a single dataframe
df_combined = pd.concat([pd.DataFrame(X), pd.DataFrame({'Stress Level': y_encoded})], axis=1)

# Separate majority and minority classes
df_majority = df_combined[df_combined['Stress Level'] == df_combined['Stress Level'].value_counts().idxmax()]
df_minority = df_combined[df_combined['Stress Level'] != df_combined['Stress Level'].value_counts().idxmax()]

# Downsample majority class
df_majority_downsampled = resample(df_majority,
                                   replace=False,  # sample without replacement
                                   n_samples=df_minority['Stress Level'].value_counts().max(),  # to match minority class
                                   random_state=42)  # reproducible results

# Combine minority class with downsampled majority class
df_downsampled = pd.concat([df_majority_downsampled, df_minority])

# Separate features and target
X_downsampled = df_downsampled.drop(columns=['Stress Level'])
y_downsampled = df_downsampled['Stress Level']

# Normalize the features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_downsampled)

# Reshape the input data to 3D array (samples, timesteps, features)
# Assuming each sample has a single timestep (as we do not have explicit time steps in the data)
X_reshaped = X_scaled.reshape((X_scaled.shape[0], 1, X_scaled.shape[1]))

# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X_reshaped, y_downsampled, test_size=0.2, random_state=42)


In [2]:
class Metrics(tf.keras.callbacks.Callback):
    def __init__(self, validation_data):
        super().__init__()
        self.validation_data = validation_data

    def on_epoch_end(self, epoch, logs=None):
        val_predict = np.argmax(self.model.predict(self.validation_data[0]), axis=1)
        val_targ = self.validation_data[1]
        _val_precision = precision_score(val_targ, val_predict, average=None)
        _val_recall = recall_score(val_targ, val_predict, average=None)
        _val_f1 = f1_score(val_targ, val_predict, average=None)
        for i, (p, r, f) in enumerate(zip(_val_precision, _val_recall, _val_f1)):
            print(f" — Class {i} — val_precision: {p:.4f} — val_recall: {r:.4f} — val_f1: {f:.4f}")

# Define the LSTM model
model = Sequential()
model.add(LSTM(100, input_shape=(X_train.shape[1], X_train.shape[2]), return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(3, activation='softmax'))

# Compile the model without specifying accuracy
model.compile(optimizer=Adam(learning_rate=0.001), loss='sparse_categorical_crossentropy')


2024-06-18 18:56:43.981275: I metal_plugin/src/device/metal_device.cc:1154] Metal device set to: Apple M3 Pro
2024-06-18 18:56:43.981291: I metal_plugin/src/device/metal_device.cc:296] systemMemory: 18.00 GB
2024-06-18 18:56:43.981295: I metal_plugin/src/device/metal_device.cc:313] maxCacheSize: 6.00 GB
2024-06-18 18:56:43.981319: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:303] Could not identify NUMA node of platform GPU ID 0, defaulting to 0. Your kernel may not have been built with NUMA support.
2024-06-18 18:56:43.981331: I tensorflow/core/common_runtime/pluggable_device/pluggable_device_factory.cc:269] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 0 MB memory) -> physical PluggableDevice (device: 0, name: METAL, pci bus id: <undefined>)


In [3]:
# Split a validation set from the training data
X_train_full, X_val, y_train_full, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)

# Initialize the custom metric callback with validation data
metrics = Metrics(validation_data=(X_val, y_val))

# Train the model with the custom metrics callback
history = model.fit(X_train_full, y_train_full, epochs=50, batch_size=32, validation_data=(X_val, y_val), verbose=2, callbacks=[metrics])

Epoch 1/50


2024-06-18 18:56:44.640845: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2024-06-18 18:56:44.740848: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2024-06-18 18:56:44.872652: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2024-06-18 18:56:46.971746: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2024-06-18 18:56:47.009421: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.




2024-06-18 18:56:47.347883: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.
2024-06-18 18:56:47.378992: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:114] Plugin optimizer for device_type GPU is enabled.


 — Class 0 — val_precision: 0.0930 — val_recall: 0.5000 — val_f1: 0.1568
 — Class 1 — val_precision: 0.8155 — val_recall: 0.4112 — val_f1: 0.5467
 — Class 2 — val_precision: 0.1601 — val_recall: 0.2314 — val_f1: 0.1893
343/343 - 3s - loss: 1.0897 - val_loss: 1.0771 - 3s/epoch - 10ms/step
Epoch 2/50
 — Class 0 — val_precision: 0.0947 — val_recall: 0.4608 — val_f1: 0.1571
 — Class 1 — val_precision: 0.8217 — val_recall: 0.3730 — val_f1: 0.5131
 — Class 2 — val_precision: 0.1602 — val_recall: 0.3188 — val_f1: 0.2132
343/343 - 2s - loss: 1.0827 - val_loss: 1.0855 - 2s/epoch - 6ms/step
Epoch 3/50
 — Class 0 — val_precision: 0.0876 — val_recall: 0.5000 — val_f1: 0.1490
 — Class 1 — val_precision: 0.8283 — val_recall: 0.3949 — val_f1: 0.5348
 — Class 2 — val_precision: 0.1627 — val_recall: 0.2314 — val_f1: 0.1911
343/343 - 2s - loss: 1.0798 - val_loss: 1.0740 - 2s/epoch - 6ms/step
Epoch 4/50
 — Class 0 — val_precision: 0.0929 — val_recall: 0.3873 — val_f1: 0.1499
 — Class 1 — val_precision: 0

In [4]:
# Evaluate the model
y_pred = np.argmax(model.predict(X_test), axis=1)

# Calculate precision, recall, and F1 score for the test set for each class
precision = precision_score(y_test, y_pred, average=None)
recall = recall_score(y_test, y_pred, average=None)
f1 = f1_score(y_test, y_pred, average=None)

print("Test Precision for each class:", precision)
print("Test Recall for each class:", recall)
print("Test F1 Score for each class:", f1)

Test Precision for each class: [0.08619092 0.85653409 0.16135881]
Test Recall for each class: [0.37804878 0.44014599 0.34311512]
Test F1 Score for each class: [0.14037736 0.58148505 0.21949458]
