In [1]:
import sys
sys.path.append("..")
# Now you can import your module
from helpers import *



## 1. Importing Libraries

In [2]:
import itertools
import os
from itertools import combinations

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader
from torch.utils.data import DataLoader, TensorDataset

from NN import *

## 2. Create Training, Validation and Test Data

In [3]:
# Settings for the BP_PHQ_9 data pipeline, named once instead of repeated inline.
SEED = 42        # shared by both splits and by the shuffling generator below
BATCH_SIZE = 2

df = pd.read_csv('../data/Threshold_3_Operator_-_Depressionfeature_BP_PHQ_9_PercentofDataset_100.csv')
print_information(df)

# NOTE(review): the rows carry ID_1 / ID_2 / group_id columns; if one subject can
# appear in several rows, a purely random row split may leak subjects between the
# train, validation and test sets — confirm against the dataset construction.

# Hold out 10% of all rows as the test set ...
train_df, test_df = train_test_split(df, test_size=0.1, random_state=SEED)
# ... then split the remaining 90% into 80% train / 20% validation
# (i.e. 72% / 18% / 10% of the full dataset).
train_df, validation_df = train_test_split(train_df, train_size=0.8, random_state=SEED)

# Convert every split into (features, targets) tensors with the project helper.
train_features, train_targets = df_to_tensor(train_df, features_column='FEATURES', target_col='Depression')
validation_features, validation_targets = df_to_tensor(validation_df, features_column='FEATURES', target_col='Depression')
test_features, test_targets = df_to_tensor(test_df, features_column='FEATURES', target_col='Depression')

print(train_features.shape, train_targets.shape, validation_features.shape, validation_targets.shape, test_features.shape, test_targets.shape)

train_dataset = CustomDataset(train_features, train_targets)
validation_dataset = CustomDataset(validation_features, validation_targets)
test_dataset = CustomDataset(test_features, test_targets)

# Only the training loader shuffles. Giving it its own seeded generator makes the
# batch order identical every time this cell is re-run, independent of how much
# of torch's global random state other cells have consumed in the meantime.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=torch.Generator().manual_seed(SEED),
)
validation_dataloader = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

0.0
ID_1          80
ID_2          80
group_id      80
SEX           80
AGE           80
              ..
FEATURE_51    80
FEATURE_52    80
FEATURE_53    80
FEATURE_54    80
FEATURE_55    80
Length: 65, dtype: int64
1.0
ID_1          80
ID_2          80
group_id      80
SEX           80
AGE           80
              ..
FEATURE_51    80
FEATURE_52    80
FEATURE_53    80
FEATURE_54    80
FEATURE_55    80
Length: 65, dtype: int64
2.0
ID_1          80
ID_2          80
group_id      80
SEX           80
AGE           80
              ..
FEATURE_51    80
FEATURE_52    80
FEATURE_53    80
FEATURE_54    80
FEATURE_55    80
Length: 65, dtype: int64
3.0
ID_1          80
ID_2          80
group_id      80
SEX           80
AGE           80
              ..
FEATURE_51    80
FEATURE_52    80
FEATURE_53    80
FEATURE_54    80
FEATURE_55    80
Length: 65, dtype: int64
6.0
ID_1          80
ID_2          80
group_id      80
SEX           80
AGE           80
              ..
FEATURE_51    80
FEATURE_52   

## 3. Get the best Model with Hyperparameter Tuning

- Creating a configuration with 56 features as input that were extracted from the wavelet analysis
- 128 Neurons as the hidden layer
- Number of epochs is 30 (`num_epochs = 30` in the configuration cells)
- the Loss Function is BCE because we have a binary classification problem (Binary Cross Entropy)
- There are 3 model types
    - Model_0 has 1 hidden layer, i.e. 56 -> 128 -> 1 neuron (activation function: ReLU)
    - Model_1 has 3 hidden layers, i.e. 56 -> 128 -> 64 -> 32 -> 1 neuron (activation function: ReLU)
    - Model_2 has the same architecture as Model_1 but uses tanh as the activation function, which is S-shaped like the sigmoid but outputs values in (-1, 1)
- For the Hyperparameter Tuning I select the parameters
    - learning_rate
    - momentum
    - optimizer (Adam and SGD)
    - model type
- After the hyperparameter tuning is done, the best-performing model is trained again with a larger number of epochs

In [4]:
# Configuration
# input_size / hidden_size are not passed to hyperparameter_tuning in this cell;
# they are kept as notebook-level settings.
input_size = 56
hidden_size = 128
num_epochs = 30

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# nn.BCELoss expects probabilities in [0, 1]
# (assumes the models end in a sigmoid — TODO confirm in NN.py).
criterion = nn.BCELoss()

# `results` is deliberately NOT pre-initialised to {} here: if the tuning call
# fails or is interrupted, an empty dict would silently replace the results of a
# previous successful run and the next cell would write an empty CSV.
# `results` is consumed by the following cell via `.items()`; `accuracys` is not
# used anywhere else in this notebook.
results, accuracys = hyperparameter_tuning(train_dataloader, validation_dataloader, test_dataloader, criterion, num_epochs, device)

print(results)

Validation loss did not improve for 10 epochs. Early stopping...
Loss: 0.6277, Accuracy: 62.50%
Model: 1 is being trained with optimizer: Adam and learning rate: 0.001 and Model_0
Loss: 0.6749, Accuracy: 67.19%
Model: 2 is being trained with optimizer: Adam and learning rate: 0.003 and Model_0
Validation loss did not improve for 10 epochs. Early stopping...
Loss: 0.6445, Accuracy: 65.62%
Model: 3 is being trained with optimizer: Adam and learning rate: 0.006 and Model_0
Validation loss did not improve for 10 epochs. Early stopping...
Loss: 0.8689, Accuracy: 56.25%
Model: 4 is being trained with optimizer: Adam and learning rate: 0.01 and Model_0
Validation loss did not improve for 10 epochs. Early stopping...
Loss: 0.6018, Accuracy: 64.06%
Model: 5 is being trained with optimizer: SGD and learning rate: 0.001 and Model_0
Validation loss did not improve for 10 epochs. Early stopping...
Loss: 0.5647, Accuracy: 60.94%
Model: 6 is being trained with optimizer: SGD and learning rate: 0.003 

Save Results into a dataframe 

In [5]:
# Flatten the tuning results into one row per configuration and persist them.
# Each key of `results` is indexed positionally as
# (model, optimizer, learning rate[, momentum]); each value is a mapping that
# provides 'accuracy' and 'validation_loss'.
records = []
for key, value in results.items():
    opti = key[1]
    records.append({
        'Model': key[0],
        'Optimizer': opti,
        'Learning Rate': key[2],
        # The momentum entry of the key is only read for SGD runs.
        'Momentum': key[3] if opti == 'SGD' else None,
        'Validation Loss': value['validation_loss'],
        'Accuracy': value['accuracy'],
    })

# Build the frame in one go; the explicit column list fixes the column order and
# keeps the header intact even when `results` is empty.
df = pd.DataFrame(
    records,
    columns=['Model', 'Optimizer', 'Learning Rate', 'Momentum', 'Validation Loss', 'Accuracy'],
)

# to_csv does not create missing parent directories, so make sure it exists.
os.makedirs('Results', exist_ok=True)
df.to_csv('Results/FNN_BP_PHQ_9.csv', index=False)

## 4. Do the same for the MH_PHQ_S Dataset

In [6]:
# Settings for the MH_PHQ_S data pipeline, named once instead of repeated inline.
SEED = 42        # shared by both splits and by the shuffling generator below
BATCH_SIZE = 2

df2 = pd.read_csv('../data/Threshold_15_Operator_-_Depressionfeature_MH_PHQ_S_PercentofDataset_100.csv')
print_information(df2)

# Hold out 10% of all rows as the test set ...
train_df2, test_df2 = train_test_split(df2, test_size=0.1, random_state=SEED)
# ... then split the remaining 90% into 80% train / 20% validation
# (i.e. 72% / 18% / 10% of the full dataset).
train_df2, validation_df2 = train_test_split(train_df2, train_size=0.8, random_state=SEED)

# NOTE: the tensor and dataset names below are the same ones the BP_PHQ_9 cell
# uses, so they are rebound here; only the dataloaders get a `2` suffix.
train_features, train_targets = df_to_tensor(train_df2, features_column='FEATURES', target_col='Depression')
validation_features, validation_targets = df_to_tensor(validation_df2, features_column='FEATURES', target_col='Depression')
test_features, test_targets = df_to_tensor(test_df2, features_column='FEATURES', target_col='Depression')

print(train_features.shape, train_targets.shape, validation_features.shape, validation_targets.shape, test_features.shape, test_targets.shape)

train_dataset = CustomDataset(train_features, train_targets)
validation_dataset = CustomDataset(validation_features, validation_targets)
test_dataset = CustomDataset(test_features, test_targets)

# Only the training loader shuffles. Giving it its own seeded generator makes the
# batch order identical every time this cell is re-run, independent of how much
# of torch's global random state other cells have consumed in the meantime.
train_dataloader2 = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    generator=torch.Generator().manual_seed(SEED),
)
validation_dataloader2 = DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_dataloader2 = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)


0.0
ID_1          40
ID_2          40
group_id      40
SEX           40
AGE           40
              ..
FEATURE_51    40
FEATURE_52    40
FEATURE_53    40
FEATURE_54    40
FEATURE_55    40
Length: 65, dtype: int64
1.0
ID_1          40
ID_2          40
group_id      40
SEX           40
AGE           40
              ..
FEATURE_51    40
FEATURE_52    40
FEATURE_53    40
FEATURE_54    40
FEATURE_55    40
Length: 65, dtype: int64
2.0
ID_1          40
ID_2          40
group_id      40
SEX           40
AGE           40
              ..
FEATURE_51    40
FEATURE_52    40
FEATURE_53    40
FEATURE_54    40
FEATURE_55    40
Length: 65, dtype: int64
3.0
ID_1          40
ID_2          40
group_id      40
SEX           40
AGE           40
              ..
FEATURE_51    40
FEATURE_52    40
FEATURE_53    40
FEATURE_54    40
FEATURE_55    40
Length: 65, dtype: int64
4.0
ID_1          40
ID_2          40
group_id      40
SEX           40
AGE           40
              ..
FEATURE_51    40
FEATURE_52   

In [7]:

# Configuration
# input_size / hidden_size are not passed to the tuning call in this cell;
# they are kept as notebook-level settings.
input_size = 56
hidden_size = 128
num_epochs = 30

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# nn.BCELoss expects probabilities in [0, 1]
# (assumes the models end in a sigmoid — TODO confirm in NN.py).
criterion = nn.BCELoss()

# `results` is deliberately NOT pre-initialised to {} here: if the call fails or
# is interrupted, an empty dict would silently replace the previous results and
# the next cell would write an empty CSV.
# NOTE(review): the BP_PHQ_9 run calls `hyperparameter_tuning` with the same
# argument list, while this cell calls `cross_validation` — confirm that both
# names are exported by the imported modules and that the difference is intended.
results, accuracys = cross_validation(train_dataloader2, validation_dataloader2, test_dataloader2, criterion, num_epochs, device)

print(results)


Loss: 0.6766, Accuracy: 55.36%
Model: 1 is being trained with optimizer: Adam and learning rate: 0.001 and Model_0
Loss: 0.6550, Accuracy: 64.29%
Model: 2 is being trained with optimizer: Adam and learning rate: 0.003 and Model_0
Validation loss did not improve for 10 epochs. Early stopping...
Loss: 0.6652, Accuracy: 60.71%
Model: 3 is being trained with optimizer: Adam and learning rate: 0.006 and Model_0
Validation loss did not improve for 10 epochs. Early stopping...
Loss: 0.6899, Accuracy: 58.04%
Model: 4 is being trained with optimizer: Adam and learning rate: 0.01 and Model_0
Validation loss did not improve for 10 epochs. Early stopping...
Loss: 0.6464, Accuracy: 64.29%
Model: 5 is being trained with optimizer: SGD and learning rate: 0.001 and Model_0
Validation loss did not improve for 10 epochs. Early stopping...
Loss: 0.6675, Accuracy: 61.61%
Model: 6 is being trained with optimizer: SGD and learning rate: 0.003 and Model_0
Validation loss did not improve for 10 epochs. Early 

In [8]:
# Flatten the tuning results into one row per configuration and persist them.
# Each key of `results` is indexed positionally as
# (model, optimizer, learning rate[, momentum]); each value is a mapping that
# provides 'accuracy' and 'validation_loss'.
records = []
for key, value in results.items():
    opti = key[1]
    records.append({
        'Model': key[0],
        'Optimizer': opti,
        'Learning Rate': key[2],
        # The momentum entry of the key is only read for SGD runs.
        'Momentum': key[3] if opti == 'SGD' else None,
        'Validation Loss': value['validation_loss'],
        'Accuracy': value['accuracy'],
    })

# Build the frame in one go; the explicit column list fixes the column order and
# keeps the header intact even when `results` is empty.
df = pd.DataFrame(
    records,
    columns=['Model', 'Optimizer', 'Learning Rate', 'Momentum', 'Validation Loss', 'Accuracy'],
)

# to_csv does not create missing parent directories, so make sure it exists.
os.makedirs('Results', exist_ok=True)
df.to_csv('Results/FNN_MH_PHQ_S.csv', index=False)

Use the two dataframes created and compare the performances of the models on dataset BP_PHQ_9 and MH_PHQ_S

In [9]:
# Reload the persisted tuning tables so this comparison works from the CSV files
# rather than from in-memory state left over by earlier cells.
bp_phq_9 = pd.read_csv('Results/FNN_BP_PHQ_9.csv')
mh_phq_s = pd.read_csv('Results/FNN_MH_PHQ_S.csv')

# Row label of the highest 'Accuracy' entry in each table
# (idxmax returns the first one when several rows tie).
max_index_phq9 = bp_phq_9.loc[:, 'Accuracy'].idxmax()
max_index_mh_phq_s = mh_phq_s.loc[:, 'Accuracy'].idxmax()

# Show the winning configuration per dataset: BP_PHQ_9 first, then MH_PHQ_S.
for best_row in (bp_phq_9.loc[max_index_phq9], mh_phq_s.loc[max_index_mh_phq_s]):
    print(best_row)

Model               Model_1
Optimizer               SGD
Learning Rate         0.003
Momentum                0.9
Validation Loss    0.533161
Accuracy            70.3125
Name: 21, dtype: object
Model                Model_2
Optimizer                SGD
Learning Rate          0.001
Momentum                 0.9
Validation Loss     0.638128
Accuracy           66.071429
Name: 36, dtype: object


## 5. Now train with the best-performing hyperparameters for a larger number of epochs

- Dataset BP_PHQ_9 Hyperparameters: Learning Rate: 0.003, Optimizer: SGD, Momentum: 0.9,  Model: Model_1
- Dataset MH_PHQ_S Hyperparameters: Learning Rate: 0.001, Optimizer: SGD, Momentum: 0.9, Model: Model_2

Training for the BP_PHQ_9 Dataset

In [14]:
# Configuration
input_size = 56
hidden_size = 128
# NOTE(review): the section text announces a larger epoch budget for this final
# run, but 30 is the same value used during tuning — confirm the intended number.
num_epochs = 30

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed torch's global RNG so that torch-driven randomness in this cell (e.g. the
# sampling of a shuffling DataLoader without its own generator) is repeatable
# across re-runs.
torch.manual_seed(42)

# Initialize the model, criterion, and optimizer with the best BP_PHQ_9
# hyperparameters from the tuning run (Model_1, SGD, lr=0.003, momentum=0.9).
model = Depression_Classifier_v_1(input_size, hidden_size).to(device)
# nn.BCELoss expects probabilities in [0, 1]
# (assumes the model ends in a sigmoid — TODO confirm in NN.py).
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.003, momentum=0.9)

train_model(model, train_dataloader, criterion, optimizer, num_epochs, device)
# Last expression of the cell: its return value is shown as the cell output.
evaluate_model(model, test_dataloader, criterion, device)


Epoch [1/30], Step [100/230], Loss: 0.6968
Epoch [1/30], Step [200/230], Loss: 0.6626
Epoch [2/30], Step [100/230], Loss: 0.6531
Epoch [2/30], Step [200/230], Loss: 0.6638
Epoch [3/30], Step [100/230], Loss: 0.6616
Epoch [3/30], Step [200/230], Loss: 0.6170
Epoch [4/30], Step [100/230], Loss: 0.6176
Epoch [4/30], Step [200/230], Loss: 0.6222
Epoch [5/30], Step [100/230], Loss: 0.5859
Epoch [5/30], Step [200/230], Loss: 0.5573
Epoch [6/30], Step [100/230], Loss: 0.5610
Epoch [6/30], Step [200/230], Loss: 0.5154
Epoch [7/30], Step [100/230], Loss: 0.5517
Epoch [7/30], Step [200/230], Loss: 0.5019
Epoch [8/30], Step [100/230], Loss: 0.5512
Epoch [8/30], Step [200/230], Loss: 0.4905
Epoch [9/30], Step [100/230], Loss: 0.5220
Epoch [9/30], Step [200/230], Loss: 0.5084
Epoch [10/30], Step [100/230], Loss: 0.5479
Epoch [10/30], Step [200/230], Loss: 0.4942
Epoch [11/30], Step [100/230], Loss: 0.4791
Epoch [11/30], Step [200/230], Loss: 0.5368
Epoch [12/30], Step [100/230], Loss: 0.4942
Epoch 

(0.5123844356276095, 73.4375)

Save Model

In [15]:
# Persist the weights of the BP_PHQ_9 model. `model` is rebound by the MH_PHQ_S
# training cell further down, so run this cell directly after the BP_PHQ_9
# training to make sure the right weights are written.
# torch.save does not create missing parent directories, so make sure it exists.
os.makedirs('Models', exist_ok=True)
torch.save(model.state_dict(), 'Models/BP_PHQ_9.pth')

Train for the MH_PHQ_S Dataset

In [16]:
# Configuration
input_size = 56
hidden_size = 128
# NOTE(review): the section text announces a larger epoch budget for this final
# run, but 30 is the same value used during tuning — confirm the intended number.
num_epochs = 30

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Seed torch's global RNG so that torch-driven randomness in this cell (e.g. the
# sampling of a shuffling DataLoader without its own generator) is repeatable
# across re-runs.
torch.manual_seed(42)

# Initialize the model, criterion, and optimizer with the best MH_PHQ_S
# hyperparameters from the tuning run (Model_2, SGD, lr=0.001, momentum=0.9).
# This rebinds `model`, replacing the BP_PHQ_9 model object from the earlier cell.
model = Depression_Classifier_v_2(input_size, hidden_size).to(device)
# nn.BCELoss expects probabilities in [0, 1]
# (assumes the model ends in a sigmoid — TODO confirm in NN.py).
criterion = nn.BCELoss()
optimizer = optim.SGD(model.parameters(), lr=0.001, momentum=0.9)

train_model(model, train_dataloader2, criterion, optimizer, num_epochs, device)
# Last expression of the cell: its return value is shown as the cell output.
evaluate_model(model, test_dataloader2, criterion, device)


Epoch [1/30], Step [100/403], Loss: 0.6953
Epoch [1/30], Step [200/403], Loss: 0.6952
Epoch [1/30], Step [300/403], Loss: 0.6865
Epoch [1/30], Step [400/403], Loss: 0.6907
Epoch [2/30], Step [100/403], Loss: 0.6946
Epoch [2/30], Step [200/403], Loss: 0.6934
Epoch [2/30], Step [300/403], Loss: 0.6917
Epoch [2/30], Step [400/403], Loss: 0.6875
Epoch [3/30], Step [100/403], Loss: 0.6914
Epoch [3/30], Step [200/403], Loss: 0.6878
Epoch [3/30], Step [300/403], Loss: 0.6950
Epoch [3/30], Step [400/403], Loss: 0.6805
Epoch [4/30], Step [100/403], Loss: 0.6897
Epoch [4/30], Step [200/403], Loss: 0.6802
Epoch [4/30], Step [300/403], Loss: 0.7002
Epoch [4/30], Step [400/403], Loss: 0.6715
Epoch [5/30], Step [100/403], Loss: 0.6787
Epoch [5/30], Step [200/403], Loss: 0.6704
Epoch [5/30], Step [300/403], Loss: 0.7027
Epoch [5/30], Step [400/403], Loss: 0.6837
Epoch [6/30], Step [100/403], Loss: 0.6706
Epoch [6/30], Step [200/403], Loss: 0.6891
Epoch [6/30], Step [300/403], Loss: 0.6793
Epoch [6/30

(0.6400347181728908, 64.28571428571429)

In [17]:
# Persist the weights of the MH_PHQ_S model; `model` must still be the object
# trained in the MH_PHQ_S training cell directly above.
# torch.save does not create missing parent directories, so make sure it exists.
os.makedirs('Models', exist_ok=True)
torch.save(model.state_dict(), 'Models/MH_PHQ_S.pth')