In [1]:
# import libraries
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
import balance_dataset, create_handcraft_features
import ast
from statistics import mean, stdev

In [2]:
analytic_train = pd.read_csv('Data/train2.csv')
analytic_test = pd.read_csv('Data/test2.csv')
analytic_validation = pd.read_csv("Data/validation2.csv")

# analytic_train = analytic_train.drop(['target'], axis=1)
# analytic_test = analytic_test.drop(['target'], axis=1)

train_features = pd.read_csv('Data/extracted_features_for_train2.csv')
test_features = pd.read_csv('Data/extracted_features_for_test2.csv')
validation_features = pd.read_csv('Data/extracted_features_for_validation2.csv')

# train_features["target"] = 0
# test_features["target"] = 0
# validation_features["target"] = 0

In [3]:
# function and assign labels for second task
def assign_targets(analytic, extracted,df_handcraft):
    extracted["means"] = df_handcraft["means"]
    extracted["stds"] = df_handcraft["stds"]
    extracted["max"] = df_handcraft["max"]
    extracted["min"] = df_handcraft["min"]
    extracted["target"] = 0
    for i in range(0, len(analytic),48):
          extracted.loc[int(i/48), 'target'] = (analytic.loc[i,'afftype']-1)


    return extracted

In [4]:
def create_handcraft_features(data):
    # mean value of activity for every 30 mins
    means = []

    # activity for every 1 min of a whole day
    max_activity = []
    min_activity = []
    stds = []

    # patient's id
    p_id = []

    data2 = pd.DataFrame()

    activity_list = []
    for i in range(0, len(data)):
        activity_list.append(data.iloc[i]["activity"])
        if((i-1)%48 == 0 and i!=0):
          means.append(mean(activity_list))
          stds.append(stdev(activity_list))
          max_activity.append(max(activity_list))
          min_activity.append(min(activity_list))
          p_id.append(data.iloc[int(i/48)]["patient"])
          activity_list = []

    data2["means"] = means
    data2["stds"] = stds
    data2["max"] = max_activity
    data2["min"] = min_activity
    data2["patient"] = p_id

    return data2

In [52]:
df_train = assign_targets(analytic_train, train_features,create_handcraft_features(analytic_train))
df_test = assign_targets(analytic_test, test_features,create_handcraft_features(analytic_test))
df_validation = assign_targets(analytic_test, validation_features,create_handcraft_features(analytic_validation))

# balance the dataset
df_train = balance_dataset.balance_dataset(df_train)
df_test = balance_dataset.balance_dataset(df_test)
df_validation = balance_dataset.balance_dataset(df_validation)
df_validation = df_validation.dropna(axis='rows')

In [53]:
# scale the data from all the features except the target
scaler = StandardScaler()
df_train.iloc[:, 1:-1] = scaler.fit_transform(df_train.iloc[:, 1:-1])
df_test.iloc[:, 1:-1] = scaler.transform(df_test.iloc[:, 1:-1])
df_validation.iloc[:, 1:-1] = scaler.transform(df_validation.iloc[:, 1:-1])

# split to features and targets
X_train = df_train.drop(['target'], axis=1)
y_train = df_train['target']
X_test = df_test.drop(['target'], axis=1)
y_test = df_test['target']
X_validation = df_validation.drop(['target'], axis=1)
y_validation = df_validation['target']

In [54]:
# create a Neural Network that will take the features and the scores and will classify them
# to healthy or depressed patients
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(34, 30)
        self.fc2 = nn.Linear(30, 15)
        self.fc3 = nn.Linear(15, 1)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        # we have classification problem so we will use sigmoid function
        x = torch.sigmoid(self.fc3(x))
        return x


X_train = X_train.reset_index(drop=True)
X_test = X_test.reset_index(drop=True)
y_train = y_train.reset_index(drop=True)
y_test = y_test.reset_index(drop=True)
X_validation = X_validation.reset_index(drop=True)
y_validation = y_validation.reset_index(drop=True)

print()
print(f'We have {len(X_train)} patients in the trainig set')
print(f'{len(X_test)} patients in the test set')
print(f'and {len(X_validation)} patients in the validation set')
print()
print('-----------------------------')

# convert the data to tensors
X_train = torch.tensor(X_train.values, dtype=torch.float32)
X_test = torch.tensor(X_test.values, dtype=torch.float32)
y_train = torch.tensor(y_train.values, dtype=torch.float32)
y_test = torch.tensor(y_test.values, dtype=torch.float32)
X_validation = torch.tensor(X_validation.values, dtype=torch.float32)
y_validation = torch.tensor(y_validation.values, dtype=torch.float32)


##########################################################################################
##########################################################################################
##########################################################################################


def train_test(X_train, X_test, X_validation, y_train, y_test, y_validation, epochs, hyperparameters, lr_decay):
    lr = hyperparameters['lr']
    # set the seeds
    torch.manual_seed(42)
    np.random.seed(42)
    # create the model
    net = Net()
    # create the optimizer
    optimizer = torch.optim.SGD(net.parameters(), **hyperparameters)
    # create the loss function
    criterion = nn.BCELoss() # Binary Cross Entropy loss for binary classification

    # create the lists for the loss and accuracy
    train_losses = []
    validation_losses = []
    test_accuracy = []
    validation_accuracy = []

    # train the model
    for epoch in range(epochs):
        epoch += 1
        # set the model to train mode
        net.train()
        # clear the gradients
        optimizer.zero_grad()
        # make the predictions
        y_pred = net(X_train)
        # calculate the loss
        loss = criterion(y_pred, y_train.unsqueeze(1).float())
        # backpropagation
        loss.backward()
        # update the weights
        optimizer.step()
        # append the loss to the list
        train_losses.append(loss.item())
        # calculate the accuracy
        correct = 0
        total = 0
        with torch.no_grad():
            # set the model to evaluation mode
            net.eval()
            # make the predictions
            y_pred = net(X_validation)
            # calculate the loss
            loss = criterion(y_pred, y_validation.unsqueeze(1).float())
            # append the loss to the list
            validation_losses.append(loss.item())
            # calculate the accuracy
            correct = 0
            total = 0
            # round the predictions
            y_pred = torch.round(y_pred)
            # calculate the accuracy
            correct += (y_pred == y_validation.unsqueeze(1)).sum().item()
            total += y_validation.size(0)
            # append the accuracy to the list
            validation_accuracy.append(correct/total)

        # print the results for every 100 epochs
        if epoch % 100 == 0:
            y_pred = net(X_test)
            # calculate the loss
            test_loss = criterion(y_pred, y_test.unsqueeze(1).float())
            correct = 0
            total = 0
            # round the predictions
            y_pred = torch.round(y_pred)
            # calculate the accuracy
            correct += (y_pred == y_test.unsqueeze(1)).sum().item()
            total += y_test.size(0)
            # append the accuracy to the list
            test_accuracy.append(correct/total)

            print(f'Epoch: {epoch}/{epochs}, Train Loss: {train_losses[-1]:.4f}, Validation Loss: {validation_losses[-1]:.4f}')
            print(f'-------------- Validation Accuracy: {validation_accuracy[-1]:.4f}, Test Accuracy: {test_accuracy[-1]:.4f}')

        # update the learning rate
        if epoch % 1000==0:
            lr= lr * lr_decay

    return test_accuracy[-1], net

# kfold cross validation
# we will use 5 folds
# we will train the model 5 times
def kfold(X_train,  y_train, epochs, hyperparameters, lr_decay):
    from sklearn.model_selection import KFold
    kfolds = 5
    kf = KFold(n_splits=kfolds, shuffle=True, random_state=42)
    # create the lists for the accuracy
    test_accuracy = []

    # train the model 5 times
    for fold, (train_index, test_index) in enumerate(kf.split(X_train)):
        X_train_fold = X_train[train_index]
        X_test_fold = X_train[test_index]
        y_train_fold = y_train[train_index]
        y_test_fold = y_train[test_index]
        print(f'Fold: {fold+1}/{kfolds}')
        print('-----------------------------')
        # train the model
        test_acc, net = train_test(X_train_fold, X_test_fold, y_train_fold, y_test_fold, epochs, hyperparameters, lr_decay)
        # append the accuracy to the list
        test_accuracy.append(test_acc)
        print('-----------------------------')
    # calculate the mean accuracy
    mean_accuracy = np.mean(test_accuracy)
    print(f'Mean accuracy: {mean_accuracy}')
    print('-----------------------------')
    return mean_accuracy, net

#    HYPERPARAMETERS
#^^^^^^^^^^^^^^^^^^^^^^^^
# lr = learning rate
# wd = weight decay
# mm = momentum
# ld = learning rate decay

for lr in {0.0005, 0.002, 0.001}:
    for wd in {0.0001, 0.00001, 0.000001}:
        for mm in {0.9, 0.95}:
            for lr_decay in {0.8, 0.9, 0.99}:
                for epochs in {500}:
                    hyperparameters = {'lr': lr, 'weight_decay': wd, 'momentum': mm}
                    lr_decay = lr_decay
                    # run the model
                    print()
                    print(f'lr: {lr}, wd: {wd}, mm: {mm}, lr_decay: {lr_decay}, epochs: {epochs}')
                    test_accuracy, net = train_test(X_train, X_test, X_validation, y_train, y_test, y_validation, epochs, hyperparameters, lr_decay)

                    # csv with colums: epochs, lr, weight_decay, momentum, accuracy
                    # in order to find the best hyperparameters
                    df = pd.read_csv('outputs/model_2/NN_hyperparameters.csv')

                    df = pd.concat([df, pd.DataFrame([[epochs, hyperparameters['lr'], hyperparameters['weight_decay'], \
                                                    hyperparameters['momentum'], lr_decay, test_accuracy]], columns=['epochs', 'lr', \
                                                    'weight_decay', 'momentum', 'lr_decay',  'accuracy'])], axis=0, ignore_index=True)

                    # change the order of the columns
                    df = df[['epochs', 'lr', 'weight_decay', 'momentum', 'lr_decay',  'accuracy']]
                    df.to_csv('outputs/model_2/NN_hyperparameters.csv', index=False, header=True)
                    # save the model's weights in order to plot the features with their weights
                    torch.save(net.state_dict(), 'outputs/model_2/model_2_weights.pth')
                    print('-----------------------------')

print('-----------------------------')
print('-----------------------------')
print('Best model:')
# read the csv with the hyperparameters and run the model with the best accuracy
df = pd.read_csv('outputs/model_2/NN_hyperparameters.csv')
# sort the values by accuracy
df = df.sort_values(by=['accuracy'], ascending=False)
# reset the index
df = df.reset_index(drop=True)
# get the best hyperparameters
epochs = df['epochs'][0]
lr = df['lr'][0]
weight_decay = df['weight_decay'][0]
momentum = df['momentum'][0]
lr_decay = df['lr_decay'][0]
hyperparameters = {'lr': lr, 'weight_decay': weight_decay, 'momentum': momentum}
# run the model
print()
print(f'lr: {lr}, wd: {weight_decay}, mm: {momentum}, lr_decay: {lr_decay}, epochs: {epochs}')
test_accuracy, net = train_test(X_train, X_test, X_validation, y_train, y_test, y_validation, epochs, hyperparameters, lr_decay)

# save the model's weights in order to plot the features with their weights
torch.save(net.state_dict(), 'outputs/model_2/model_2_weights.pth')


We have 162 patients in the trainig set
56 patients in the test set
and 22 patients in the validation set

-----------------------------

lr: 0.002, wd: 1e-05, mm: 0.9, lr_decay: 0.8, epochs: 500
Epoch: 100/500, Train Loss: 0.6928, Validation Loss: 0.6292
-------------- Validation Accuracy: 0.8636, Test Accuracy: 0.5000
Epoch: 200/500, Train Loss: 0.6818, Validation Loss: 0.6558
-------------- Validation Accuracy: 0.9091, Test Accuracy: 0.5893
Epoch: 300/500, Train Loss: 0.6702, Validation Loss: 0.6637
-------------- Validation Accuracy: 0.7273, Test Accuracy: 0.6429
Epoch: 400/500, Train Loss: 0.6520, Validation Loss: 0.6608
-------------- Validation Accuracy: 0.6818, Test Accuracy: 0.6071
Epoch: 500/500, Train Loss: 0.6220, Validation Loss: 0.6490
-------------- Validation Accuracy: 0.7727, Test Accuracy: 0.5714
-----------------------------

lr: 0.002, wd: 1e-05, mm: 0.9, lr_decay: 0.9, epochs: 500
Epoch: 100/500, Train Loss: 0.6928, Validation Loss: 0.6292
-------------- Validatio