In [3]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
from datetime import datetime
import os
import random
from keras.preprocessing import sequence
import tensorflow as tf
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.optimizers import Adam
from keras.models import load_model
from keras.callbacks import ModelCheckpoint
#from sklearn.cross_validation import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import f1_score
from sklearn.metrics import confusion_matrix
from keras.layers import Bidirectional
from keras.layers import Flatten
from keras.layers import TimeDistributed
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
from keras.layers import ConvLSTM2D
from keras import backend as K

In [4]:
DepressionLevelsFile = "./data/scores.csv"
dperessionLevelsData = pd.read_csv(DepressionLevelsFile)
dperessionLevelsData.head(5)

Unnamed: 0,number,days,gender,age,afftype,melanch,inpatient,edu,marriage,work,madrs1,madrs2
0,condition_1,11,2,35-39,2.0,2.0,2.0,6-10,1.0,2.0,19.0,19.0
1,condition_2,18,2,40-44,1.0,2.0,2.0,6-10,2.0,2.0,24.0,11.0
2,condition_3,13,1,45-49,2.0,2.0,2.0,6-10,2.0,2.0,24.0,25.0
3,condition_4,13,2,25-29,2.0,2.0,2.0,11-15,1.0,1.0,20.0,16.0
4,condition_5,13,2,50-54,2.0,2.0,2.0,11-15,2.0,2.0,26.0,26.0


In [5]:
names = dperessionLevelsData['number'][:23]
MADRS1 = dperessionLevelsData['madrs1'][:23]
MADRS2 = dperessionLevelsData['madrs2'][:23]

In [6]:
MADRS_scores = []
for x in range(len(MADRS1)):
    avg_score = (int(MADRS1[x]) + int(MADRS2[x])) / 2
    if avg_score >= 7 and avg_score <= 19:
        MADRS_scores.append(0)
    if avg_score >= 20 and avg_score <= 34:
        MADRS_scores.append(1)

In [7]:
NameDepLevelMap = dict(zip(names, MADRS_scores))
NameDepLevelMap

{'condition_1': 0,
 'condition_2': 0,
 'condition_3': 1,
 'condition_4': 0,
 'condition_5': 1,
 'condition_6': 0,
 'condition_7': 1,
 'condition_8': 0,
 'condition_9': 1,
 'condition_10': 1,
 'condition_11': 1,
 'condition_12': 1,
 'condition_13': 0,
 'condition_14': 1,
 'condition_15': 0,
 'condition_16': 0,
 'condition_17': 0,
 'condition_18': 0,
 'condition_19': 1,
 'condition_20': 1,
 'condition_21': 1,
 'condition_22': 1,
 'condition_23': 1}

In [8]:
ConditionGroupFileNames = os.listdir('./data/condition')

In [9]:
X = []
y = []

In [10]:
for fileName in ConditionGroupFileNames:
    df = pd.read_csv('data/condition/'+str(fileName))
    dates = df['date'].unique()
    activityLevelsPerDay = []
    for date in dates:
        if len(df[df['date']==date]) == 1440:
            temp = pd.DataFrame(df[df['date']==date]).drop(columns=['timestamp','date'])
            activityLevelsPerDay.append(temp)
    for dailyActivityLevel in activityLevelsPerDay:
        activityVector = np.array(dailyActivityLevel["activity"])
        if len(activityVector) == 1440:
            X.append(activityVector)
            y.append(NameDepLevelMap[str(fileName[:-4])])

In [11]:
combinedDict = list(zip(X, y))
random.shuffle(combinedDict)
X[:], y[:] = zip(*combinedDict)

In [13]:
X = np.array(X)
y = np.array(y)
X = np.reshape(X, (X.shape[0], 1, X.shape[1]))
print(X)

[[[   5   73  227 ...    0    0    0]]

 [[ 477  796  579 ...  210   56   93]]

 [[   3    3    3 ...    3    3    3]]

 ...

 [[   0    0    0 ...  296 1341  338]]

 [[ 163  153  259 ...  959  723 1196]]

 [[   0    0    0 ...    0    0    0]]]


In [14]:
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [15]:
seed = 7
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=seed)
accuracy_scores = []
prec_scores = []
rec_scores = []
f1_scores = []

In [16]:
for train, test in kfold.split(X, y):
    model = Sequential()
    model.add(LSTM(64, input_shape=(1, 1440), return_sequences=True))
    model.add(LSTM(64, return_sequences=True))
    model.add(LSTM(64))
    model.add(Dense(1, activation='sigmoid'))
    
    adam = Adam(lr=0.001)
    model.compile(loss='binary_crossentropy', optimizer=adam, metrics=['accuracy', recall_m, precision_m, f1_m])
    
    model.fit(X[train], y[train], epochs=10, batch_size=128, verbose=0)
    scores = model.evaluate(X[test], y[test], verbose=0)
    
    print("%s: %.2f%%" % (model.metrics_names[1], scores[1]*100))
    print("%s: %.2f%%" % (model.metrics_names[2], scores[2]))
    print("%s: %.2f%%" % (model.metrics_names[3], scores[3]))
    print("%s: %.2f%%" % (model.metrics_names[4], scores[4]))
    print("\n")
    accuracy_scores.append(scores[1] * 100)
    prec_scores.append(scores[2])
    rec_scores.append(scores[3])
    f1_scores.append(scores[4])
    
print("%.2f%% (+/- %.2f%%)" % (np.mean(accuracy_scores), np.std(accuracy_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(prec_scores), np.std(prec_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(rec_scores), np.std(rec_scores)))
print("%.2f%% (+/- %.2f%%)" % (np.mean(f1_scores), np.std(f1_scores)))

2023-04-01 22:47:19.518190: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
  super().__init__(name, **kwargs)


accuracy: 55.56%
recall_m: 0.97%
precision_m: 0.63%
f1_m: 0.76%


accuracy: 75.00%
recall_m: 0.91%
precision_m: 0.70%
f1_m: 0.79%


accuracy: 66.67%
recall_m: 0.97%
precision_m: 0.68%
f1_m: 0.79%


accuracy: 58.33%
recall_m: 1.00%
precision_m: 0.53%
f1_m: 0.70%


accuracy: 63.89%
recall_m: 0.66%
precision_m: 0.57%
f1_m: 0.61%


accuracy: 58.33%
recall_m: 0.77%
precision_m: 0.77%
f1_m: 0.73%


accuracy: 58.33%
recall_m: 0.28%
precision_m: 0.36%
f1_m: 0.31%


accuracy: 52.78%
recall_m: 0.97%
precision_m: 0.52%
f1_m: 0.67%


accuracy: 44.44%
recall_m: 0.79%
precision_m: 0.50%
f1_m: 0.60%


accuracy: 62.86%
recall_m: 0.97%
precision_m: 0.78%
f1_m: 0.85%


59.62% (+/- 7.83%)
0.83% (+/- 0.21%)
0.60% (+/- 0.13%)
0.68% (+/- 0.14%)
