In [135]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
import matplotlib.axes._axes as axes
import os

sns.set()

In [136]:
# import pytorch specific utils
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from torchsummary import summary
from datetime import datetime
import hiddenlayer as hl

In [137]:
cwd = os.getcwd()
data_file = f"{cwd}//df_depressionlevel_median_imputed.csv"
depression_data = pd.read_csv(data_file)
depression_data.head()

Unnamed: 0,Patient-ID,depression_score,date,week_num,location_daily_locationvariance,location_daily_loglocationvariance,location_daily_totaldistance,location_daily_averagespeed,location_daily_varspeed,location_daily_circadianmovement,...,call_incoming_daily_count,call_incoming_daily_distinctcontacts,call_incoming_daily_meanduration,call_incoming_daily_sumduration,call_incoming_daily_minduration,call_incoming_daily_maxduration,call_incoming_daily_modeduration,call_incoming_daily_timefirstcall,call_incoming_daily_timelastcall,depression_level
0,19HL,9,5/19/2020,1,0.041307,-3.009923,109518.1562,26.916578,4451.8862,0.523614,...,3.285714,3.285714,1403.285714,3441.571429,825.714286,2641.142857,2229.428571,394.142857,871.285714,2
1,19HL,12,5/26/2020,2,0.000125,-4.781617,13617.57104,4.725519,248.16336,-0.563407,...,3.571429,3.571429,894.35119,4736.0,9.857143,3103.857143,11.428571,634.142857,907.142857,3
2,19HL,11,6/2/2020,3,7.5e-05,-3.295846,8220.443966,75.056043,1018.057662,-0.340036,...,4.571429,4.571429,1946.67517,7748.285714,95.714286,5190.0,2585.285714,629.285714,1216.571429,3
3,19HL,7,6/9/2020,4,8.1e-05,-3.292259,9832.078288,11.578641,576.307436,-0.236498,...,3.857143,3.857143,603.566667,3692.857143,10.714286,2197.714286,70.0,300.714286,697.571429,2
4,19HL,15,7/7/2020,8,3.115409,-3.382519,254789.9331,67.341228,10160.117,0.214006,...,4.285714,4.285714,1949.355556,8088.428571,768.571429,3635.714286,2073.428571,507.857143,1041.857143,4


In [138]:
x_features = depression_data.drop(axis=1,
                                  labels=['Patient-ID',
                                          'depression_score',
                                          'date', 'week_num',
                                          'depression_level'])

y_features = depression_data['depression_score']

In [139]:
x_features_logscaled = x_features.copy()

for col in x_features.columns:
    skew = x_features[col].skew()
    if np.abs(skew) > 1:
        # then scale the feature
        x_features_logscaled[col] = x_features[col].apply(lambda x: np.log(x))

x_features_logscaled.head()

Unnamed: 0,location_daily_locationvariance,location_daily_loglocationvariance,location_daily_totaldistance,location_daily_averagespeed,location_daily_varspeed,location_daily_circadianmovement,location_daily_numberofsignificantplaces,location_daily_numberlocationtransitions,location_daily_radiusgyration,location_daily_timeattop1,...,ar_daily_sumvehicle,call_incoming_daily_count,call_incoming_daily_distinctcontacts,call_incoming_daily_meanduration,call_incoming_daily_sumduration,call_incoming_daily_minduration,call_incoming_daily_maxduration,call_incoming_daily_modeduration,call_incoming_daily_timefirstcall,call_incoming_daily_timelastcall
0,-3.186726,-3.009923,11.603846,3.292742,8.401083,0.523614,2.181224,4.030441,-4.361094,540.428571,...,3.478414,1.189584,1.189584,7.246572,8.143683,6.716249,7.878967,7.709501,394.142857,871.285714
1,-8.990827,-4.781617,9.519116,1.552977,5.514087,-0.563407,1.421386,2.410799,-3.818417,1043.285714,...,2.877765,1.272966,1.272966,6.796099,8.462948,2.288196,8.040401,2.436116,634.142857,907.142857
2,-9.50203,-3.295846,9.014379,4.318235,6.925652,-0.340036,0.619039,0.826679,-4.70567,1342.142857,...,2.231861,1.519826,1.519826,7.573878,8.955227,4.561368,8.554489,7.857591,629.285714,1216.571429
3,-9.423534,-3.292259,9.193406,2.449162,6.356641,-0.236498,1.098612,1.742969,-4.569582,663.857143,...,2.337969,1.349927,1.349927,6.402857,8.214156,2.371578,7.695173,4.248495,300.714286,697.571429
4,1.136361,-3.382519,12.448195,4.209773,9.226225,0.214006,1.349927,2.31677,-3.046489,713.714286,...,5.07897,1.455287,1.455287,7.575254,8.99819,6.644534,8.198561,7.636959,507.857143,1041.857143


In [140]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()
x_features_transformed = scaler.fit_transform(x_features_logscaled)
# %%
y_features = np.array(y_features)
y_features.shape

(192,)

In [141]:
X = torch.from_numpy(x_features_transformed.astype(np.float32))
Y = torch.from_numpy(y_features.astype(np.float32)).view(y_features.shape[0], 1)
# %%
from sklearn.model_selection import train_test_split

trainx, testx, trainy, testy = train_test_split(X, Y, test_size=0.2,
                                                random_state=42)

In [142]:
trainx.dtype

torch.float32

In [143]:
eps = torch.tensor(1e-7, dtype=torch.float32)


In [144]:
def coeff_determination(y_true, y_pred):
    ss_res = torch.sum((y_true - y_pred) ** 2)
    ss_tot = torch.sum((y_true - torch.mean(y_true)) ** 2)

    return (1 - ss_res / (ss_tot + eps))


In [145]:
from collections import OrderedDict

all_layers = OrderedDict()


def create_model(first_layer_units=1024):
    # create the constant part of the model
    all_layers['l0'] = nn.Linear(in_features=X.shape[1],
                                 out_features=first_layer_units)
    all_layers['r0'] = nn.ReLU()
    #all_layers['b0'] = nn.BatchNorm1d(num_features=first_layer_units)

    # populate the neuron list
    units_list = []
    units_list.append(first_layer_units)
    while first_layer_units % 2 == 0:
        first_layer_units /= 2
        units_list.append(int(first_layer_units))

    print(f"this is units_list: {units_list}")
    # create the variable part of the model
    for i, units in enumerate(units_list):
        if i < len(units_list) - 2:
            all_layers[f"l{i + 1}"] = nn.Linear(in_features=units_list[i],
                                                out_features=units_list[i + 1])
            all_layers[f"r{i + 1}"] = nn.ReLU()
            # all_layers[f"b{i + 1}"] = nn.BatchNorm1d(
            #     num_features=units_list[i + 1])
        else:
            break

    # return the created model
    print(i)
    all_layers[f"l{i + 1}"] = nn.Linear(in_features=units_list[i],
                                        out_features=1)

    # return the model
    return nn.Sequential(all_layers)

In [146]:
model = create_model(first_layer_units=1024)
model

this is units_list: [1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1]
9


Sequential(
  (l0): Linear(in_features=61, out_features=1024, bias=True)
  (r0): ReLU()
  (l1): Linear(in_features=1024, out_features=512, bias=True)
  (r1): ReLU()
  (l2): Linear(in_features=512, out_features=256, bias=True)
  (r2): ReLU()
  (l3): Linear(in_features=256, out_features=128, bias=True)
  (r3): ReLU()
  (l4): Linear(in_features=128, out_features=64, bias=True)
  (r4): ReLU()
  (l5): Linear(in_features=64, out_features=32, bias=True)
  (r5): ReLU()
  (l6): Linear(in_features=32, out_features=16, bias=True)
  (r6): ReLU()
  (l7): Linear(in_features=16, out_features=8, bias=True)
  (r7): ReLU()
  (l8): Linear(in_features=8, out_features=4, bias=True)
  (r8): ReLU()
  (l9): Linear(in_features=4, out_features=2, bias=True)
  (r9): ReLU()
  (l10): Linear(in_features=2, out_features=1, bias=True)
)

In [147]:
summary(model, input_size=(61, ))

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
            Linear-1                 [-1, 1024]          63,488
              ReLU-2                 [-1, 1024]               0
            Linear-3                  [-1, 512]         524,800
              ReLU-4                  [-1, 512]               0
            Linear-5                  [-1, 256]         131,328
              ReLU-6                  [-1, 256]               0
            Linear-7                  [-1, 128]          32,896
              ReLU-8                  [-1, 128]               0
            Linear-9                   [-1, 64]           8,256
             ReLU-10                   [-1, 64]               0
           Linear-11                   [-1, 32]           2,080
             ReLU-12                   [-1, 32]               0
           Linear-13                   [-1, 16]             528
             ReLU-14                   

In [148]:
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [149]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model.to(device)
device

device(type='cpu')

In [None]:
# train the model
epochs = 10_000

# stuff to store
train_losses = np.zeros(epochs)
test_losses = np.zeros(epochs)

train_r2s = np.zeros(epochs)
test_r2s = np.zeros(epochs)

for epoch in range(epochs):

    #move data to gpu
    inputs, targets = trainx.to(device), trainy.to(device)

    # zero the grad
    optimizer.zero_grad()

    # forwards pass and get losses
    logits = model(inputs)
    train_loss = criterion(logits, targets)
    train_r2 = coeff_determination(targets, logits)

    # step thru optimizer
    train_loss.backward()
    optimizer.step()


    # Now validate
    print("Evaluating Network.....")
    with torch.no_grad():
        model.eval()

        #move data to gpu
        inputs, targets = testx.to(device), testy.to(device)

        # forwards pass and get losses
        logits = model(inputs)
        test_loss = criterion(logits, targets)
        test_r2 = coeff_determination(targets, logits)


    model.train()
    print("=" * 50)
    # now getting out to the next epoch - all batches done - store epoch
    # loss and accus
    train_losses[epoch] = train_loss.item()
    test_losses[epoch] = test_loss.item()
    train_r2s[epoch] = train_r2
    test_r2s[epoch] = test_r2


    print(f"Epoch:{epoch}--TrainLoss:{train_losses[epoch]}--TrainR2"
          f":{train_r2s[epoch]}--TestLoss:{test_losses[epoch]}--TestR2"
          f":{test_r2s[epoch]}")

Evaluating Network.....
Epoch:0--TrainLoss:117.52787780761719--TrainR2:-6.721092700958252--TestLoss:127.81476593017578--TestR2:-8.580438613891602
Evaluating Network.....
Epoch:1--TrainLoss:117.44315338134766--TrainR2:-6.715527057647705--TestLoss:127.7081069946289--TestR2:-8.572443962097168
Evaluating Network.....
Epoch:2--TrainLoss:117.34107208251953--TrainR2:-6.708820343017578--TestLoss:127.56244659423828--TestR2:-8.56152629852295
Evaluating Network.....
Epoch:3--TrainLoss:117.20267486572266--TrainR2:-6.699728488922119--TestLoss:127.39791107177734--TestR2:-8.549193382263184
Evaluating Network.....
Epoch:4--TrainLoss:117.04439544677734--TrainR2:-6.689330101013184--TestLoss:127.1757583618164--TestR2:-8.532541275024414
Evaluating Network.....
Epoch:5--TrainLoss:116.83293914794922--TrainR2:-6.675437927246094--TestLoss:126.92415618896484--TestR2:-8.51368236541748
Evaluating Network.....
Epoch:6--TrainLoss:116.60167694091797--TrainR2:-6.660245418548584--TestLoss:126.73902130126953--TestR2:-

In [None]:

# plot the performance
epoch_num = range(1,epochs+1)
loss = train_losses
train_r2 = train_r2s
val_loss = test_losses
test_r2 = test_r2s
plot_df = pd.DataFrame(data=np.c_[epoch_num,loss,train_r2,val_loss,
                                  test_r2],
                       columns=['epochs','loss', 'train_r2', 'val_loss',
                                'test_r2'])

# do the actual plots
sns.set(font_scale=1)
f, ax = plt.subplots(1, 1, figsize=(15,8))
# sns.lineplot(data=plot_df, x='epochs', y='loss', ax=ax, label='train loss', linewidth=3)
sns.lineplot(data=plot_df, x='epochs', y='train_r2', ax=ax, label='train_r2',
             linewidth=3)
# sns.lineplot(data=plot_df, x='epochs', y='val_loss', ax=ax, label='val loss', linewidth=3)
sns.lineplot(data=plot_df, x='epochs', y='test_r2', ax=ax,
             label='test_r2', linewidth=3)
ax.set_ylabel('Loss or R2')
ax.set_xlabel('Epochs')
plt.setp(ax.get_legend().get_texts(), fontsize='18'); # for legend text