In [1]:
import pickle as pkl
import numpy as np
import pandas as pd
from sklearn.metrics import mean_absolute_percentage_error
import torch


In [2]:
PCLscores = pd.read_csv("PCL_filter.csv")

In [3]:
PCLscores["Day"] = PCLscores["Day"].astype(int)

In [4]:
ids = PCLscores["ID"].unique().tolist()

In [5]:
for id in ids:
  temp = PCLscores[PCLscores["ID"] == id]
  days = temp["Day"].unique().tolist()
  all_days = list(range(0, 151))
  missing_days = list(set(all_days) - set(days))
  for day in missing_days:
    new_row = {"ID": id, "Day": day}
    PCLscores = pd.concat([PCLscores, pd.DataFrame([new_row])], ignore_index=True)

PCLscores = PCLscores.sort_values(["ID", "Day"])


In [24]:
def nan_func(val):
    if np.isnan(val):
        return (0,False)
    else:
        return (val,True)


In [25]:
all_features = []
all_output_mask = []
all_output_list = []

loss_mask = []
loss_list = []

for id in ids:
    features = []
    output_mask = []
    output_list = []
    temp = PCLscores[PCLscores["ID"] == id]
    temp = temp.reset_index()
    for ii in range(len(temp)-7):
        if np.isnan(temp['PCL'][ii] + temp['PCL_INTRU'][ii] + temp['PCL_AVO'][ii] + temp['PCL_NUMB'][ii] + temp['PCL_HYPER'][ii]):
            features.append([0.]*5)
        else:
            features.append(
                [temp['PCL'][ii], temp['PCL_INTRU'][ii], temp['PCL_AVO'][ii], temp['PCL_NUMB'][ii], temp['PCL_HYPER'][ii]])

        x = nan_func(temp['PCL'][ii+1])


        output_mask.append(x[1])
        loss_mask.append(x[1])

        output_list.append(x[0])
        loss_list.append(x[0])

    all_features.append(features)
    all_output_mask.append(output_mask)
    all_output_list.append(output_list)


In [53]:
train_x = torch.tensor(all_features[:200],dtype=torch.float32)
train_y = torch.tensor(all_output_list[:200],dtype=torch.float32)
train_y = train_y.reshape((train_y.shape[0],train_y.shape[1],1))
train_mask = torch.tensor(all_output_mask[:200]).reshape((train_y.shape[0],train_y.shape[1],1))
test_x = torch.tensor(all_features[200:],dtype=torch.float32)
test_y = torch.tensor(all_output_list[200:],dtype=torch.float32)
test_y = test_y.reshape((test_y.shape[0],test_y.shape[1],1))
test_mask = torch.tensor(all_output_mask[200:]).reshape((test_y.shape[0],test_y.shape[1],1))

In [93]:
PCL_GRU = torch.nn.GRU(input_size = len(all_features[0][0]),hidden_size = 1)
optimizer = torch.optim.Adam(PCL_GRU.parameters(), lr=0.01,weight_decay = 1e-5)
#scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=75, gamma=0.5)
epochs = 1000
lossfcn = torch.nn.MSELoss()
PCL_GRU.train()
for i in range(epochs):
    optimizer.zero_grad()
    y_pred = PCL_GRU(train_x)
    loss = lossfcn(y_pred[0][train_mask], train_y[train_mask])
    loss.backward()
    optimizer.step()
    print(i, loss.item())

0 2.1636343002319336
1 2.0553858280181885
2 1.9639545679092407
3 1.8872449398040771
4 1.8230310678482056
5 1.769176721572876
6 1.7237671613693237
7 1.6851677894592285
8 1.6520299911499023
9 1.623268485069275
10 1.598022699356079
11 1.5756182670593262
12 1.555527687072754
13 1.5373399257659912
14 1.520734190940857
15 1.505458950996399
16 1.4913171529769897
17 1.4781534671783447
18 1.4658440351486206
19 1.454290509223938
20 1.443413496017456
21 1.433148980140686
22 1.4234445095062256
23 1.4142570495605469
24 1.4055505990982056
25 1.397294282913208
26 1.3894624710083008
27 1.3820321559906006
28 1.374983549118042
29 1.3682985305786133
30 1.3619602918624878
31 1.3559538125991821
32 1.3502644300460815
33 1.3448785543441772
34 1.3397825956344604
35 1.3349640369415283
36 1.3304100036621094
37 1.3261083364486694
38 1.3220467567443848
39 1.318213701248169
40 1.314597487449646
41 1.3111869096755981
42 1.3079708814620972
43 1.3049389123916626
44 1.3020803928375244
45 1.299385666847229
46 1.2968450

In [55]:
def smape(A, F):
    return 100/len(A) * np.sum(2 * np.abs(F - A) / (np.abs(A) + np.abs(F)))

In [94]:
test_preds = PCL_GRU(test_x)
test_preds = test_preds[0][test_mask].detach().numpy()


smape(test_y[test_mask].detach().numpy(), test_preds)



42.50981391934539