In [None]:
import torch
import torch.nn as nn
import statistics
import datetime
from net_archs import LSTMModel2 as LSTMModel
from sklearn.metrics import mean_squared_error, roc_auc_score


In [None]:
# Load the serialized dataset objects from ./dataset2.
# The five feature objects (nts, ts1..ts4) are later passed together as the
# model input and `y` is used as the target (see run_local).
# NOTE(review): torch.load may unpickle arbitrary objects depending on the
# torch version / `weights_only` setting -- only load files you trust.
nts, y = (
    torch.load('./dataset2/nts.pt'),
    torch.load('./dataset2/y.pt'),
)
ts1, ts2, ts3, ts4 = (
    torch.load('./dataset2/ts1.pt'),
    torch.load('./dataset2/ts2.pt'),
    torch.load('./dataset2/ts3.pt'),
    torch.load('./dataset2/ts4.pt'),
)

In [None]:
# Train test split
from sklearn.model_selection import StratifiedKFold, KFold

In [None]:
def do_split(split, dataset, n_splits=50):
    """Partition a training set into consecutive portions of given relative size.

    The samples are cut, in their existing order, into ``n_splits`` contiguous
    chunks of (almost) equal size: the first ``n_samples % n_splits`` chunks
    hold one extra sample, which is the same rule an unshuffled ``KFold``
    applies to its test folds. Each entry of ``split`` then claims the next
    run of chunks in proportion to its fraction.

    Chunk boundaries are derived from the *cumulative* fraction and rounded,
    so floating-point noise (``0.58 * 50 == 28.999999999999996``) or
    fractions that are not a multiple of ``1 / n_splits`` no longer cause
    chunks to be silently dropped.

    Args:
        split: Iterable of fractions, one per portion, in the order in which
            the portions are taken from the data (e.g. ``[0.5, 0.3, 0.2]``).
        dataset: Sequence of tensors sharing the same first dimension. The
            last entry is treated as the target and all preceding entries as
            the inputs (the notebook passes
            ``(nts, ts1, ts2, ts3, ts4, y)``).
        n_splits: Number of equal chunks used as the allocation unit.

    Returns:
        ``(x_split, y_split)``: two lists with one entry per fraction.
        ``x_split[k]`` is a tuple of input tensors and ``y_split[k]`` the
        matching target tensor of portion ``k``.

    Raises:
        ValueError: If there are fewer samples than ``n_splits`` or if a
            fraction is too small to receive at least one chunk.
    """
    n_samples = dataset[0].shape[0]
    if n_samples < n_splits:
        raise ValueError(
            f'Cannot cut {n_samples} samples into {n_splits} non-empty chunks.')

    # One tuple of n_splits contiguous chunks per tensor; chunk j of every
    # tensor covers the same sample range.
    chunked = [torch.tensor_split(tensor, n_splits, dim=0) for tensor in dataset]

    x_split = []
    y_split = []

    first = 0
    cumulative = 0.0
    for fraction in split:
        cumulative += fraction
        # Round the running total (not each fraction on its own) so that
        # rounding errors cannot accumulate across portions.
        last = min(n_splits, int(round(cumulative * n_splits)))
        if last <= first:
            raise ValueError(
                f'Fraction {fraction} is too small to receive any of the '
                f'{n_splits} chunks.')
        *inputs, target = (torch.cat(chunks[first:last], 0) for chunks in chunked)
        x_split.append(tuple(inputs))
        y_split.append(target)
        first = last

    return x_split, y_split


In [None]:
# Define train function
from net_archs import LSTMModel2 as LSTMModel
from sklearn.metrics import mean_squared_error, roc_auc_score

def train_lstm(net, optimizer, x, y, X_test, y_test, num_epoch=4, batch_size=256, print_every=-1):
  """Train ``net`` with shuffled mini-batches and evaluate it after every epoch.

  Each epoch draws a fresh random permutation of the samples, walks through it
  in slices of ``batch_size`` and takes one optimizer step per slice on the
  MSE between ``net(x_mini)`` and the matching targets. After every epoch
  ``test(net, X_test, y_test)`` is called, which prints AUC and MSE.

  Args:
    net: Model that is called with a 5-tuple ``(nts, ts1, ts2, ts3, ts4)``.
    optimizer: Object whose ``zero_grad()`` and ``step()`` are invoked once
      per mini-batch.
    x: 5-tuple of input tensors, all indexed along their first dimension.
    y: Target tensor aligned with the inputs along the first dimension.
    X_test: Held-out inputs, forwarded unchanged to ``test``.
    y_test: Held-out targets, forwarded unchanged to ``test``.
    num_epoch: Number of passes over the training data.
    batch_size: Number of samples per mini-batch (the last one may be smaller).
    print_every: ``-1`` (default) logs the loss of the last mini-batch once
      per epoch; a positive value ``k`` logs every ``k``-th mini-batch instead.

  Raises:
    ValueError: If ``print_every`` is neither ``-1`` nor a positive number.
  """
  if print_every != -1 and print_every <= 0:
    raise ValueError('print_every must be -1 or a positive integer')

  # Loop-invariant set-up: the loss module and the unpacked inputs.
  criterion = nn.MSELoss()
  nts, ts1, ts2, ts3, ts4 = x
  n_samples = nts.size(0)

  for epoch in range(num_epoch):
    # Mini batch sgd over a fresh shuffle of the sample indices
    permutation = torch.randperm(n_samples)
    loss = None
    for step, offset in enumerate(range(0, n_samples, batch_size)):
      indices = permutation[offset:offset + batch_size]
      x_mini = (nts[indices], ts1[indices], ts2[indices], ts3[indices], ts4[indices])
      y_mini = y[indices]
      y_pred = net(x_mini)
      # MSELoss already reduces to a scalar, so no further reduction is needed.
      loss = criterion(y_pred, y_mini)
      optimizer.zero_grad()
      loss.backward()
      optimizer.step()
      if print_every != -1 and step % print_every == 0:
        print(f'Epoch: {epoch + 1}, Iteration: {step}, Loss: {loss.item()}')
    # `loss` stays None when there was nothing to train on in this epoch.
    if print_every == -1 and loss is not None:
      print(f'Epoch: {epoch + 1}, Loss: {loss.item()}')
    test(net, X_test, y_test)

def test(net, x_test, y_test):
  """Evaluate ``net`` on held-out data and print/return MSE and ROC AUC.

  The forward pass runs without gradient tracking. If ``net`` is an
  ``nn.Module`` it is switched to evaluation mode for the forward pass so that
  mode-dependent layers (e.g. dropout, batch norm), if the model has any, do
  not distort the metrics; every submodule's previous mode is restored
  afterwards, so calling this between training epochs does not alter training.

  Args:
    net: Model called as ``net(x_test)``; its output must be convertible with
      ``.numpy()``.
    x_test: Inputs passed to the model as-is.
    y_test: Target tensor; converted to a NumPy array for the metrics.

  Returns:
    Tuple ``(mse, auc)`` as computed by ``mean_squared_error`` and
    ``roc_auc_score``.
  """
  # Remember the mode of every submodule so it can be restored exactly.
  saved_modes = [(m, m.training) for m in net.modules()] if isinstance(net, nn.Module) else []
  if saved_modes:
    net.eval()
  try:
    with torch.no_grad():
      y_pred = net(x_test)
  finally:
    for module, mode in saved_modes:
      module.training = mode

  y_pred = y_pred.detach().numpy()
  y_true = y_test.detach().numpy()

  auc = roc_auc_score(y_true, y_pred)
  mse = mean_squared_error(y_true, y_pred)
  print(f'auc {auc} mse {mse}')
  return mse, auc

In [None]:
def run_local(split, nts=nts, y=y, ts1=ts1, ts2=ts2, ts3=ts3, ts4=ts4):
    """Cross-validate independently trained models on portions of the data.

    The data is divided with a 5-fold ``StratifiedKFold``. Within each fold
    the training part is partitioned by ``do_split`` according to ``split``;
    a fresh ``LSTMModel`` is trained on every portion on its own and scored
    on the fold's test part. MSE and AUC of all models are collected and
    printed together with the wall-clock duration of the whole run.

    Args:
        split: Fractions handed to ``do_split``; one model is trained per
            entry and per fold.
        nts, ts1, ts2, ts3, ts4: Input tensors, indexed along their first
            dimension. The defaults are the module-level tensors as they
            were when this function was defined.
        y: Targets, also used for stratification.

    Returns:
        None. Results are reported via ``print``.
    """
    mses = []
    aucs = []

    # Kept under its own name and never reassigned, so that the reported
    # duration really spans the complete run.
    run_start = datetime.datetime.now()

    skf = StratifiedKFold(n_splits=5)
    for fold_idx, (train_idx, test_idx) in enumerate(skf.split(nts, y)):
        print(f"Fold {fold_idx + 1}:")

        trainset = (
            nts[train_idx],
            ts1[train_idx],
            ts2[train_idx],
            ts3[train_idx],
            ts4[train_idx],
            y[train_idx],
        )
        fold_x_test = (
            nts[test_idx],
            ts1[test_idx],
            ts2[test_idx],
            ts3[test_idx],
            ts4[test_idx],
        )
        fold_y_test = y[test_idx]

        x_split, y_split = do_split(split, trainset)

        for x_part, y_part in zip(x_split, y_split):
            # Every portion gets its own, newly initialised model/optimizer.
            model = LSTMModel(layer_size=128, num_of_layers=2)
            optimizer = torch.optim.Adam(model.parameters(), lr=0.00001, weight_decay=0.00001)
            train_lstm(model, optimizer, x_part, y_part, fold_x_test, fold_y_test, num_epoch=20, batch_size=256)

            mse, auc = test(model, fold_x_test, fold_y_test)

            aucs.append(auc)
            mses.append(mse)

    elapsed = datetime.datetime.now() - run_start

    print('splitting:', split)
    print('mse:', mses)
    print('auc: ', aucs)
    print('Training time: ', elapsed)


In [None]:
# Ten equally sized portions: each one takes 10% of a fold's training data.
split = [0.1] * 10
run_local(split)

In [None]:
#split = [0.15, 0.15, 0.15, 0.15, 0.15, 0.05, 0.05, 0.05, 0.05, 0.05]
#run_local(split, nts, y, ts1, ts2, ts3, ts4)