In [1]:
import numpy as np
import os
import torch
from sklearn.externals import joblib

from pytorch_utils.datasets import ArrayDataset
from pytorch_utils.models import SparseModel
import pytorch_utils

In [2]:
# Locations of the serialized inputs, all relative to the data root.
data_path = 'data/'
label_path = os.path.join(data_path, 'labels')
# Features are read from the sub-directory named '0'
# (NOTE(review): presumably a fold / feature-set index — confirm).
features_path = os.path.join(data_path, 'features', '0')

In [3]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

In [4]:
# Deserialize the feature and label dictionaries from their pickle files.
# joblib.load unpickles its input, so only point these paths at trusted files.
features_file = os.path.join(features_path, 'features.pkl')
label_file = os.path.join(label_path, 'label_dict.pkl')
features_dict = joblib.load(features_file)
label_dict = joblib.load(label_file)

In [5]:
# Key of the label to model; it selects one entry per split from label_dict.
# NOTE(review): 'los' presumably stands for length of stay — confirm.
outcome = "los"

In [6]:
# Per-split feature matrices, keyed by the split names found in features_dict.
data_dict = {
    split: split_features['features']
    for split, split_features in features_dict.items()
}
# Per-split labels for the selected outcome, keyed by the split names found in label_dict.
outcome_dict = {
    split: split_labels[outcome]
    for split, split_labels in label_dict.items()
}

In [7]:
# Hyperparameters handed to SparseModel.
#
# This is the single effective configuration for the cell. It replaces an
# earlier assignment that was overwritten before any use, and it states
# 'output_dim' once: the dict literal previously listed the key twice
# (3, then 2), and Python keeps only the last value, so 2 was always the
# value in force.
config_dict = {
    'input_dim' : data_dict['train'].shape[1],  # second dimension of the training matrix
    'output_dim' : 2,
    'lr' : 1e-4,
    'num_epochs' : 10,
    'batch_size' : 256,
    'hidden_dim' : 256,
    'num_hidden' : 1,
    'drop_prob' : 0.0,
    'batch_norm' : False,
    'iters_per_epoch' : None
}

In [8]:
# Reduced copies of the data for quick experiments: only the 'train' split is
# cut down to its first `num_samples` rows; every other split is kept whole.
num_samples = 1000
small_data_dict = {
    split: (matrix[:num_samples] if split == 'train' else matrix)
    for split, matrix in data_dict.items()
}
# The labels are truncated the same way, driven by the split names of data_dict.
small_outcome_dict = {
    split: (outcome_dict[split] if split != 'train' else outcome_dict[split][:num_samples])
    for split in data_dict
}
small_data_dict

{'train': <1000x368117 sparse matrix of type '<class 'numpy.float32'>'
 	with 285517 stored elements in Compressed Sparse Row format>,
 'test': <12963x368117 sparse matrix of type '<class 'numpy.float32'>'
 	with 3694263 stored elements in Compressed Sparse Row format>,
 'val': <12964x368117 sparse matrix of type '<class 'numpy.float32'>'
 	with 3668580 stored elements in Compressed Sparse Row format>}

In [None]:
%%time
# Fix the torch seed, request deterministic cuDNN behaviour and turn off cuDNN
# benchmark mode before the model is constructed (presumably so that repeated
# runs are comparable — confirm).
torch.manual_seed(0)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
# Build the model from config_dict and train it on the full (non-truncated)
# data; the captured log shows per-epoch train/val loss, auc, auprc and brier.
model = SparseModel(config_dict)
result = model.train(data_dict, outcome_dict)
# Print whatever predict() returns when restricted to the 'test' split.
print(model.predict(data_dict, outcome_dict, keys = ['test']))

Epoch 0/9
----------
Phase: train:
 loss: 0.389058,
 auc: 0.818697, auprc: 0.546816, brier: 0.124225,
Phase: val:
 loss: 0.359713,
 auc: 0.850477, auprc: 0.603915, brier: 0.114538,
Best model updated
Epoch 1/9
----------


In [None]:
# %%time
# torch.manual_seed(0)
# torch.backends.cudnn.deterministic = True
# torch.backends.cudnn.benchmark = False

# model2 = SparseModel(config_dict)
# result2 = model2.train(data_dict, outcome_dict)
# # result = model.train(small_data_dict, small_outcome_dict)
# print(model2.predict(data_dict, outcome_dict, keys = ['test']))