## Todos

The model isn't exactly reproducible: I get different results when I load it.

As of 24 September, my best uncertainty model according to recall@30 is registered in wandb in the project "final_model_p1_pre_uncertainty_tunning", and it is called "golden-oath-84".

The config is as follows:
- type: gru
- hidden_size: 100
- lr: 0.01
- layers: 1
- batch_size: 64
- dropout: 0.1
- epochs: 15


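In this notebook I will retrain this model and save it to disk (a `torch.save` pickle of the weights plus a JSON file of the hyperparameters) so that it can be used in later experiments.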

In [2]:
# Display the value of every top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = 'all'

In [3]:
# Name of the wandb model being reproduced; also used as the folder name for the saved files.
model_name = "golden-oath-84"

In [4]:
import os
import json

from rnn_utils import DiagnosesDataset, split_dataset, MYCOLLATE
from rnn_utils import train_one_epoch, eval_model

from config import Settings; settings = Settings()

from mourga_variational.variational_rnn import VariationalRNN

import torch
from torch.utils.data import Dataset, DataLoader, random_split

from sklearn.model_selection import ParameterGrid, ParameterSampler

import numpy as np
import pandas as pd

import wandb

# Parameters

In [5]:
# Identifier of the model-ready dataset; used as the sub-folder name when locating the data files.
dataset_id = "diag_only"

# Reproducibility

In [6]:
# Fix every source of randomness used here so that repeated runs start from the same state.
seed = settings.random_seed

# Ask cuDNN for deterministic kernels and turn off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed PyTorch (CPU and CUDA) and NumPy with the project-wide seed.
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
np.random.seed(seed)

<torch._C.Generator at 0x7f2c1019cf30>

# Create dataset

In [7]:
# hyperparameters of best model
grouping = 'ccs'
batch_size=64

In [8]:
# Folder holding the model-ready files for `dataset_id`, built from the project settings.
dataset_folder = os.path.join(
    settings.data_base,
    settings.model_ready_dataset_folder,
    dataset_id,
)
print('dataset at', dataset_folder)

dataset at data/model_ready_dataset/diag_only


In [9]:
# Full dataset: handed to the collate function here and to eval_model later on.
dataset = DiagnosesDataset(os.path.join(dataset_folder, 'dataset.json'), grouping)

# Pre-split subsets stored next to the full dataset (built in train, val, test order).
train_dataset, val_dataset, test_dataset = (
    DiagnosesDataset(os.path.join(dataset_folder, file_name), grouping)
    for file_name in ('train_subset.json', 'val_subset.json', 'test_subset.json')
)

# Subset sizes, each shown as cell output.
len(train_dataset)
len(val_dataset)
len(test_dataset)

# Only the training loader shuffles. Every loader gets its own MYCOLLATE instance
# built from the full dataset.
# NOTE(review): MYCOLLATE lives in rnn_utils; what it derives from `dataset` is not visible here.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
    collate_fn=MYCOLLATE(dataset),
)
# Author's note: the batch size used for validation is arbitrary and does not
# affect total validation speed.
val_dataloader = DataLoader(
    val_dataset,
    batch_size=batch_size,
    collate_fn=MYCOLLATE(dataset),
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=batch_size,
    collate_fn=MYCOLLATE(dataset),
)

5249

1125

1125

# Define model

## Hyperparameters

In [10]:
# remaining hyperparameters of best model
input_size = next(iter(train_dataloader))['target_sequences']['sequence'].shape[2]
hidden_size = 100
num_layers = 1
n_labels = input_size
rnn_type = 'GRU'

lr = 0.01
dropout = 0.1

n_labels = input_size
epochs = 15
criterion = torch.nn.BCEWithLogitsLoss()

# Train

In [11]:
# Build the network from the hyperparameters; the same dropout value is passed
# to all three dropout arguments.
model = VariationalRNN(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    n_labels=n_labels,
    rnn_type=rnn_type,
    dropouti=dropout,
    dropoutw=dropout,
    dropouto=dropout,
)

optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# Epochs are numbered from 1. `epoch` keeps its final value after the loop and is
# passed to eval_model in the evaluation cells.
for epoch in range(1, epochs + 1):
    loss = train_one_epoch(model, train_dataloader, epoch, criterion, optimizer)

# Eval

Confirm that the trained model has good results.

In [38]:
# Evaluate the freshly trained model on each split.
train_results = eval_model(model, train_dataloader, dataset, criterion, epoch, 'train_last')
val_results = eval_model(model, val_dataloader, dataset, criterion, epoch, 'validation')
test_results = eval_model(model, test_dataloader, dataset, criterion, epoch, 'test')

# Metrics stored under the 'last adm' key of each result.
train_last_adm = train_results['last adm']
val_last_adm = val_results['last adm']
test_last_adm = test_results['last adm']

# Flat summary; the un-prefixed recall@k entries come from the validation split.
res = {
    'train_loss': train_results['loss'],
    'train_recall@30': train_last_adm['recall30']['mean'],
    'val_loss': val_results['loss'],
    'test_loss': test_results['loss'],
    'recall@30_test': test_last_adm['recall30']['mean'],
    'recall@10': val_last_adm['recall10']['mean'],
    'recall@20': val_last_adm['recall20']['mean'],
    'recall@30': val_last_adm['recall30']['mean'],
    'n_val_recall@30_last': val_last_adm['recall30']['n'],
}
res

{'train_loss': 0.03444346990936664,
 'train_recall@30': 0.7538760535481849,
 'val_loss': 0.03972551769680447,
 'test_loss': 0.038220416307449344,
 'recall@30_test': 0.6988670438979819,
 'recall@10': 0.4397468450493124,
 'recall@20': 0.601957055396802,
 'recall@30': 0.7072865203745103,
 'n_val_recall@30_last': 1125}

## Save model weights and parameters

In [15]:
# Folder that holds everything saved for this model.
model_folder = os.path.join(settings.data_base, settings.models_folder, model_name)

# makedirs(exist_ok=True) also creates any missing parent folders and does nothing
# when the folder is already there, so the cell can be re-run safely.
os.makedirs(model_folder, exist_ok=True)

# Save the weights only (the model's state_dict).
weights_save_path = os.path.join(model_folder, "weights")
torch.save(model.state_dict(), weights_save_path)
print('Model saved!')

Model saved!


## Save model hyperparameters

In [17]:
# Constructor arguments needed to rebuild the network; the keys match the keyword
# names used when the model was created.
params = {
    'input_size': input_size,
    'hidden_size': hidden_size,
    'num_layers': num_layers,
    'n_labels': n_labels,
    'rnn_type': rnn_type,
    'dropouti': dropout,
    'dropouto': dropout,
    'dropoutw': dropout,
}

# Stored as JSON inside the model folder.
hypp_save_path = os.path.join(model_folder, 'hyper_parameters.json')
with open(hypp_save_path, "w") as f:
    json.dump(params, f)

print('Hyperparameters saved!')

Hyperparameters saved!


# Test it out

### Read weights and hyperparameters

In [18]:
# Hyperparameters: read back the JSON file written above.
with open(hypp_save_path) as f:
    params_loaded = json.load(f)

# Weights: read back the saved state_dict.
# torch.load unpickles the file, so only use it on files from a trusted source.
weights = torch.load(weights_save_path)

### Create model and load weights

In [19]:
# Rebuild the architecture from the saved hyperparameters.
new_model = VariationalRNN(**params_loaded)
# Reuse the state_dict already read into `weights` instead of reading the file again.
new_model.load_state_dict(weights)

<All keys matched successfully>

### Evaluate it

In [40]:
# Validation metrics of the reloaded model, for comparison with the trained model.
new_val_results = eval_model(new_model, val_dataloader, dataset, criterion, epoch, 'validation')
new_val_results['last adm']

{'recall10': {'mean': 0.44963078459716616,
  'std': 0.20064399285689158,
  'n': 1125},
 'recall20': {'mean': 0.6151066932842334,
  'std': 0.18995652498107357,
  'n': 1125},
 'recall30': {'mean': 0.7135500131803038,
  'std': 0.17504136860314654,
  'n': 1125}}

# Performance doesn't match

For some reason, the performance of the loaded model isn't exactly the same as that of the trained model. Maybe the two models differ in their parameters?

In [29]:
# Compare the two models entry by entry and report the outcome exactly once.
# state_dict() is used so that buffers are checked as well as parameters, and so
# that an entry present in only one model counts as a mismatch.
original_state = model.state_dict()
reloaded_state = new_model.state_dict()

mismatched = [
    key
    for key in sorted(set(original_state) | set(reloaded_state))
    if key not in original_state
    or key not in reloaded_state
    # .cpu() lets the comparison work even if the two models sit on different devices.
    or not torch.equal(original_state[key].cpu(), reloaded_state[key].cpu())
]

# 'Models match' is printed only when nothing differs.
if mismatched:
    print('Models don\'t match')
    print('Differing entries:', mismatched)
else:
    print('Models match')

Models match


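.... weird. Other things worth checking: whether both models are in the same train/eval mode when evaluated, and whether evaluation itself is stochastic (e.g. dropout still active at inference time).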
