In [1]:
import os

import numpy as np
import pandas as pd
import torch
import yaml

# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so the original import fails on current installs. Keep it as the first
# choice (so environments where it worked behave exactly as before) and fall
# back to the standalone `joblib` package that scikit-learn depends on.
# NOTE(review): pickles written by a very old vendored joblib may not load with
# the standalone package -- confirm against the stored feature/label files.
try:
    from sklearn.externals import joblib
except ImportError:
    import joblib

# from pytorch_utils.models import FeedforwardNetModel
from pytorch_utils.cfvae_models import CFVAEModel

In [2]:
# Experiment selection: the label to predict and the attribute that defines groups.
outcome, sensitive_variable = 'los', 'age'
data_path = 'data/'

# Input locations, all rooted at `data_path`.
# Alternative feature location with the sensitive attribute excluded:
#   os.path.join(data_path, '{}_excluded'.format(sensitive_variable), 'features', '0')
features_path = os.path.join(data_path, 'features', '0')
label_path = os.path.join(data_path, 'labels')
config_path = os.path.join(data_path, 'config', 'grid', 'baseline')

# Output locations: <data_path>/<kind>/scratch/<outcome>.
checkpoints_path, performance_path = (
    os.path.join(data_path, kind, 'scratch', outcome)
    for kind in ('checkpoints', 'performance')
)

In [3]:
# Ensure both output directories exist; `exist_ok=True` makes re-running this
# cell harmless when they are already present.
for _output_dir in (checkpoints_path, performance_path):
    os.makedirs(_output_dir, exist_ok=True)

In [4]:
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
# Load the pickled feature and label dictionaries (features first, then labels).
# joblib.load unpickles its input, which can execute arbitrary code -- only
# point these paths at files from a trusted source.
features_dict, master_label_dict = (
    joblib.load(os.path.join(folder, filename))
    for folder, filename in (
        (features_path, 'features.pkl'),
        (label_path, 'label_dict.pkl'),
    )
)

In [6]:
# Index of the hyperparameter-grid entry for this run. In this notebook it is
# only referenced by commented-out code: the '{}.yaml' config file name and the
# '{}.chk' checkpoint file name.
grid_element = 1

In [7]:
# Build one dictionary per model input, each keyed by split name.
# Features come from `features_dict`; the outcome labels and the group
# membership both come from `master_label_dict`.
data_dict = {
    split: split_features['features']
    for split, split_features in features_dict.items()
}
label_dict = {
    split: split_labels[outcome]
    for split, split_labels in master_label_dict.items()
}
group_dict = {
    split: split_labels[sensitive_variable]
    for split, split_labels in master_label_dict.items()
}

In [8]:
# Read '<sensitive_variable>_map.csv' from the label directory. Its row count is
# used below as `num_groups` in the model configuration.
# NOTE(review): presumably one row per group of the sensitive variable -- confirm.
group_map = pd.read_csv(os.path.join(label_path, '{}_map.csv'.format(sensitive_variable)))

In [9]:
# # with open(os.path.join(config_path, '{}.yaml'.format(grid_element)), 'r') as fp:
# #     config_dict = yaml.load(fp)
    
# # config_dict['num_epochs'] = 3 # For testing

# ## A more complex network
# # config_dict = {
# #     'input_dim' : data_dict['train'].shape[1],
# #     'lr' : 1e-5,
# #     'num_epochs' : 20,
# #     'batch_size' : 256,
# #     'hidden_dim' : 128,
# #     'num_hidden' : 1,
# #     'output_dim' : 2,
# #     'drop_prob' : 0.5,
# #     'normalize' : True,
# #     'iters_per_epoch' : 100,
# #     'gamma' : 0.99,
# #     'resnet' : True,
# #     'sparse' : True,
# #     'sparse_mode' : 'binary'
# # }

# CFVAE configuration passed to CFVAEModel.
# Two sizes are derived from the loaded data; everything else is hand-set.
config_dict = {
    # Derived from the data: feature columns in the training split, rows in the group map.
    'input_dim': data_dict['train'].shape[1],
    'num_groups': group_map.shape[0],
    'lr': 1e-3,
    'lr_final_classifier': 1e-5,
    'output_dim': 2,
    'num_epochs': 20,
    'batch_size': 256,
    'hidden_dim': 128,
    'group_embed_dim': 64,
    'latent_dim': 64,
    'num_hidden': 1,
    'drop_prob': 0.0,
    'normalize': False,
    'iters_per_epoch': 100,
    'gamma': 0.99,
    'resnet': False,
    'sparse': True,
    'sparse_mode': 'binary',
    # Loss weights. Alternative setting that was tried:
    # 'lambda_mmd': 0.0 with 'lambda_kl': 1e0.
    'lambda_mmd': 1000.0,
    'lambda_kl': 0.0,
    'lambda_classification': 100.0,
    'lambda_mmd_group': 1.0,
}

In [10]:
# Instantiate the CFVAE model from the configuration dictionary above.
# NOTE(review): the stored output of this cell is a KeyboardInterrupt and the
# following cells show `In [None]`, so the saved notebook was not run past here.
model = CFVAEModel(config_dict)

KeyboardInterrupt: 

In [None]:
# for child in model.model.children():
#     print(child)

In [None]:
%%time
# Train on the per-split features, outcome labels and group labels; the return
# value is kept in `result`. The `%%time` magic above reports how long the cell takes.
result = model.train(data_dict, label_dict, group_dict)

In [None]:
# Run prediction with the trained model, restricted to the 'val' and 'test'
# phases; the return value is kept in `result_eval`.
# Disabled alternative (a second training call):
# result_final_classifier = model.train(data_dict, label_dict, group_dict)
result_eval = model.predict(data_dict, label_dict, group_dict, phases = ['val', 'test'])

In [None]:
## Save weights
# model.save_weights(os.path.join(checkpoints_path, '{}.chk'.format(grid_element)))