In [1]:
import numpy as np
import os
import torch
import pandas as pd
import yaml

from sklearn.externals import joblib

from pytorch_utils.cfvae_models import CFVAEModel

In [2]:
outcome = 'los'
sensitive_variable = 'age'
data_path = 'data/'

# features_path = os.path.join(data_path, '{}_excluded'.format(sensitive_attibute),'features', str(0))
features_path = os.path.join(data_path, 'features', str(0))
label_path = os.path.join(data_path, 'labels')
config_path = os.path.join(data_path, 'config', 'grid', 'baseline')
checkpoints_path = os.path.join(data_path, 'checkpoints', 'scratch', outcome)
performance_path = os.path.join(data_path, 'performance', 'scratch', outcome)

In [3]:
os.makedirs(checkpoints_path, exist_ok=True)
os.makedirs(performance_path, exist_ok=True)

In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
features_dict = joblib.load(os.path.join(features_path, 'features.pkl'))
master_label_dict = joblib.load(os.path.join(label_path, 'label_dict.pkl'))

In [6]:
data_dict = {split: features_dict[split]['features'] for split in features_dict.keys()}
label_dict = {split : master_label_dict[split][outcome] for split in master_label_dict.keys()}
group_dict = {split : master_label_dict[split][sensitive_variable] for split in master_label_dict.keys()}

In [7]:
group_map = pd.read_csv(os.path.join(label_path, '{}_map.csv'.format(sensitive_variable)))

In [8]:
# # with open(os.path.join(config_path, '{}.yaml'.format(grid_element)), 'r') as fp:
# #     config_dict = yaml.load(fp)
    
# CFVAE
config_dict = {
    # Standard parameters
    'input_dim' : data_dict['train'].shape[1],
    'num_groups' : group_map.shape[0],
    'lr' : 1e-3,
    'lr_final_classifier' : 1e-3,
    'gamma' : 0.99,
    'num_epochs' : 10,
    'iters_per_epoch' : 100,
    'output_dim' : 2,
    'batch_size' : 256,
    'sparse' : True,
    'sparse_mode' : 'binary',
    # Parameters corresponding to the size of the VAE
    'group_embed_dim' : 64,
    'latent_dim' : 64,
    'num_hidden' : 2,
    'drop_prob' : 0.0,
    'resnet' : False,
    'normalize' : False,
    # Parameters corresponding to the size of classifier
    'hidden_dim_classifier' : 128,
    'num_hidden_classifier' : 1,
    'drop_prob_classifier' : 0.0,
    'resnet_classifier' : False,
    'normalize_classifier' : False,

    # Lambda
    'lambda_reconstruction' : 1e3,
    'lambda_mmd' : 1e4,
    'lambda_kl' : 0.0,
    'lambda_classification' : 1e1,
    'lambda_mmd_group' : 1e3
}

In [9]:
model = CFVAEModel(config_dict)
for child in model.model.children():
    print(child)

VAEEncoder(
  (encoder): FeedforwardNet(
    (layers): ModuleList(
      (0): HiddenLinearLayer(
        (linear): LinearLayerWrapper(
          (linear): EmbeddingBagLinear(
            in_features=368117, out_features=256, bias=True
            (embed): EmbeddingBag(368117, 256, mode=sum)
          )
        )
        (dropout): Dropout(p=0.0)
      )
      (1): HiddenLinearLayer(
        (linear): LinearLayerWrapper(
          (linear): Linear(in_features=256, out_features=128, bias=True)
        )
        (dropout): Dropout(p=0.0)
      )
      (2): Linear(in_features=128, out_features=128, bias=True)
    )
  )
  (reparameterization_layer): ReparameterizationLayer()
)
ConditionalDecoder(
  (decoder): FeedforwardNet(
    (layers): ModuleList(
      (0): HiddenLinearLayer(
        (linear): LinearLayerWrapper(
          (linear): Linear(in_features=128, out_features=256, bias=True)
        )
        (dropout): Dropout(p=0.0)
      )
      (1): HiddenLinearLayer(
        (linear): Lin

In [10]:
%%time
result = model.train(data_dict, label_dict, group_dict)

Epoch 0/9
----------
Phase: train:
 loss: 60.481441, elbo: 3.097980, mmd: 0.000412, reconstruction: 0.048110, kl: 2.530248, classification: 0.519622, mmd_group: 0.003054,
 auc: 0.549356, auprc: 0.222397, brier: 0.167573,
Phase: val:
 loss: 13.129423, elbo: 3.557044, mmd: 0.000251, reconstruction: 0.003324, kl: 3.058264, classification: 0.495457, mmd_group: 0.002340,
 auc: 0.600158, auprc: 0.279223, brier: 0.158356,
Best model updated
Epoch 1/9
----------
Phase: train:
 loss: 12.680184, elbo: 4.378470, mmd: 0.000247, reconstruction: 0.003210, kl: 3.899285, classification: 0.475975, mmd_group: 0.002236,
 auc: 0.658644, auprc: 0.361565, brier: 0.151370,
Phase: val:
 loss: 12.375405, elbo: 5.843115, mmd: 0.000250, reconstruction: 0.003121, kl: 5.386841, classification: 0.453154, mmd_group: 0.002219,
 auc: 0.705285, auprc: 0.419517, brier: 0.142807,
Best model updated
Epoch 2/9
----------
Phase: train:
 loss: 11.952869, elbo: 6.647823, mmd: 0.000252, reconstruction: 0.003103, kl: 6.226743, 

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/labs/shahlab/spfohl/miniconda3/envs/py_env/lib/python3.6/site-packages/IPython/core/magics/execution.py", line 1238, in time
    exec(code, glob, local_ns)
  File "<timed exec>", line 1, in <module>
  File "/home/spfohl/projects/fairness_cf/pytorch_utils/cfvae_models.py", line 240, in train
    target = torch.FloatTensor(inputs.todense()).to(self.device)
  File "/labs/shahlab/spfohl/miniconda3/envs/py_env/lib/python3.6/site-packages/scipy/sparse/base.py", line 792, in todense
    return np.asmatrix(self.toarray(order=order, out=out))
  File "/labs/shahlab/spfohl/miniconda3/envs/py_env/lib/python3.6/site-packages/scipy/sparse/compressed.py", line 954, in toarray
    _sparsetools.csr_todense(M, N, x.indptr, x.indices, x.data, y)
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/labs/shahlab/spfohl/miniconda3/envs/py_env/lib/python3.6/site-packages/IPython/core/int

KeyboardInterrupt: 

In [11]:
# result_final_classifier = model.train(data_dict, label_dict, group_dict)
result_eval = model.predict(data_dict, label_dict, group_dict, phases = ['val', 'test'])

KeyError: 'test'

In [None]:
model.process_result_dict(result)

In [None]:
model.process_result_dict(result_eval[1])