In [1]:
import numpy as np
import os
import torch
import pandas as pd
import yaml

from sklearn.externals import joblib

from pytorch_utils.cfvae_models import CFVAEModel

In [2]:
# --- Experiment selection ---------------------------------------------------
outcome = 'los'                 # key used to pick the label column per split
sensitive_variable = 'gender'   # key used to pick the group column per split

# --- Root of every path built in this cell -----------------------------------
data_path = 'data/'

# --- Inputs -------------------------------------------------------------------
# Disabled alternative that reads features from a per-attribute sub-directory:
# features_path = os.path.join(data_path, '{}_excluded'.format(sensitive_variable), 'features', str(0))
features_path = os.path.join(data_path, 'features', '0')
label_path = os.path.join(data_path, 'labels')
config_path = os.path.join(data_path, 'config', 'grid', 'baseline')

# --- Outputs (one 'scratch' directory per outcome) ----------------------------
checkpoints_path, performance_path = (
    os.path.join(data_path, artifact_kind, 'scratch', outcome)
    for artifact_kind in ('checkpoints', 'performance')
)

In [3]:
# Create both output directories up front; exist_ok makes the cell safe to re-run.
for output_dir in (checkpoints_path, performance_path):
    os.makedirs(output_dir, exist_ok=True)

In [4]:
# Use the GPU when PyTorch reports one as available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [5]:
# Locations of the serialized feature and label dictionaries.
features_file = os.path.join(features_path, 'features.pkl')
labels_file = os.path.join(label_path, 'label_dict.pkl')

# NOTE: joblib.load is pickle-based and can execute arbitrary code while
# deserializing -- only point these paths at files you trust.
features_dict = joblib.load(features_file)
master_label_dict = joblib.load(labels_file)

In [6]:
# Per-split views: pull the feature matrix, the outcome label and the
# sensitive-group id out of the loaded dictionaries, keyed by split name.
data_dict = {split: entry['features'] for split, entry in features_dict.items()}
label_dict = {split: labels[outcome] for split, labels in master_label_dict.items()}
group_dict = {split: labels[sensitive_variable] for split, labels in master_label_dict.items()}

In [7]:
# Lookup table for the sensitive variable, stored next to the labels as '<variable>_map.csv'.
group_map_file = '{}_map.csv'.format(sensitive_variable)
group_map = pd.read_csv(os.path.join(label_path, group_map_file))

In [8]:
# Disabled alternative: read the configuration from a grid YAML file instead of
# the inline dict below (prefer yaml.safe_load for files you do not control).
# # with open(os.path.join(config_path, '{}.yaml'.format(grid_element)), 'r') as fp:
# #     config_dict = yaml.safe_load(fp)

# Group ids the model will actually see during training. For 'gender' the
# notebook later keeps only rows whose group id is < 2, so the group count has
# to be taken on that same subset; counting on the unfiltered labels would
# configure the model for groups that never appear in the training data.
train_groups = np.asarray(group_dict['train'])
if sensitive_variable == 'gender':
    train_groups = train_groups[train_groups < 2]

# CFVAE
config_dict = {
    # Standard parameters
    'input_dim' : data_dict['train'].shape[1],  # number of feature columns
    'num_groups' : len(np.unique(train_groups)),  # distinct groups after filtering
    'lr' : 1e-3,
    'lr_final_classifier' : 1e-3,
    'gamma' : 0.99,
    'num_epochs' : 10,
    'iters_per_epoch' : 100,
    'output_dim' : 2,
    'batch_size' : 256,
    'sparse' : True,
    'sparse_mode' : 'binary',
    # Parameters corresponding to the size of the VAE
    'group_embed_dim' : 64,
    'latent_dim' : 64,
    'num_hidden' : 2,
    'drop_prob' : 0.0,
    'resnet' : False,
    'normalize' : False,
    # Parameters corresponding to the size of classifier
    'hidden_dim_classifier' : 128,
    'num_hidden_classifier' : 1,
    'drop_prob_classifier' : 0.0,
    'resnet_classifier' : False,
    'normalize_classifier' : False,

    # Lambda (weights of the individual loss terms)
    'lambda_reconstruction' : 1e3,
    'lambda_mmd' : 1e4,
    'lambda_kl' : 0.0,
    'lambda_classification' : 1e1,
    'lambda_mmd_group' : 1e3
}

In [9]:
# For 'gender', restrict every split to rows whose group id is below 2.
if sensitive_variable == 'gender':
    # Build the boolean row mask once per split from the *unfiltered* group ids,
    # then apply that same mask to features, labels and the group ids themselves.
    keep_mask = {split: groups < 2 for split, groups in group_dict.items()}
    data_dict = {split: rows[keep_mask[split]] for split, rows in data_dict.items()}
    label_dict = {split: labels[keep_mask[split]] for split, labels in label_dict.items()}
    group_dict = {split: groups[keep_mask[split]] for split, groups in group_dict.items()}

In [10]:
# Instantiate the CFVAE wrapper from the configuration dictionary.
model = CFVAEModel(config_dict)

# Print every direct sub-module of the wrapped network as a quick architecture check.
for submodule in model.model.children():
    print(submodule)

VAEEncoder(
  (encoder): FeedforwardNet(
    (layers): ModuleList(
      (0): HiddenLinearLayer(
        (linear): LinearLayerWrapper(
          (linear): EmbeddingBagLinear(
            in_features=368117, out_features=256, bias=True
            (embed): EmbeddingBag(368117, 256, mode=sum)
          )
        )
        (dropout): Dropout(p=0.0)
      )
      (1): HiddenLinearLayer(
        (linear): LinearLayerWrapper(
          (linear): Linear(in_features=256, out_features=128, bias=True)
        )
        (dropout): Dropout(p=0.0)
      )
      (2): Linear(in_features=128, out_features=128, bias=True)
    )
  )
  (reparameterization_layer): ReparameterizationLayer()
)
ConditionalDecoder(
  (decoder): FeedforwardNet(
    (layers): ModuleList(
      (0): HiddenLinearLayer(
        (linear): LinearLayerWrapper(
          (linear): Linear(in_features=80, out_features=160, bias=True)
        )
        (dropout): Dropout(p=0.0)
      )
      (1): HiddenLinearLayer(
        (linear): Line

In [11]:
%%time
# Train the model on the per-split feature, label and group dictionaries.
# The return value is bound to `result`, which a later cell passes to
# model.process_result_dict; if this cell is interrupted before it returns,
# `result` is never assigned and that later cell raises NameError.
result = model.train(data_dict, label_dict, group_dict)

Epoch 0/9
----------
Phase: train:
 loss: 71.090557, elbo: 3.334376, mmd: 0.000577, reconstruction: 0.058832, kl: 2.762785, classification: 0.512759, mmd_group: 0.001364,
 auc: 0.556999, auprc: 0.235462, brier: 0.164250,
Phase: val:
 loss: 11.560024, elbo: 2.988106, mmd: 0.000250, reconstruction: 0.003352, kl: 2.483518, classification: 0.501236, mmd_group: 0.000699,
 auc: 0.576016, auprc: 0.252385, brier: 0.160172,
Best model updated
Epoch 1/9
----------
Phase: train:
 loss: 11.209934, elbo: 3.251243, mmd: 0.000242, reconstruction: 0.003265, kl: 2.763431, classification: 0.484547, mmd_group: 0.000682,
 auc: 0.628552, auprc: 0.309821, brier: 0.154596,
Phase: val:
 loss: 11.031716, elbo: 4.209954, mmd: 0.000251, reconstruction: 0.003180, kl: 3.744842, classification: 0.461932, mmd_group: 0.000721,
 auc: 0.698539, auprc: 0.376926, brier: 0.147164,
Best model updated
Epoch 2/9
----------
Phase: train:
 loss: 10.660005, elbo: 4.193859, mmd: 0.000244, reconstruction: 0.003167, kl: 3.756065, 

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "/labs/shahlab/spfohl/miniconda3/envs/py_env/lib/python3.6/site-packages/IPython/core/magics/execution.py", line 1238, in time
    exec(code, glob, local_ns)
  File "<timed exec>", line 1, in <module>
  File "/home/spfohl/projects/fairness_cf/pytorch_utils/cfvae_models.py", line 276, in train
    batch_loss_dict['loss'].backward()
  File "/labs/shahlab/spfohl/miniconda3/envs/py_env/lib/python3.6/site-packages/torch/tensor.py", line 102, in backward
    torch.autograd.backward(self, gradient, retain_graph, create_graph)
  File "/labs/shahlab/spfohl/miniconda3/envs/py_env/lib/python3.6/site-packages/torch/autograd/__init__.py", line 90, in backward
    allow_unreachable=True)  # allow_unreachable flag
KeyboardInterrupt

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/labs/shahlab/spfohl/miniconda3/envs/py_env/lib/python3.6/site-packages/IPython/core/interactiveshell.py", line 1863, 

KeyboardInterrupt: 

In [12]:
# Disabled alternative (a further training call):
# result_final_classifier = model.train(data_dict, label_dict, group_dict)

# Score the current model on the validation and test splits only.
eval_phases = ['val', 'test']
result_eval = model.predict(data_dict, label_dict, group_dict, phases=eval_phases)

In [13]:
# Post-process the value returned by the training cell. `result` only exists
# once `result = model.train(...)` has run to completion; after an interrupted
# training run this line fails with NameError, so re-run training first.
model.process_result_dict(result)

NameError: name 'result' is not defined

In [None]:
# NOTE(review): element 1 of predict()'s return value is assumed to be the
# result dictionary that process_result_dict expects -- confirm against
# CFVAEModel.predict.
eval_result = result_eval[1]
model.process_result_dict(eval_result)