In [1]:
import os
import sys
from pathlib import Path

# Select the GPUs this kernel may use BEFORE importing torch or any project
# module. CUDA_VISIBLE_DEVICES is only honoured if it is set before the CUDA
# runtime is initialized, so assigning it ahead of those imports guarantees
# it takes effect even if one of them touches CUDA at import time.
os.environ["CUDA_VISIBLE_DEVICES"] = "0,1,2"

import h5py
import torch

# Make the local Dietnet checkout importable (path is relative to the
# notebook's working directory).
sys.path.append("Dietnet/")
from Dietnet.make_attributions import load_data, load_model
from Dietnet.helpers import dataset_utils as du
from Dietnet.Interpretability import attribution_manager as am
from Dietnet.helpers import mainloop_utils as mlu

## Get Attributions for PyTorch Model

In [2]:
# ---- Run configuration ------------------------------------------------------
which_fold, seed = 0, 23
train_valid_ratio = 0.75
batch_size = 24

# ---- Experiment layout: <exp_path>/<exp_folder>/<exp_folder>_fold<k>/ -------
exp_path = Path('/home/rochefortc/shared_disk_wd4tb/rochefortc/Dietnetwork/Dietnet2/1000G_EXP/EXP01_2020.07')
exp_folder = 'REPRODUCE_2020.07'
full_path = exp_path / exp_folder / f'{exp_folder}_fold{which_fold}'
model_path = full_path / 'model_params.pt'

# File names, passed to the loaders together with exp_path
dataset, embedding, folds_indexes = 'dataset.npz', 'embedding.npz', 'folds_indexes.npz'

device = torch.device('cuda:0')

# ---- Embedding --------------------------------------------------------------
# Load the embedding of the selected fold, move it to `device`, cast to float.
emb = du.load_embedding(os.path.join(exp_path, embedding), which_fold).to(device).float()

# Divide by the L2 norm computed along dim 0.
emb_norm = emb.pow(2).sum(dim=0).pow(0.5)
emb = emb / emb_norm

# ---- Model dimensions -------------------------------------------------------
n_feats_emb = emb.shape[1]  # input of aux net
n_feats = emb.shape[0]      # input of main net (not used by the cells shown in this notebook)

# Hidden layers all share the same width
n_hidden_u = n_hidden1_u = n_hidden2_u = 100
# Output layer
n_targets = 26
input_dropout = 0.

# ---- Model and test data ----------------------------------------------------
disc_net = load_model(
    model_path, emb, device, n_feats_emb,
    n_hidden_u, n_hidden1_u, n_hidden2_u,
    n_targets, input_dropout,
    incl_bias=True, incl_softmax=True,
)
test_generator, x_test = load_data(
    exp_path, dataset, folds_indexes, which_fold,
    seed, train_valid_ratio, device, batch_size,
)

3 GPUs detected! Running in DataParallel mode


In [3]:
# Configure an AttributionManager for the PyTorch model: model, attribution
# function, data source, output file and device are set one after another.
attr_manager = am.AttributionManager()

attr_manager.set_model(disc_net)
# 'int_grad' presumably selects integrated gradients; computed via the Captum backend.
attr_manager.init_attribution_function(attr_type='int_grad', backend='captum')
# Alternative backend (disabled):
# attr_manager.init_attribution_function(attr_type='int_grad', backend='custom')
attr_manager.set_data_generator(test_generator)
attr_manager.set_genotypes_data(x_test)
# Raw attributions are written inside the fold directory `full_path`.
attr_manager.set_raw_attributions_file(os.path.join(full_path, 'attrs_true_2.h5'))  # alternative file name: 'attrs_w_softmax.h5'
attr_manager.set_device(device)

initialized attribution_function. You can call `create_attributions` method once you set model and data_generator


In [4]:
# Settings for this run: 100 steps with method='riemann_left'.
# For reference, Captum's integrated-gradients defaults are n_steps=50 and
# method="gausslegendre". An earlier note in this cell mentioned an all-zero
# baseline with method="riemann_right"; that is NOT what the call below uses.

# Baseline: minimum of the test inputs taken along dim 0, reshaped to one row.
# NOTE(review): the original comment called this the "true 0" baseline --
# presumably the per-column minimum corresponds to the encoded zero; confirm.
# Alternatives that were tried:
#   baseline = test_generator.dataset.xs.max(0).values.view(1,-1)
#   baseline = torch.zeros(1, x_test[0].shape[0]).to(device)
baseline = test_generator.dataset.xs.min(dim=0).values.view(1, -1)

ig_options = dict(
    only_true_labels=False,
    n_steps=100,
    method='riemann_left',
    baselines=baseline,
)
attr_manager.create_raw_attributions(False, **ig_options)

completed 24/690 [3.478261%]
completed 48/690 [6.956522%]
completed 72/690 [10.434783%]
completed 96/690 [13.913043%]
completed 120/690 [17.391304%]
completed 144/690 [20.869565%]
completed 168/690 [24.347826%]
completed 192/690 [27.826087%]
completed 216/690 [31.304348%]
completed 240/690 [34.782609%]
completed 264/690 [38.260870%]
completed 288/690 [41.739130%]
completed 312/690 [45.217391%]
completed 336/690 [48.695652%]
completed 360/690 [52.173913%]
completed 384/690 [55.652174%]
completed 408/690 [59.130435%]
completed 432/690 [62.608696%]
completed 456/690 [66.086957%]
completed 480/690 [69.565217%]
completed 504/690 [73.043478%]
completed 528/690 [76.521739%]
completed 552/690 [80.000000%]
completed 576/690 [83.478261%]
completed 600/690 [86.956522%]
completed 624/690 [90.434783%]
completed 648/690 [93.913043%]
completed 672/690 [97.391304%]
completed 690/690 [100.000000%]
saved attributions to /home/rochefortc/shared_disk_wd4tb/rochefortc/Dietnetwork/Dietnet2/1000G_EXP/EXP01_2

In [5]:
# Compute the attribution average and store it as dataset 'avg_attr' in an
# HDF5 file inside the fold directory.
avg_attr_path = full_path / 'attrs_avg_true_2.h5'  # alternative file name: 'attrs_w_softmax_avg.h5'
out = attr_manager.get_attribution_average(False)

with h5py.File(avg_attr_path, 'w') as hf:
    # Move to host memory and convert to NumPy before writing.
    hf['avg_attr'] = out.cpu().numpy()
print(f'attr avg saved to {avg_attr_path}')

completed 0/690 [0.000000%]
completed 20/690 [2.898551%]
completed 40/690 [5.797101%]
completed 60/690 [8.695652%]
completed 80/690 [11.594203%]
completed 100/690 [14.492754%]
completed 120/690 [17.391304%]
completed 140/690 [20.289855%]
completed 160/690 [23.188406%]
completed 180/690 [26.086957%]
completed 200/690 [28.985507%]
completed 220/690 [31.884058%]
completed 240/690 [34.782609%]
completed 260/690 [37.681159%]
completed 280/690 [40.579710%]
completed 300/690 [43.478261%]
completed 320/690 [46.376812%]
completed 340/690 [49.275362%]
completed 360/690 [52.173913%]
completed 380/690 [55.072464%]
completed 400/690 [57.971014%]
completed 420/690 [60.869565%]
completed 440/690 [63.768116%]
completed 460/690 [66.666667%]
completed 480/690 [69.565217%]
completed 500/690 [72.463768%]
completed 520/690 [75.362319%]
completed 540/690 [78.260870%]
completed 560/690 [81.159420%]
completed 580/690 [84.057971%]
completed 600/690 [86.956522%]
completed 620/690 [89.855072%]
completed 640/690 

## Computing Other Kinds of Attributions

In [None]:
# The attribution type can be switched as well; here: saliency via Captum.
# NOTE(review): `input_batch` and `target_batch` are not defined in any cell
# visible in this notebook, so on a fresh kernel this cell presumably fails
# with a NameError. Define them first (e.g. from one batch of test_generator --
# TODO confirm what the generator yields).
attr_manager.init_attribution_function(attr_type='saliency', backend='captum')
# Call the attribution function directly on one batch, with inputs and targets
# moved to the manager's device. Assuming attr_func is a Captum Saliency
# object, abs=False keeps the sign of the gradients -- TODO confirm.
attr_saliency = attr_manager.attr_func.attribute(inputs=(input_batch.to(attr_manager.device)), target=target_batch.to(attr_manager.device), abs=False)
# Bring the result back to host memory as a NumPy array.
attr_saliency = attr_saliency.cpu().numpy()

## Computing Attributions for the Theano Model

In [None]:
# ---- Run configuration ------------------------------------------------------
which_fold, seed = 0, 23
train_valid_ratio = 0.75

# ---- PyTorch experiment layout (dataset / embedding / fold indexes) ---------
exp_path = Path('/home/rochefortc/shared_disk_wd4tb/rochefortc/Dietnetwork/Dietnet2/1000G_EXP/EXP01_2020.07')
exp_folder = 'REPRODUCE_2020.07'
full_path = exp_path / exp_folder / f'{exp_folder}_fold{which_fold}'

# ---- Theano run directory and the files read from it ------------------------
theano_dir = (
    Path('/home/rochefortc/shared_disk_wd4tb/rochefortc/Dietnetwork/1000G_EXP/EXP02_2_2019.09/final_models/1000_genomes/')
    / '1000G_2__our_model1.0_lr-3e-05_anneal-0.999_eni-0.02_dni-0.02_accuracy_BN-1_Inpdrp-1.0_EmbNoise-0.0_decmode-regression_hu-100_tenc-100-100_tdec-100-100_hs-100_fold0'
)
theano_attrs_fname = theano_dir / 'additional_data.npz'
theano_model_fname = theano_dir / 'dietnet_best.npz'

# File names, passed to the loaders together with exp_path
dataset, embedding, folds_indexes = 'dataset.npz', 'embedding.npz', 'folds_indexes.npz'

device = torch.device('cuda:0')

# ---- Embedding --------------------------------------------------------------
# Load the embedding of the selected fold, move it to `device`, cast to float.
emb = du.load_embedding(os.path.join(exp_path, embedding), which_fold).to(device).float()

# Divide by the L2 norm computed along dim 0.
emb_norm = emb.pow(2).sum(dim=0).pow(0.5)
emb = emb / emb_norm

# ---- Model dimensions -------------------------------------------------------
n_feats_emb = emb.shape[1]  # input of aux net
n_feats = emb.shape[0]      # input of main net (not used by the cells shown in this notebook)

# Hidden layers all share the same width
emb_n_hidden_u = discrim_n_hidden1_u = discrim_n_hidden2_u = 100
# Output layer
n_targets = 26
input_dropout = 0.

# ---- Model (weights from the Theano checkpoint) and test data ---------------
disc_net = mlu.load_theano_model(
    n_feats_emb, emb_n_hidden_u,
    discrim_n_hidden1_u, discrim_n_hidden2_u,
    n_targets, theano_model_fname, device,
    only_discrim_model=True,
)
# NOTE(review): unlike the PyTorch section, no batch_size is passed here, so
# this relies on load_data providing a default -- confirm.
test_generator, x_test = load_data(
    exp_path, dataset, folds_indexes, which_fold,
    seed, train_valid_ratio, device,
)

In [None]:
# Configure an AttributionManager for the model loaded from the Theano
# checkpoint: model, attribution function, data source, output file and
# device are set one after another.
attr_manager = am.AttributionManager()

attr_manager.set_model(disc_net)
# 'int_grad' presumably selects integrated gradients; computed via the Captum backend.
attr_manager.init_attribution_function(attr_type='int_grad', backend='captum')
# Alternative backend (disabled):
# attr_manager.init_attribution_function(attr_type='int_grad', backend='custom')
attr_manager.set_data_generator(test_generator)
attr_manager.set_genotypes_data(x_test)
# Raw attributions are written into the Theano run directory.
attr_manager.set_raw_attributions_file(os.path.join(theano_dir, 'attrs_theano.h5'))
attr_manager.set_device(device)

In [None]:
# Disabled sanity check: the snippet below is wrapped in a bare string literal,
# so none of it executes. It would run the model on the first test sample and
# print the first entry of `test_scores` stored in the Theano
# `additional_data.npz` -- presumably to verify that both agree.
# NOTE(review): it uses np.load(..., allow_pickle=True); only enable it on
# files you trust.
"""
attr_manager.model(test_generator.dataset.xs[0:1])
import numpy as np
with np.load(theano_attrs_fname, allow_pickle=True, encoding='bytes') as adata:
    print(adata.files)
    test_scores = adata['test_scores']
    test_preds = adata['test_predictions']
print(test_scores[0])
"""

In [None]:
# All-zero baseline: a single row whose width is the length of the first
# element of x_test (presumably the number of input features), created
# directly on `device`.
n_inputs = x_test[0].shape[0]
baseline = torch.zeros(1, n_inputs, device=device)

# 100 steps with method='riemann_left'.
attr_manager.create_raw_attributions(
    False,
    only_true_labels=False,
    baselines=baseline,
    n_steps=100,
    method='riemann_left',
)

In [None]:
# Compute the attribution average and store it as dataset 'avg_attr' in an
# HDF5 file inside the Theano run directory.
# NOTE(review): the PyTorch section calls get_attribution_average(False);
# here the argument is omitted, so the method's default applies -- confirm
# this difference is intended.
theano_avg_path = os.path.join(theano_dir, 'attrs_theano_avg.h5')
out = attr_manager.get_attribution_average()
with h5py.File(theano_avg_path, 'w') as hf:
    # Move to host memory and convert to NumPy before writing.
    hf['avg_attr'] = out.cpu().numpy()