General import statements. REMEMBER, DEEPLIFT_DIR needs to point to the deeplift directory WITHIN the deeplift repo

In [117]:
%matplotlib inline
from __future__ import division;
from __future__ import print_function;
from __future__ import absolute_import;
import sys, os;
from collections import OrderedDict, namedtuple;
import numpy as np;

#Make sure the directory is set to import the lab's version of keras
scriptsDir = os.environ.get("KERAS_DIR");
if (scriptsDir is None):
    raise Exception("Please set environment variable KERAS_DIR");
sys.path.insert(0,scriptsDir)

scriptsDir = os.environ.get("ENHANCER_SCRIPTS_DIR");
if (scriptsDir is None):
    raise Exception("Please set environment variable ENHANCER_SCRIPTS_DIR to point to the enhancer_prediction_code repo");
sys.path.insert(0,scriptsDir+"/featureSelector/deepLIFFT");
from deepLIFTutils import makePngOfSequenceDeepLIFTScores

import deeplift
import deeplift.conversion.keras_conversion as kc

Load the keras model

In [118]:
#Load the keras model, make sure you normalise the weights
#of the first convolutional layer to be mean-centered at each position.
model_weights = "modelsDir_runs/record_0_model_9CS6P_modelWeights.h5"
model_yaml = "modelsDir_runs/record_0_model_9CS6P_modelYaml.yaml"
reload(kc)
keras_model = kc.load_keras_model(model_weights, model_yaml, normalise_conv_for_one_hot_encoded_input=False)

Load the data

In [119]:
scriptsDir = os.environ.get("UTIL_SCRIPTS_DIR");
if (scriptsDir is None):
    raise Exception("Please set environment variable UTIL_SCRIPTS_DIR to point to the deeplift code");
sys.path.insert(0,scriptsDir);
from importDataPackage import importData
reload(importData)
trainData, validData, testData = importData.loadTrainTestValidFromYaml("yaml/features.yaml",
                                                                       "yaml/labels.yaml",
                                                                       "yaml/splits.yaml")

(0, 'rows skipped from', 'features.gz')
Returning desired dict
Making numpy arrays out of the loaded files
('train', 'shapeX', (80000, 20))
('train', 'shapeY', (80000, 1))
('valid', 'shapeX', (10000, 20))
('valid', 'shapeY', (10000, 1))
('test', 'shapeX', (10000, 20))
('test', 'shapeY', (10000, 1))


In [120]:
data = trainData.concat(validData, testData)

Convert the keras sequential model into a deeplift sequential model, and compile the functions to compute the contributions and multipliers - the multipliers are analogous to the gradients

In [121]:
from deeplift.blobs import MxtsMode
reload(kc)

<module 'deeplift.conversion.keras_conversion' from '/Users/avantishrikumar/Research/deeplift/deeplift/conversion/keras_conversion.pyc'>

In [122]:
from deeplift import blobs
reload(blobs)
deeplift_model = kc.convert_sequential_model(keras_model, mxts_mode=MxtsMode.DeepLIFT)
deeplift_contribs_func = deeplift_model.get_target_contribs_func(find_scores_layer_idx=0, target_layer_idx=-1)
deeplift_multipliers_func = deeplift_model.get_target_multipliers_func(find_scores_layer_idx=0, target_layer_idx=-1)

Do the same for other saliency map functions

In [123]:
gradients_model = kc.convert_sequential_model(keras_model, mxts_mode=MxtsMode.Gradient)
grad_times_inp_func = gradients_model.get_target_contribs_func(find_scores_layer_idx=0, target_layer_idx=-1)
grad_func = gradients_model.get_target_multipliers_func(find_scores_layer_idx=0, target_layer_idx=-1)
guided_backprop_model = kc.convert_sequential_model(keras_model, mxts_mode=MxtsMode.GuidedBackprop)
guided_backprop_times_inp_func = guided_backprop_model.get_target_contribs_func(find_scores_layer_idx=0, target_layer_idx=-1)
guided_backprop_func = guided_backprop_model.get_target_multipliers_func(find_scores_layer_idx=0, target_layer_idx=-1)
deconv_model = kc.convert_sequential_model(keras_model, mxts_mode=MxtsMode.DeconvNet)
deconv_func = deconv_model.get_target_multipliers_func(find_scores_layer_idx=0, target_layer_idx=-1)
deconv_times_inp_func = deconv_model.get_target_contribs_func(find_scores_layer_idx=0, target_layer_idx=-1)

In [124]:
import theano
predictions_func = theano.function([deeplift_model.get_layers()[0].get_activation_vars()],
                                   deeplift_model.get_layers()[-1].get_activation_vars(),
                                   allow_input_downcast=True)

In [125]:
predictions_func([np.ones(testData.X[0].shape)*0.0])

array([[ 0.32562831]], dtype=float32)

In [126]:
predictions = predictions_func(data.X)

Compute the contributions for all 3 tasks and the multipliers for the third task

In [127]:
deeplift_contribs,\
deeplift_multipliers,\
grad_times_inp,\
grad,\
guided_backprop_times_inp,\
guided_backprop,\
deconv_times_inp,\
deconv = [np.array(contribs_func(task_idx=0, input_data_list=[data.X], batch_size=1000, progress_update=None))
                    for contribs_func in [deeplift_contribs_func,
                                          deeplift_multipliers_func,
                                          grad_func,
                                          grad_times_inp_func,
                                          guided_backprop_times_inp_func,
                                          guided_backprop_func,
                                          deconv_times_inp_func,
                                          deconv_func]]

In [131]:
true_positives = np.abs(data.Y[:,0] - np.array(predictions[:,0])) < 0.2
print("True positives: ", true_positives.shape, np.sum(true_positives))
thresholds = np.arange(0,1,1.0/data.X.shape[-1])
true_scores = np.maximum(data.X-thresholds[None,:],0)

for scores_name, scores in [('deeplift',deeplift_contribs),
                            ('grad_times_inp', grad_times_inp),
                            ('grad', grad),
                            ('guided_backprop_times_inp', guided_backprop_times_inp),
                            ('guided_backprop', guided_backprop),
                            ('deconv_times_inp', deconv_times_inp),
                            ('deconv', deconv)]:
    scores = scores
    #normalise scores to equal to prediction
    #scores = scores * (predictions[:,0]/np.sum(scores, axis=1))[:,None]
    
    true_scores_on_true_positives = np.compress(np.nonzero(true_positives.squeeze())[0], true_scores, axis=0)
    scores_on_true_positives = np.compress(np.nonzero(true_positives.squeeze())[0], scores, axis=0)
    mse = np.mean(np.square(true_scores_on_true_positives-scores_on_true_positives))
    print(scores_name, mse)

True positives:  (20000,) 20000
deeplift 0.00147205033174
grad_times_inp 0.259726147902
grad 0.0798918614418
guided_backprop_times_inp 0.0872208890279
guided_backprop 0.229953927246
deconv_times_inp 0.151141441722
deconv 0.500190961702
