General import statements. REMEMBER, DEEPLIFT_DIR needs to point to the deeplift directory WITHIN the deeplift repo

In [48]:
%matplotlib inline
from __future__ import division;
from __future__ import print_function;
from __future__ import absolute_import;
import sys, os;
from collections import OrderedDict, namedtuple;
import numpy as np;

#Make sure the directory is set to import the lab's version of keras
scriptsDir = os.environ.get("KERAS_DIR");
if (scriptsDir is None):
    raise Exception("Please set environment variable KERAS_DIR");
sys.path.insert(0,scriptsDir)

scriptsDir = os.environ.get("ENHANCER_SCRIPTS_DIR");
if (scriptsDir is None):
    raise Exception("Please set environment variable ENHANCER_SCRIPTS_DIR to point to the enhancer_prediction_code repo");
sys.path.insert(0,scriptsDir+"/featureSelector/deepLIFFT");
from deepLIFTutils import makePngOfSequenceDeepLIFTScores

import deeplift
import deeplift.conversion.keras_conversion as kc

Load the keras model

In [49]:
#Load the keras model, make sure you normalise the weights
#of the first convolutional layer to be mean-centered at each position.
model_weights = "modelsDir_runs/record_0_model_DXhpM_modelWeights.h5"
model_yaml = "modelsDir_runs/record_0_model_DXhpM_modelYaml.yaml"
reload(kc)
keras_model = kc.load_keras_model(model_weights, model_yaml, normalise_conv_for_one_hot_encoded_input=False)

Load the data

In [50]:
scriptsDir = os.environ.get("UTIL_SCRIPTS_DIR");
if (scriptsDir is None):
    raise Exception("Please set environment variable UTIL_SCRIPTS_DIR to point to the deeplift code");
sys.path.insert(0,scriptsDir);
from importDataPackage import importData
reload(importData)
trainData, validData, testData = importData.loadTrainTestValidFromYaml("yaml_10K/features.yaml",
                                                                       "yaml_10K/labels.yaml",
                                                                       "yaml_10K/splits.yaml")

(0, 'rows skipped from', 'features.gz')
Returning desired dict
Making numpy arrays out of the loaded files
('train', 'shapeX', (8000, 50))
('train', 'shapeY', (8000, 45))
('valid', 'shapeX', (1000, 50))
('valid', 'shapeY', (1000, 45))
('test', 'shapeX', (1000, 50))
('test', 'shapeY', (1000, 45))


In [51]:
data = trainData.concat(validData, testData)

Convert the keras sequential model into a deeplift sequential model, and compile the functions to compute the contributions and multipliers - the multipliers are analogous to the gradients

In [52]:
from deeplift.blobs import MxtsMode
reload(kc)

<module 'deeplift.conversion.keras_conversion' from '/Users/avantishrikumar/Research/deeplift/deeplift/conversion/keras_conversion.pyc'>

In [53]:
deeplift_model = kc.convert_sequential_model(keras_model, mxts_mode=MxtsMode.DeepLIFT)
deeplift_contribs_func = deeplift_model.get_target_contribs_func(find_scores_layer_idx=0)
deeplift_multipliers_func = deeplift_model.get_target_multipliers_func(find_scores_layer_idx=0)

In [54]:
deeplift_expo_upweight_model = kc.convert_sequential_model(keras_model, mxts_mode=MxtsMode.DeepLIFT, expo_upweight_factor=1)
deeplift_expo_upweight_contribs_func = deeplift_expo_upweight_model.get_target_contribs_func(find_scores_layer_idx=0)

Do the same for other saliency map functions

In [55]:
gradients_model = kc.convert_sequential_model(keras_model, mxts_mode=MxtsMode.Gradient)
grad_times_inp_func = gradients_model.get_target_contribs_func(find_scores_layer_idx=0)
guided_backprop_model = kc.convert_sequential_model(keras_model, mxts_mode=MxtsMode.GuidedBackprop)
guided_backprop_func = guided_backprop_model.get_target_contribs_func(find_scores_layer_idx=0)
deconv_model = kc.convert_sequential_model(keras_model, mxts_mode=MxtsMode.DeconvNet)
deconv_func = deconv_model.get_target_contribs_func(find_scores_layer_idx=0)
guided_backprop_deeplift1_model = kc.convert_sequential_model(keras_model, mxts_mode=MxtsMode.GuidedBackpropDeepLIFT1)
guided_backprop_deeplift1_func = guided_backprop_deeplift1_model.get_target_contribs_func(find_scores_layer_idx=0)
guided_backprop_deeplift4_model = kc.convert_sequential_model(keras_model, mxts_mode=MxtsMode.GuidedBackpropDeepLIFT4)
guided_backprop_deeplift4_func = guided_backprop_deeplift4_model.get_target_contribs_func(find_scores_layer_idx=0)

In [56]:
import theano
predictions_func = theano.function([deeplift_model.get_layers()[0].get_activation_vars()],
                                   deeplift_model.get_layers()[-1].get_activation_vars(),
                                   allow_input_downcast=True)

In [57]:
predictions_func([np.ones(testData.X[0].shape)*0.001])

array([[ 0.02288487,  0.01503252,  0.01391376,  0.01689468,  0.01365434,
         0.01088016,  0.01467786,  0.02019062,  0.01832591,  0.01921142,
         0.01459569,  0.0175929 ,  0.01468958,  0.01762873,  0.01738314,
         0.02023337,  0.01273123,  0.01259801,  0.01287257,  0.01017799,
         0.01327073,  0.01009995,  0.01617088,  0.01522067,  0.01735248,
         0.01058327,  0.0125776 ,  0.01211225,  0.01547582,  0.01146999,
         0.0125803 ,  0.01609561,  0.01794443,  0.01786777,  0.01431722,
         0.00875479,  0.00894387,  0.01631268,  0.01100512,  0.01454011,
         0.02080688,  0.0154064 ,  0.01683253,  0.01237277,  0.0133016 ]], dtype=float32)

In [58]:
predictions = predictions_func(data.X)

In [59]:
for output_idx in range(data.Y.shape[-1]):
    print(sorted(enumerate(zip(deeplift_model.get_layers()[-2].W[:,output_idx],
                               deeplift_model.get_layers()[-4].b)), key=lambda x: -abs(x[1][0]) )[0:2])


[(14, (-9.90273, 0.42178848)), (5, (-8.8509884, 0.45459494))]
[(10, (-8.8869429, 0.33364159)), (5, (-8.5870066, 0.45459494))]
[(5, (-9.0000973, 0.45459494)), (16, (-7.4516282, 0.35848406))]
[(7, (-10.08846, 0.4254503)), (5, (-8.6113901, 0.45459494))]
[(5, (-8.7059097, 0.45459494)), (3, (-8.4875746, 0.30556861))]
[(5, (-9.0618114, 0.45459494)), (18, (-8.620079, 0.33977368))]
[(19, (-9.7191772, 0.39901626)), (5, (-8.8178024, 0.45459494))]
[(8, (-9.4743862, 0.44665283)), (5, (-8.8355494, 0.45459494))]
[(9, (-9.4154024, 0.44174463)), (5, (-8.5573454, 0.45459494))]
[(14, (-10.185414, 0.42178848)), (10, (-8.2350903, 0.33364159))]
[(14, (-10.235136, 0.42178848)), (16, (-7.2781382, 0.35848406))]
[(14, (-10.490412, 0.42178848)), (7, (-10.199765, 0.4254503))]
[(14, (-10.346035, 0.42178848)), (3, (-7.8338437, 0.30556861))]
[(14, (-10.1131, 0.42178848)), (18, (-8.0485525, 0.33977368))]
[(14, (-10.446726, 0.42178848)), (19, (-10.321595, 0.39901626))]
[(14, (-10.086033, 0.42178848)), (8, (-9.0971603

In [60]:
for i in range(deeplift_model.get_layers()[1].W.shape[1]):
    print("node:",i)
    print(sorted(enumerate(deeplift_model.get_layers()[1].W[:,i]), key=lambda x: -np.abs(x[1]))[0:5])

node: 0
[(14, -3.0823469), (12, -3.066566), (13, -3.0520539), (11, -3.0448103), (10, -2.9601963)]
node: 1
[(2, 0.69218224), (0, 0.57025635), (48, 0.56648636), (1, 0.56131423), (3, 0.54749012)]
node: 2
[(1, -1.145311), (3, -1.0972462), (4, -1.0561395), (2, -0.99623019), (0, -0.98296332)]
node: 3
[(26, -3.5051348), (28, -3.5043664), (29, -3.4993424), (27, -3.4882243), (25, -3.4365299)]
node: 4
[(18, 0.70912421), (16, 0.65234143), (19, 0.59847224), (17, 0.59531063), (15, 0.52341205)]
node: 5
[(1, -4.1212611), (3, -4.0934711), (4, -4.082191), (2, -4.0742764), (0, -4.0715995)]
node: 6
[(4, -1.0874352), (0, -1.0732213), (3, -0.95294684), (41, -0.88277102), (1, -0.8556034)]
node: 7
[(24, -4.2877216), (23, -4.2757592), (21, -4.2328453), (22, -4.2306466), (20, -4.2299652)]
node: 8
[(44, -4.1709027), (43, -4.1180568), (42, -4.1108727), (40, -4.0983443), (41, -4.0622697)]
node: 9
[(49, -4.1456556), (46, -4.1388416), (48, -4.100719), (47, -4.0803556), (45, -4.0795326)]
node: 10
[(10, -3.8386698), 

Compute the contributions for all 3 tasks and the multipliers for the third task

In [61]:
deeplift_contribs,\
deeplift_expo_upweight_contribs,\
deeplift_multipliers,\
grad_times_inp,\
guided_backprop,\
deconv_contribs = [[np.array(contribs_func(task_idx=i, input_data_list=[data.X], batch_size=1000, progress_update=None))
                    for i in range(data.Y.shape[-1])]
                    for contribs_func in [deeplift_contribs_func,
                                          deeplift_expo_upweight_contribs_func,
                                          deeplift_multipliers_func,
                                          grad_times_inp_func,
                                          guided_backprop_func,
                                          deconv_func]]

In [62]:
scores = np.array(deeplift_contribs)
for scores_name, scores in [('deeplift',deeplift_contribs),
                            ('deeplift_expo_upweight', deeplift_expo_upweight_contribs),
                            ('grad_times_inp', grad_times_inp),
                            ('guided_backprop', guided_backprop),
                            ('deconv', deconv_contribs)]:
    num_caps = 10
    inputs_per_cap = int(data.X.shape[-1]/num_caps)
    task_idx=0
    true_positives = data.Y*(np.array(predictions)>0.5)
    average_proportion_on_correct_array = []
    for cap1 in range(num_caps):
        for cap2 in range(cap1+1,num_caps):
            relevant_inputs_cap1 = slice(cap1*inputs_per_cap,(cap1+1)*inputs_per_cap)
            relevant_inputs_cap2 = slice(cap2*inputs_per_cap,(cap2+1)*inputs_per_cap)
            true_positives_mask = true_positives[:,task_idx]
            scores_on_true_positives = np.abs(np.compress(np.nonzero(true_positives_mask)[0],scores[task_idx],axis=0))
            total_scores = np.sum(scores_on_true_positives, axis=1)
            total_scores = 0.0000001*(total_scores == 0) + total_scores
            scores_on_relevant_indices = np.sum(scores_on_true_positives[:,relevant_inputs_cap1], axis=1) +\
                                         np.sum(scores_on_true_positives[:,relevant_inputs_cap2], axis=1)
            proportion_on_correct_indices = scores_on_relevant_indices/total_scores
            average_proportion = np.mean(proportion_on_correct_indices)
            average_proportion_on_correct_array.append(average_proportion)
            task_idx += 1
    print(scores_name, average_proportion)

deeplift 0.958514390337
deeplift_expo_upweight 0.946541609601
grad_times_inp 0.85333035419
guided_backprop 0.26892951981
deconv 0.211597196932
