In [1]:
from __future__ import print_function
import keras
keras.__version__

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


'2.1.6'

In [2]:
keras_model = keras.models.load_model("../dnn/nt3.h5")
keras_model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 1000)              979000    
_________________________________________________________________
activation_1 (Activation)    (None, 1000)              0         
_________________________________________________________________
dropout_1 (Dropout)          (None, 1000)              0         
_________________________________________________________________
dense_2 (Dense)              (None, 500)               500500    
_________________________________________________________________
activation_2 (Activation)    (None, 500)               0         
_________________________________________________________________
dropout_2 (Dropout)          (None, 500)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 100)               50100     
__________

In [3]:
import deeplift
from deeplift.blobs import NonlinearMxtsMode
from deeplift.conversion import keras_conversion as kc

#Three different models, one each for RevealCancel, Gradient and GuidedBackprop
revealcancel_model = kc.convert_sequential_model(model=keras_model, nonlinear_mxts_mode=NonlinearMxtsMode.RevealCancel)
grad_model = kc.convert_sequential_model(model=keras_model, nonlinear_mxts_mode=NonlinearMxtsMode.Gradient)
guided_backprop_model = kc.convert_sequential_model(model=keras_model, nonlinear_mxts_mode=NonlinearMxtsMode.GuidedBackprop)

nonlinear_mxts_mode is set to: RevealCancel
Heads-up: I assume softmax is the output layer, not an intermediate one; if it's an intermediate layer, please let me know and I will prioritise that use-case
nonlinear_mxts_mode is set to: Gradient
Heads-up: I assume softmax is the output layer, not an intermediate one; if it's an intermediate layer, please let me know and I will prioritise that use-case
nonlinear_mxts_mode is set to: GuidedBackprop
Heads-up: I assume softmax is the output layer, not an intermediate one; if it's an intermediate layer, please let me know and I will prioritise that use-case


In [4]:
### load data
import pandas as pd
import numpy as np
from keras.utils import np_utils
df_test = (pd.read_csv('../data-05-31-2018/formatted_full_data.csv',header=None).values).astype('float32')
df_y_test = df_test[:,0].astype('int')
seqlen = df_test.shape[1]
Y_test = np_utils.to_categorical(df_y_test,2)
X_test = df_test[:, 1:seqlen].astype(np.float32)

In [13]:
### import deeplift and compile functions
from deeplift.util import compile_func
import numpy as np
from keras import backend as K

deeplift_model = revealcancel_model
deeplift_prediction_func = compile_func([deeplift_model.get_layers()[0].get_activation_vars()],
                                       deeplift_model.get_layers()[-1].get_activation_vars())
original_model_predictions = keras_model.predict(X_test, batch_size=200)
converted_model_predictions = deeplift.util.run_function_in_batches(
                                input_data_list=[X_test],
                                func=deeplift_prediction_func,
                                batch_size=200,
                                progress_update=None)
print("difference in predictions:",np.max(np.array(converted_model_predictions)-np.array(original_model_predictions)))
assert np.max(np.array(converted_model_predictions)-np.array(original_model_predictions)) < 10**-5
predictions = converted_model_predictions

difference in predictions: 0.0


In [6]:
### specify layers which we want to backprop with different methods
### find_scores_layer_idx=0 means scores for input layer, 
### target_layer_idx=-2 for nonlinear softmax and sigmoid outputs
from keras import backend as K
import deeplift
from deeplift.util import get_integrated_gradients_function

revealcancel_func = revealcancel_model.get_target_contribs_func(find_scores_layer_idx=0, target_layer_idx=-2)
grad_times_inp_func = grad_model.get_target_contribs_func(find_scores_layer_idx=0, target_layer_idx=-2)
guided_backprop_times_inp_func = guided_backprop_model.get_target_contribs_func(find_scores_layer_idx=0, target_layer_idx=-2)

gradient_func = grad_model.get_target_multipliers_func(find_scores_layer_idx=0, target_layer_idx=-2)
guided_backprop_func = guided_backprop_model.get_target_multipliers_func(find_scores_layer_idx=0, target_layer_idx=-2)

#pure-gradients or pure-guidedbackprop perform rather poorly because they produce scores on pixels that are 0 (which are
#the backround in MNIST). But we can give them a slight advantage by masking out positions that
#are zero. Also, the method of simonyan et al uses the magnitude of the gradient.
simonyan_func_masked = lambda input_data_list, **kwargs: ((input_data_list[0]>0.0)*
                        np.abs(np.array(gradient_func(input_data_list=input_data_list,**kwargs))))
guided_backprop_func_masked = lambda input_data_list, **kwargs: ((input_data_list[0]>0.0)*
                               guided_backprop_func(input_data_list=input_data_list, **kwargs))

#prepare the integrated gradients scoring function
#heads-up: these take 5x and 10x longer to compute respectively!
integrated_grads_5 = get_integrated_gradients_function(gradient_func, 5)
integrated_grads_10 = get_integrated_gradients_function(gradient_func, 10)

In [7]:
#input_test = (pd.read_csv('/Users/Zireael/Desktop/Maslov/argonne/feature_importance/mean_normal_baseline.csv',header=None).values).astype('float32')
input_test = np.mean(X_test[df_y_test == 0], axis=0)
#X_ref = input_test[:, 1:seqlen].astype(np.float32)
input_test.ravel()

array([4.57771656e+05, 6.00685039e+04, 5.62067250e+05, 8.19286125e+05,
       1.91374156e+05, 9.28475703e+04, 2.00489062e+05, 6.53191250e+05,
       2.18259078e+05, 3.61834094e+05, 1.50178484e+05, 2.65106781e+05,
       1.37154594e+05, 4.41414812e+05, 8.25155391e+04, 4.63100117e+04,
       6.84963938e+05, 2.57520906e+05, 4.71978375e+05, 4.32638031e+05,
       6.25209250e+05, 2.19609141e+04, 1.32605812e+06, 4.42470812e+05,
       2.32534484e+05, 1.79049516e+05, 2.93532312e+05, 6.51873477e+04,
       1.70364953e+05, 1.63719641e+05, 2.13173848e+04, 2.61158250e+05,
       1.08424600e+06, 4.82172438e+05, 3.18841172e+04, 8.34220875e+05,
       1.30313742e+05, 2.34952672e+05, 3.49474344e+05, 7.90128906e+04,
       2.75429562e+05, 6.51352938e+05, 1.81946012e+06, 3.83369188e+05,
       4.28877031e+05, 1.81276078e+05, 7.98416875e+05, 1.15946172e+05,
       6.87758672e+04, 3.77742975e+06, 1.67479325e+06, 1.87348172e+05,
       1.55595422e+05, 1.46549234e+05, 4.45471172e+04, 6.30570938e+05,
      

In [8]:
### compute scores for 0(normal) and 1(tumor)
from collections import OrderedDict
method_to_task_to_scores = OrderedDict()
print("HEADS UP! integrated_grads_5 and integrated_grads_10 take 5x and 10x longer to run respectively")
print("Consider leaving them out to get faster results")
for method_name, score_func in [
                               ('revealcancel', revealcancel_func),
                               ('guided_backprop_masked', guided_backprop_func_masked),
                               ('guided_backprop_times_inp', guided_backprop_times_inp_func),
                               ('simonyan_masked', simonyan_func_masked), 
                               ('grad_times_inp', grad_times_inp_func),
                               ('integrated_grads_5', integrated_grads_5),
                               ('integrated_grads_10', integrated_grads_10)
]:
    print("Computing scores for:",method_name)
    method_to_task_to_scores[method_name] = {}
    for task_idx in range(2):
        print("\tComputing scores for task: "+str(task_idx))
        scores = np.array(score_func(
                    task_idx=task_idx,
                    input_data_list=[X_test],
                    #input_references_list=[np.zeros_like(X_test)],
                    input_references_list=input_test,
                    batch_size=1000,
                    progress_update=None))
        method_to_task_to_scores[method_name][task_idx] = scores

HEADS UP! integrated_grads_5 and integrated_grads_10 take 5x and 10x longer to run respectively
Consider leaving them out to get faster results
Computing scores for: revealcancel
	Computing scores for task: 0
	Computing scores for task: 1
Computing scores for: guided_backprop_masked
	Computing scores for task: 0
	Computing scores for task: 1
Computing scores for: guided_backprop_times_inp
	Computing scores for task: 0
	Computing scores for task: 1
Computing scores for: simonyan_masked
	Computing scores for task: 0
	Computing scores for task: 1
Computing scores for: grad_times_inp
	Computing scores for task: 0
	Computing scores for task: 1
Computing scores for: integrated_grads_5
	Computing scores for task: 0
	Computing scores for task: 1
Computing scores for: integrated_grads_10
	Computing scores for task: 0
	Computing scores for task: 1


In [9]:
scores[0]

array([ 4.08332900e+04, -3.78428433e+03, -2.45290733e+05, -1.60821689e+03,
       -8.54301642e+03,  5.14283499e+04, -2.01754650e+04,  3.67260824e+04,
       -2.04314263e+04, -4.49132316e+04, -1.03247347e+04,  1.98371743e+03,
        9.55979790e+04,  5.40579074e+03, -3.07579589e+04, -6.31568471e+03,
       -1.89474532e+04,  6.38995476e+02,  1.88573276e+02,  4.69748488e+03,
        2.71079899e+04,  1.42449269e+04, -4.10443878e+04, -5.09606112e+04,
       -6.70545588e+04,  2.35141611e+03,  4.29705707e+04,  5.71331288e+04,
       -6.12075390e+04, -6.34499042e+03, -1.79442355e+04,  4.21862187e+03,
       -1.14188308e+04, -1.51273554e+04, -3.55224098e+04,  1.08246075e+03,
       -3.06107906e+04, -2.56267198e+04, -2.99745542e+04, -4.82251538e+04,
        6.16165199e+04, -1.31565307e+04, -2.49560296e+04, -5.32081234e+04,
        4.56984927e+04, -1.54626314e+04, -3.42256237e+04, -2.56960322e+04,
        6.23160023e+04, -6.42757252e+04, -3.32829352e+05, -3.02896763e+04,
        4.25825551e+04, -

In [10]:
### save scores
index = ['Row'+str(i) for i in range(1, len(scores)+1)]
df = pd.DataFrame(scores, index=index)
#scores.to_csv("scores.csv", sep='\t')

In [11]:
method_to_task_to_scores['revealcancel'][0]

array([[ -9616.77849455,  -1223.11897227,  67720.1572349 , ...,
          2692.38043411,   -178.21567173, -14531.81076483],
       [ -2355.65460398,   6367.04196686,  44179.80013703, ...,
         -8292.28626898,   4091.51866647, -12060.20860449],
       [ -9794.91802203,   1153.31139941,   7630.47171324, ...,
        -12230.52718523,   1580.88095986, -13831.72094322],
       ...,
       [   828.46391739,   4772.72222897,  57039.09485824, ...,
          3604.04661768,   2758.16492261, -12949.80254649],
       [ -6645.66681455,   1920.17311523,   -322.61099943, ...,
        -11165.35083907,    446.85258144, -13824.5439926 ],
       [ -8755.54014296,   2877.05714579,  39494.34255797, ...,
          6649.16256027,   -260.69026666, -13116.96972222]])

In [12]:
index = ['Row'+str(i) for i in range(1, len(method_to_task_to_scores['revealcancel'][0])+1)]
df = pd.DataFrame(method_to_task_to_scores['revealcancel'][0], index=index)
df.to_csv("revealcancel_0_scores_mean.csv", sep='\t')

In [13]:
index = ['Row'+str(i) for i in range(1, len(method_to_task_to_scores['revealcancel'][1])+1)]
df = pd.DataFrame(method_to_task_to_scores['revealcancel'][1], index=index)
df.to_csv("revealcancel_1_scores_mean.csv", sep='\t')