In [None]:
# This notebook is a descendant of "Gradient Correction 1.ipynb" and "Gradient Correction 2.ipynb".

In [1]:
# Mount Google Drive into the Colab runtime at /content/drive/.
# NOTE(review): later cells use relative paths such as 'drive/My Drive/...',
# which only resolve if the working directory is /content -- confirm before
# running outside a default Colab session.
from google.colab import drive
drive.mount('/content/drive/')

Mounted at /content/drive/


In [2]:
# Install the `gradientcorrection` package from the tarball of the repository's
# `main` branch on GitHub.
# NOTE(review): the URL is not pinned to a tag or commit, so a re-run may install
# a different revision than the one that produced the saved outputs.
# Uncomment the next line to remove a previously installed copy first.
#!pip uninstall gradientcorrection  
!pip install https://github.com/Antonio-Ionaton/GradientCorrection/tarball/main

Collecting https://github.com/Antonio-Ionaton/GradientCorrection/tarball/main
  Downloading https://github.com/Antonio-Ionaton/GradientCorrection/tarball/main
[K     / 3.5 MB 3.2 MB/s
[?25hBuilding wheels for collected packages: gradientcorrection
  Building wheel for gradientcorrection (setup.py) ... [?25l[?25hdone
  Created wheel for gradientcorrection: filename=gradientcorrection-0.1-py3-none-any.whl size=8848 sha256=05b4c912302e6c016fd16651b2788d342c8df8d296abc18192f21e3984e35ff4
  Stored in directory: /tmp/pip-ephem-wheel-cache-sz0nn5zp/wheels/03/4c/0a/9516d8fa386ed9a21d1aa50e8cc5b1f6d6f576195897b24409
Successfully built gradientcorrection
Installing collected packages: gradientcorrection
Successfully installed gradientcorrection-0.1


In [3]:
import gradientcorrection
from gradientcorrection import layers, utils, metrics, helper, explain, model_zoo, geomath

In [4]:
# MOCK_TEST
# Directory layout used by the cells below (all rooted on the mounted Drive):
#   results_path           -> <results_mock>/task3
#   params_path            -> <results_path>/model_params   (trained weights are read from here)
#   analysis_results_path  -> <results_path>/analysis       (metric .txt files are written here)
#   angles_path            -> <analysis_results_path>/angles
# NOTE(review): utils.make_directory presumably creates the sub-directory if it
# is missing and returns its path -- confirm against gradientcorrection.utils.
results_path = utils.make_directory('drive/My Drive/results_mock', 'task3')  # non-mock alternative: utils.make_directory('../results', 'task3')
params_path = utils.make_directory(results_path, 'model_params')
analysis_results_path = utils.make_directory(results_path, 'analysis')
angles_path = utils.make_directory(analysis_results_path, 'angles')

In [5]:
################################################ Generate attribution maps and save. Generate performance scores and save.
# For every (architecture, activation) pair and every trained trial this cell:
#   1. rebuilds the model and loads its saved weights from `params_path`,
#   2. computes saliency and integrated-gradients maps for the sequences in `X`,
#   3. scores the raw and the corrected maps against the ground truth `X_model`,
#   4. evaluates the model on the full test set,
# and finally writes one text file per metric into `analysis_results_path`
# (one row per trial).
import os
import numpy as np
from six.moves import cPickle
from tensorflow import keras

#------------------------------------------------------------------------
# NOTE(review): currently unused in this cell; the only assignment to it is
# commented out in the save section below. Kept so the notebook-level name survives.
performance_map={}
#------------------------------------------------------------------------

num_trials = 50
model_names = ['cnn-local', 'cnn-dist']
activations = ['relu', 'exponential']

#------------------------------------------------------------------------


def _apply_gradient_correction(attr_map):
    """Return `attr_map` minus 0.25 times its sum along axis 2 (kept as a broadcastable axis).

    With four entries along axis 2 this removes the per-position mean.
    NOTE(review): assumes axis 2 holds the 4 nucleotide channels -- confirm.
    """
    return attr_map - 0.25 * np.sum(attr_map, axis=2, keepdims=True)


def _mean_auroc_aupr(inputs, attr_map, ground_truth):
    """Return [mean of result[0], mean of result[1]] of helper.interpretability_performance.

    The two entries are stored as AUROC and AUPR by the caller. The helper is
    invoked once and both entries are read from the same result (previously it
    was invoked twice with identical arguments).
    NOTE(review): assumes the helper is deterministic for fixed arguments.
    """
    scores = helper.interpretability_performance(inputs, attr_map, ground_truth)
    return [np.average(scores[0]), np.average(scores[1])]


def _save_metric_column(directory, filename, rows, column):
    """Write `np.array(rows)[:, column]` to `directory/filename` as comma-delimited text."""
    np.savetxt(os.path.join(directory, filename), np.array(rows)[:, column], delimiter=",")


#------------------------------------------------------------------------

# load data
data_path = 'drive/My Drive/data/synthetic_code_dataset.h5'
data = helper.load_data(data_path)
x_train, y_train, x_valid, y_valid, x_test, y_test = data

# load ground truth values
test_model = helper.load_synthetic_models(data_path, dataset='test')
# Keep the first 500 test examples whose first label column equals 1.
true_index = np.where(y_test[:,0] == 1)[0]
X = x_test[true_index][:500]
X_model = test_model[true_index][:500]

#### Ground truth = probability normalized. (For example, non-informative positions
#### in prob. space such as [0.25, 0.25, 0.25, 0.25] should be mapped to [0,0,0,0].)
# Axes 1 and 2 are swapped so the ground truth lines up with the attribution maps.
# It does not depend on the model, so it is computed once instead of once per trial.
X_model_normalized = np.swapaxes(X_model, 1, 2) - 0.25

#------------------------------------------------------------------------

for model_name in model_names:
    for activation in activations:

        # Per-trial metric rows; column 0 = raw map, column 1 = corrected map
        # for the cosine lists, and [AUROC, AUPR] for the *AUROC_AUPR lists.
        saliencyCOS_print=[]
        integratedCOS_print=[]
        saliency1AUROC_AUPR_print=[]
        saliency2AUROC_AUPR_print=[]
        integrated1AUROC_AUPR_print=[]
        integrated2AUROC_AUPR_print=[]
        # "cumulative" = results from many runs appended. These three lists are
        # not read again in this cell; they are kept in case later cells use them.
        attr_score1_cumulative = []
        attr_score2_cumulative = []
        saliency_map_raw_cumulative = []
        saliency_scores = []
        integrated_scores = []
        performance_list=[]
        for trial in range(num_trials):
            keras.backend.clear_session()

            # build model
            model = helper.load_model(model_name, activation=activation)
            name = model_name+'_'+activation+'_'+str(trial)
            print('model: ' + name)

            # compile model
            helper.compile_model(model)

            # load the trained weights of this trial
            weights_path = os.path.join(params_path, name+'.hdf5')
            model.load_weights(weights_path)

            # interpretability performance with saliency maps
            print('saliency maps')
            saliency_scores.append(explain.saliency(model, X, class_index=0, layer=-1))

            # interpretability performance with integrated gradients
            print('integrated gradients maps')
            integrated_scores.append(explain.integrated_grad(model, X, class_index=0, layer=-1,
                                                             num_background=10, num_steps=20,
                                                             reference='shuffle'))

            ############ CORRECTION PART, comparison of attribution maps

            # Raw scores of the current trial. Only the newest entry is converted;
            # re-stacking the whole history every trial copied O(trials^2) data.
            saliency_map_raw = np.asarray(saliency_scores[-1])
            integrated_map_raw = np.asarray(integrated_scores[-1])

            #################### Full saliency, 4 derivatives
            attr_score1 = saliency_map_raw
            attr_score2 = _apply_gradient_correction(saliency_map_raw)
            #################### Full integrated gradients, 4 derivatives
            attr_score1_integrated = integrated_map_raw
            attr_score2_integrated = _apply_gradient_correction(integrated_map_raw)

            #################### Wild nucleotides only: a single derivative, the one
            #################### of the nucleotide present, selected by multiplying by X.
            attr_score_wild = saliency_map_raw * X
            attr_score_wild2 = attr_score2 * X
            #################### Wild, integrated gradients
            attr_score_wild_integrated = integrated_map_raw * X
            attr_score_wild_integrated2 = attr_score2_integrated * X

            # Cosine (full map, 4 derivatives)
            saliencyCOS_print.append([geomath.Scalar_product(attr_score1, X_model_normalized),
                                      geomath.Scalar_product(attr_score2, X_model_normalized)])
            integratedCOS_print.append([geomath.Scalar_product(attr_score1_integrated, X_model_normalized),
                                        geomath.Scalar_product(attr_score2_integrated, X_model_normalized)])
            # AUROC and AUPR, only the wild nucleotide derivative
            saliency1AUROC_AUPR_print.append(_mean_auroc_aupr(X, attr_score_wild, X_model))
            integrated1AUROC_AUPR_print.append(_mean_auroc_aupr(X, attr_score_wild_integrated, X_model))
            saliency2AUROC_AUPR_print.append(_mean_auroc_aupr(X, attr_score_wild2, X_model))
            integrated2AUROC_AUPR_print.append(_mean_auroc_aupr(X, attr_score_wild_integrated2, X_model))

            attr_score1_cumulative.append(attr_score1)
            attr_score2_cumulative.append(attr_score2)
            saliency_map_raw_cumulative.append(saliency_map_raw)

            ################################### Classification performance on the full test set
            predictions = model.predict(x_test)
            mean_vals, std_vals = metrics.calculate_metrics(y_test, predictions, 'binary')
            # NOTE(review): index 1 of mean_vals is the stored score; which metric
            # that is depends on metrics.calculate_metrics -- confirm.
            performance_list.append(mean_vals[1])

        # SAVE attribution score statistics (before and after correction) to files.
        prefix = model_name + "_" + activation + "_"

        # Saliency
        _save_metric_column(analysis_results_path, prefix + "saliencyCOSINE.txt", saliencyCOS_print, 0)
        _save_metric_column(analysis_results_path, prefix + "saliencyCOSINE_corrected.txt", saliencyCOS_print, 1)

        _save_metric_column(analysis_results_path, prefix + "saliencyAUROC.txt", saliency1AUROC_AUPR_print, 0)
        _save_metric_column(analysis_results_path, prefix + "saliencyAUPR.txt", saliency1AUROC_AUPR_print, 1)
        _save_metric_column(analysis_results_path, prefix + "saliencyAUROC_corrected.txt", saliency2AUROC_AUPR_print, 0)
        _save_metric_column(analysis_results_path, prefix + "saliencyAUPR_corrected.txt", saliency2AUROC_AUPR_print, 1)

        # IG
        _save_metric_column(analysis_results_path, prefix + "IG_COSINE.txt", integratedCOS_print, 0)
        _save_metric_column(analysis_results_path, prefix + "IG_COSINE_corrected.txt", integratedCOS_print, 1)

        _save_metric_column(analysis_results_path, prefix + "IG_AUROC.txt", integrated1AUROC_AUPR_print, 0)
        _save_metric_column(analysis_results_path, prefix + "IG_AUPR.txt", integrated1AUROC_AUPR_print, 1)
        _save_metric_column(analysis_results_path, prefix + "IG_AUROC_corrected.txt", integrated2AUROC_AUPR_print, 0)
        _save_metric_column(analysis_results_path, prefix + "IG_AUPR_corrected.txt", integrated2AUROC_AUPR_print, 1)

        # SAVE performance scores
        #performance_map[model_name + "_" + activation] =performance_list
        filename = os.path.join(analysis_results_path, prefix + "performance.txt")
        np.savetxt(filename, np.array(performance_list) , delimiter=",")
                


Instructions for updating:
Colocations handled automatically by placer.
model: cnn-dist_relu_0
saliency maps

integrated gradients maps




model: cnn-dist_relu_1
saliency maps
integrated gradients maps
model: cnn-dist_relu_2
saliency maps
integrated gradients maps
model: cnn-dist_relu_3
saliency maps
integrated gradients maps
model: cnn-dist_relu_4
saliency maps
integrated gradients maps
model: cnn-dist_relu_5
saliency maps
integrated gradients maps
model: cnn-dist_relu_6
saliency maps
integrated gradients maps
model: cnn-dist_relu_7
saliency maps
integrated gradients maps
model: cnn-dist_relu_8
saliency maps
integrated gradients maps
model: cnn-dist_relu_9
saliency maps
integrated gradients maps
model: cnn-dist_relu_10
saliency maps
integrated gradients maps
model: cnn-dist_relu_11
saliency maps
integrated gradients maps
model: cnn-dist_relu_12
saliency maps
integrated gradients maps
model: cnn-dist_relu_13
saliency maps
integrated gradients maps
model: cnn-dist_relu_14
saliency maps
integrated gradients maps
model: cnn-dist_relu_15
saliency maps
integrated gradients maps
model: cnn-dist_relu_16
saliency maps
integrated 

In [36]:
######################################################################## Calculate angles
# For every trained model this cell computes the saliency map of the sequences
# in `X`, derives per-position angle statistics via geomath.calculate_angles,
# measures how much the gradient correction changes the per-position cosine
# similarity to the ground truth, and writes everything into `angles_path`.
import os
import numpy as np
from six.moves import cPickle
from tensorflow import keras

num_trials = 50
model_names = ['cnn-local', 'cnn-dist']
activations = ['relu', 'exponential']

#------------------------------------------------------------------------


def _save_txt(directory, filename, values):
    """Write `np.array(values)` to `directory/filename` as comma-delimited text."""
    np.savetxt(os.path.join(directory, filename), np.array(values), delimiter=",")


def _cosine_per_position(attr_map, truth, truth_norm):
    """Cosine similarity between `attr_map` and `truth` along the last axis.

    `truth_norm` is the precomputed L2 norm of `truth` along the last axis.
    Positions where either vector is all zeros evaluate to 0/0 and therefore
    yield NaN (NumPy also emits a RuntimeWarning); those NaNs are passed
    through unchanged, exactly as the inline formula did.
    """
    attr_norm = np.sqrt(np.sum(attr_map*attr_map, axis=-1))
    return np.sum(attr_map * truth, axis=-1) / ( attr_norm * truth_norm )


#------------------------------------------------------------------------

# load data
data_path = 'drive/My Drive/data/synthetic_code_dataset.h5'
data = helper.load_data(data_path)
x_train, y_train, x_valid, y_valid, x_test, y_test = data

# load ground truth values
test_model = helper.load_synthetic_models(data_path, dataset='test')
# Keep the first 500 test examples whose first label column equals 1.
true_index = np.where(y_test[:,0] == 1)[0]
X = x_test[true_index][:500]
X_model = test_model[true_index][:500]

#### Ground truth = probability normalized. (For example, non-informative positions
#### in prob. space such as [0.25, 0.25, 0.25, 0.25] should be mapped to [0,0,0,0].)
# Both the normalized ground truth and its norm are model-independent, so they
# are computed once here rather than inside the trial loop.
X_model_normalized = np.swapaxes(X_model, 1, 2) - 0.25
X_model_normalized_norm = np.sqrt(np.sum(X_model_normalized*X_model_normalized, axis=-1))

#------------------------------------------------------------------------

for model_name in model_names:
    for activation in activations:

        saliency_scores = []   # raw saliency map of every trial (used for the ensemble below)
        sine_list=[]           # geomath.calculate_angles output of every trial
        for trial in range(num_trials):
            keras.backend.clear_session()

            # build model
            model = helper.load_model(model_name, activation=activation)
            name = model_name+'_'+activation+'_'+str(trial)
            print('model: ' + name)

            # compile model
            helper.compile_model(model)

            # load the trained weights of this trial
            weights_path = os.path.join(params_path, name+'.hdf5')
            model.load_weights(weights_path)

            # interpretability performance with saliency maps
            print('saliency maps')
            saliency_scores.append(explain.saliency(model, X, class_index=0, layer=-1))

            ############ CORRECTION PART, comparison of attribution maps

            # Raw saliency scores of the current trial. Only the newest entry is
            # converted; re-stacking the whole list each trial copied O(trials^2) data.
            saliency_map_raw = np.asarray(saliency_scores[-1])

            # ANGLES
            # NOTE(review): the result is named `sine` but saved as "angles";
            # check geomath.calculate_angles for the actual quantity returned.
            sine = geomath.calculate_angles(saliency_map_raw)
            _save_txt(angles_path, name + "_" + "angles.txt", sine)
            sine_list.append(sine)

            # Improvement: change in per-position cosine similarity to the ground
            # truth after subtracting 0.25 * (sum over the last axis) from the map.
            saliency_map_raw_CORRECTED = saliency_map_raw - 0.25* np.sum(saliency_map_raw, axis=-1, keepdims=True)
            similarity1 = _cosine_per_position(saliency_map_raw, X_model_normalized, X_model_normalized_norm)
            similarity2 = _cosine_per_position(saliency_map_raw_CORRECTED, X_model_normalized, X_model_normalized_norm)
            improvement = similarity2 - similarity1
            _save_txt(angles_path, name + "_" + "improvement.txt", improvement)

            # saliency_wild: multiply by X and sum over the last axis, before (1)
            # and after (2) correction.
            saliency_wild1= np.sum(saliency_map_raw * X, axis=-1)
            saliency_wild2= np.sum(saliency_map_raw_CORRECTED * X, axis=-1)
            _save_txt(angles_path, name + "_" + "saliency_wild1.txt", saliency_wild1)
            _save_txt(angles_path, name + "_" + "saliency_wild2.txt", saliency_wild2)

        combo = model_name+'_'+activation + "_"

        # Large angles (three outputs, saved under the 30 / 45 / 60 file names)
        count_large_angles30, count_large_angles45, count_large_angles60 = geomath.count_large_angles(np.array(sine_list))
        _save_txt(angles_path, combo + "large_angles30.txt", count_large_angles30)
        _save_txt(angles_path, combo + "large_angles45.txt", count_large_angles45)
        _save_txt(angles_path, combo + "large_angles60.txt", count_large_angles60)

        # Ensemble: angles of the saliency map averaged over all trials
        saliency_map_raw_AVERAGE = np.average(np.array(saliency_scores)  , axis = 0)
        sine_ensemble = geomath.calculate_angles(saliency_map_raw_AVERAGE)
        #sine_ensemble =np.array(sine_ensemble.reshape(len(sine_ensemble)*len(sine_ensemble[0]),))
        sine_ensemble = np.array(sine_ensemble)
        _save_txt(angles_path, combo + "ensemble_angles.txt", sine_ensemble)




model: cnn-local_relu_0
saliency maps




model: cnn-local_relu_1
saliency maps
model: cnn-local_relu_2
saliency maps
model: cnn-local_relu_3
saliency maps
model: cnn-local_relu_4
saliency maps
model: cnn-local_relu_5
saliency maps
model: cnn-local_relu_6
saliency maps
model: cnn-local_relu_7
saliency maps
model: cnn-local_relu_8
saliency maps
model: cnn-local_relu_9
saliency maps
model: cnn-local_relu_10
saliency maps
model: cnn-local_relu_11
saliency maps
model: cnn-local_relu_12
saliency maps
model: cnn-local_relu_13
saliency maps
model: cnn-local_relu_14
saliency maps
model: cnn-local_relu_15
saliency maps
model: cnn-local_relu_16
saliency maps
model: cnn-local_relu_17
saliency maps
model: cnn-local_relu_18
saliency maps
model: cnn-local_relu_19
saliency maps
model: cnn-local_relu_20
saliency maps
model: cnn-local_relu_21
saliency maps
model: cnn-local_relu_22
saliency maps
model: cnn-local_relu_23
saliency maps
model: cnn-local_relu_24
saliency maps
model: cnn-local_relu_25
saliency maps
model: cnn-local_relu_26
saliency 

  sine = 1/2 * orthogonal_residual / L2_norm


model: cnn-local_exponential_32
saliency maps
model: cnn-local_exponential_33
saliency maps
model: cnn-local_exponential_34
saliency maps
model: cnn-local_exponential_35
saliency maps
model: cnn-local_exponential_36
saliency maps
model: cnn-local_exponential_37
saliency maps
model: cnn-local_exponential_38
saliency maps
model: cnn-local_exponential_39
saliency maps
model: cnn-local_exponential_40
saliency maps
model: cnn-local_exponential_41
saliency maps
model: cnn-local_exponential_42
saliency maps
model: cnn-local_exponential_43
saliency maps
model: cnn-local_exponential_44
saliency maps
model: cnn-local_exponential_45
saliency maps
model: cnn-local_exponential_46
saliency maps
model: cnn-local_exponential_47
saliency maps
model: cnn-local_exponential_48
saliency maps
model: cnn-local_exponential_49
saliency maps
model: cnn-dist_relu_0
saliency maps
model: cnn-dist_relu_1
saliency maps
model: cnn-dist_relu_2
saliency maps
model: cnn-dist_relu_3
saliency maps
model: cnn-dist_relu_4
s