In [21]:
import numpy as np
import pandas as pd
import glob
from os import listdir
import seaborn as sns
from os.path import isfile, join
from sklearn import preprocessing, metrics

from scripts.CompareExplanation import get_final_ranking, summarize_auc, get_final_ranking_new
from scripts.Explanation import GetExplanation, reshape_lime_explanation, load_explanation, create_random_explanation
import utils.visualization as vis
from utils.data import LocalDataLoader, data_summary

import warnings
warnings.filterwarnings('ignore')

%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [27]:
def get_pos_saliency(X, threshold=0.5):
    """Binarize saliency weights after min-max scaling each row.

    Every row of ``X`` (scaling is done along axis 1) is rescaled to [0, 1]
    using that row's own minimum and maximum. An entry is flagged ``True``
    when its scaled value is greater than or equal to ``threshold``.

    Parameters
    ----------
    X : np.ndarray
        Array with at least two dimensions; presumably one row of saliency
        weights per explained instance.
    threshold : float, default 0.5
        Cut-off applied to the scaled values.

    Returns
    -------
    np.ndarray of bool
        Same shape as ``X``.

    Notes
    -----
    A constant row has zero range, so the scaling yields NaN (0/0) and every
    entry of that row compares ``False``.
    """
    row_min = X.min(axis=1, keepdims=True)
    row_max = X.max(axis=1, keepdims=True)
    scaled = (X - row_min) / (row_max - row_min)
    return scaled >= threshold

def index_to_label(start_end_array, n_steps=50):
    """Convert (start, end) saliency intervals into a 0/1 label matrix.

    For each row, positions ``start <= j < end`` are set to 1 and every other
    position to 0. Intervals are intersected with ``[0, n_steps)``: a negative
    start no longer wraps around to mark the tail of the series, and an end
    beyond ``n_steps`` is clipped rather than raising ``IndexError``.

    Parameters
    ----------
    start_end_array : array-like
        One entry per series; element 0 is the start index and element 1 the
        (exclusive) end index. Values are converted with ``int()``.
    n_steps : int, default 50
        Length of each series, i.e. number of columns in the result.

    Returns
    -------
    np.ndarray of float64, shape (n_series, n_steps)
        1.0 inside the interval, 0.0 elsewhere. An empty input yields an array
        of shape ``(0, n_steps)``.
    """
    rows = list(start_end_array)
    # int() per value keeps the original truncation semantics for float metadata.
    starts = np.array([int(row[0]) for row in rows], dtype=np.int64).reshape(-1, 1)
    ends = np.array([int(row[1]) for row in rows], dtype=np.int64).reshape(-1, 1)

    # Broadcast (n_series, 1) bounds against the (n_steps,) position index.
    steps = np.arange(n_steps)
    inside = (steps >= starts) & (steps < ends)
    return inside.astype(float)

def get_roc(dataset,datapath):
    """Print the ROC AUC of each explanation method against the true saliency.

    The ground truth is read from ``data/synth/<dataset>_TEST_meta.npy``;
    columns 1 and 2 of that array are used as the (start, end) indices of the
    salient region and expanded to a 0/1 matrix with ``index_to_label``.
    Each method's saliency is binarized with ``get_pos_saliency`` and scored
    with ``sklearn.metrics.roc_auc_score`` over the flattened arrays.

    Parameters
    ----------
    dataset : str
        Dataset name, used for the metadata file name and passed to the
        explanation loaders.
    datapath : str
        Directory passed to ``load_explanation`` / ``create_random_explanation``.

    Returns
    -------
    None
        Results are printed, one line per method.
    """
    # Use the function's own arguments; the previous version silently read the
    # notebook globals `ds` and `ds_dir`, so it only worked when the calling
    # cell happened to define variables with exactly those names.
    meta = np.load('data/synth/'+dataset+'_TEST_meta.npy')
    true_saliency = index_to_label(meta[:,1:3])

    xais = ['GradientShap','IG','LIME','ROCKET','MrSEQL','random2020']
    # Methods whose stored explanation is loaded with reshape_lime=True.
    lime_xais = ['lime','LIME','Lime','ROCKET']
    for xai in xais:
        if xai == 'random2020':
            # The second returned value (a weight name) is not needed here.
            explainer_saliency, _ = create_random_explanation(datapath=datapath,dataset=dataset)
        else:
            explainer_saliency = load_explanation(
                datapath=datapath,
                dataset=dataset,
                explanation_type=xai,
                reshape_lime=xai in lime_xais,
            )
        explainer_saliency = get_pos_saliency(explainer_saliency)
        ans = metrics.roc_auc_score(true_saliency.flatten(), explainer_saliency.flatten())
        print('ROC of method ', xai, ': ', round(ans,4))

def get_comparison_result(ds, result_path, include_ranking_by_perturbation_method=True):
    """Print the best and worst explanation method for one dataset.

    Reads every ``./<result_path>/<ds>_*.csv`` file, concatenates them, drops
    rows whose ``noise_type`` is ``'original_gaussian'`` and ranks the methods
    with ``get_final_ranking_new``. The method with the smallest
    ``scaled_ranking`` is reported as best and the one with the largest as
    worst.

    Parameters
    ----------
    ds : str
        Dataset name. 'Beef', 'Ham', 'OliveOil', 'Wine' and 'Car' are skipped
        without printing anything.
    result_path : str
        Directory (relative to the working directory) holding the CSV results.
    include_ranking_by_perturbation_method : bool, default True
        When True, ``get_final_ranking_new`` is called a second time with
        ``ranking_by_perturbation_method=True``.

    Returns
    -------
    None
        The one-row summary frame (or an empty frame when no result file
        exists) is printed, not returned.
    """
    colnames = ['dataset', 'best','worst']
    df = pd.DataFrame(columns=colnames)

    if ds in ['Beef','Ham','OliveOil','Wine','Car']:
        return

    path = glob.glob('./%s/%s_*.csv'%(result_path,ds))
    if not path:
        print('No comparison result exists')
        print('---')
    else:
        auc_df = pd.concat([pd.read_csv(p) for p in path], ignore_index=True, axis=0)
        auc_df = auc_df[auc_df['noise_type'] !='original_gaussian']
        x = get_final_ranking_new(auc_df,beautify_display=True,ranking_by_perturbation_method=False)
        if include_ranking_by_perturbation_method:
            # Return value is unused; the call is kept because, with
            # beautify_display=True, it appears to render the per-noise-type
            # table seen in the notebook output -- TODO confirm.
            get_final_ranking_new(auc_df,beautify_display=True,ranking_by_perturbation_method=True)
        curr_best = x['XAI_method'][x['scaled_ranking'].idxmin()]
        curr_worst = x['XAI_method'][x['scaled_ranking'].idxmax()]
        # DataFrame.append was removed in pandas 2.0; build the one-row frame directly.
        df = pd.DataFrame(
            [{'dataset': ds, 'best': curr_best, 'worst': curr_worst}],
            columns=colnames,
        )
    print(df)
    return

## 1. Synthetic Datasets

In [23]:
# Ground-truth check on the synthetic datasets: for each one, print its name
# followed by the per-method ROC AUC lines produced by get_roc.
# NOTE: keep the names `ds` / `ds_dir` unless get_roc is confirmed not to read
# them as notebook globals.
ds_dir = 'data'
ds_list = ['SmallMiddle_CAR', 'RareTime_NARMA']
for ds in ds_list:
    print(ds)
    get_roc(ds, ds_dir)
    print('--------------')

SmallMiddle_CAR
ROC of method  GradientShap :  0.9149
ROC of method  IG :  0.9368
ROC of method  LIME :  0.3265
ROC of method  ROCKET :  0.4908
ROC of method  MrSEQL :  0.7661
ROC of method  random2020 :  0.524
--------------
RareTime_NARMA
ROC of method  GradientShap :  0.8783
ROC of method  IG :  0.9119
ROC of method  LIME :  0.5665
ROC of method  ROCKET :  0.6864
ROC of method  MrSEQL :  0.5768
ROC of method  random2020 :  0.5054
--------------


## 2. Five datasets: CMJ, ECG200, CBF, Coffee, GunPoint

### Before merging and resplitting

In [28]:
# Overall ranking only, using the result CSVs found under ./output.
ds_list = ['CMJ', 'ECG200', 'CBF', 'Coffee', 'GunPoint']
for ds in ds_list:
    get_comparison_result(
        ds=ds,
        result_path='output',
        include_ranking_by_perturbation_method=False,
    )

Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,CMJ,GS,0.746029,0.920722
1,CMJ,IG,0.803519,1.0
2,CMJ,cam,0.666211,0.810652
3,CMJ,lime_mrseql,0.078362,0.0
4,CMJ,lime_rocket,0.415703,0.465197
5,CMJ,mrseql,0.132448,0.074586
6,CMJ,random2020,0.602808,0.723218
7,CMJ,ridgecv,0.541272,0.638359


  dataset         best worst
0     CMJ  lime_mrseql    IG


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,ECG200,GS,0.56285,0.676215
1,ECG200,IG,0.545982,0.646341
2,ECG200,cam,0.667466,0.861485
3,ECG200,lime_mrseql,0.22829,0.083723
4,ECG200,lime_rocket,0.181014,0.0
5,ECG200,mrseql,0.474681,0.52007
6,ECG200,random2020,0.745681,1.0
7,ECG200,ridgecv,0.341902,0.284926


  dataset         best       worst
0  ECG200  lime_rocket  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,CBF,GS,0.239632,0.08473
1,CBF,IG,0.20902,0.04726
2,CBF,cam,0.326997,0.191665
3,CBF,lime_mrseql,0.39523,0.275182
4,CBF,lime_rocket,0.170408,0.0
5,CBF,mrseql,0.510962,0.416838
6,CBF,random2020,0.987402,1.0
7,CBF,ridgecv,0.314792,0.176726


  dataset         best       worst
0     CBF  lime_rocket  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,Coffee,GS,0.44965,0.530313
1,Coffee,IG,0.371433,0.396716
2,Coffee,cam,0.148113,0.015282
3,Coffee,lime_mrseql,0.349563,0.359362
4,Coffee,lime_rocket,0.527073,0.662551
5,Coffee,mrseql,0.139166,0.0
6,Coffee,random2020,0.724641,1.0
7,Coffee,ridgecv,0.361495,0.379741


  dataset    best       worst
0  Coffee  mrseql  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,GunPoint,GS,0.3458,0.263226
1,GunPoint,IG,0.192482,0.0
2,GunPoint,cam,0.226206,0.0579
3,GunPoint,lime_mrseql,0.274115,0.140153
4,GunPoint,lime_rocket,0.77494,1.0
5,GunPoint,mrseql,0.244799,0.089822
6,GunPoint,random2020,0.404533,0.364063
7,GunPoint,ridgecv,0.473403,0.482303


    dataset best        worst
0  GunPoint   IG  lime_rocket


### After merging and resplitting

In [31]:
# Overall ranking only, using the result CSVs found under ./newoutput.
ds_list = ['CMJ', 'ECG200', 'CBF', 'Coffee', 'GunPoint']
for ds in ds_list:
    get_comparison_result(
        ds=ds,
        result_path='newoutput',
        include_ranking_by_perturbation_method=False,
    )

Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,CMJ,GS,0.84187,0.959623
1,CMJ,IG,0.868203,1.0
2,CMJ,lime_mrseql,0.216006,0.0
3,CMJ,lime_rocket,0.482849,0.409144
4,CMJ,mrseql,0.245008,0.044469
5,CMJ,random2020,0.458043,0.371111
6,CMJ,ridgecv,0.638544,0.647869


  dataset         best worst
0     CMJ  lime_mrseql    IG


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,ECG200,GS,0.542628,0.666293
1,ECG200,IG,0.609927,0.775463
2,ECG200,lime_mrseql,0.400619,0.435931
3,ECG200,lime_rocket,0.131885,0.0
4,ECG200,mrseql,0.690164,0.905621
5,ECG200,random2020,0.748345,1.0
6,ECG200,ridgecv,0.466789,0.54327


  dataset         best       worst
0  ECG200  lime_rocket  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,CBF,GS,0.423965,0.300633
1,CBF,IG,0.320908,0.174623
2,CBF,lime_mrseql,0.38456,0.252452
3,CBF,lime_rocket,0.178094,0.0
4,CBF,mrseql,0.39917,0.270316
5,CBF,random2020,0.995936,1.0
6,CBF,ridgecv,0.508384,0.403855


  dataset         best       worst
0     CBF  lime_rocket  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,Coffee,GS,0.393789,0.336389
1,Coffee,IG,0.291673,0.15271
2,Coffee,lime_mrseql,0.375961,0.30432
3,Coffee,lime_rocket,0.498391,0.524538
4,Coffee,mrseql,0.297727,0.163601
5,Coffee,random2020,0.762725,1.0
6,Coffee,ridgecv,0.206773,0.0


  dataset     best       worst
0  Coffee  ridgecv  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,GunPoint,GS,0.625253,1.0
1,GunPoint,IG,0.585425,0.889385
2,GunPoint,lime_mrseql,0.392889,0.35464
3,GunPoint,lime_rocket,0.557401,0.811552
4,GunPoint,mrseql,0.343995,0.218844
5,GunPoint,random2020,0.2652,0.0
6,GunPoint,ridgecv,0.404436,0.386712


    dataset        best worst
0  GunPoint  random2020    GS


## Including the ranking by perturbation method

In [32]:
# Result CSVs under ./newoutput, additionally requesting the ranking broken
# down by perturbation method.
ds_list = ['CMJ', 'ECG200', 'CBF', 'Coffee', 'GunPoint']
for ds in ds_list:
    get_comparison_result(
        ds=ds,
        result_path='newoutput',
        include_ranking_by_perturbation_method=True,
    )

Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,CMJ,GS,0.84187,0.959623
1,CMJ,IG,0.868203,1.0
2,CMJ,lime_mrseql,0.216006,0.0
3,CMJ,lime_rocket,0.482849,0.409144
4,CMJ,mrseql,0.245008,0.044469
5,CMJ,random2020,0.458043,0.371111
6,CMJ,ridgecv,0.638544,0.647869


Unnamed: 0,dataset,noise_type,XAI_method,average_scaled_auc,scaled_ranking
0,CMJ,local_gaussian,GS,0.901913,0.941048
1,CMJ,local_gaussian,IG,0.953893,1.0
2,CMJ,local_gaussian,lime_mrseql,0.169435,0.110311
3,CMJ,local_gaussian,lime_rocket,0.620892,0.622328
4,CMJ,local_gaussian,mrseql,0.072172,0.0
5,CMJ,local_gaussian,random2020,0.356803,0.322813
6,CMJ,local_gaussian,ridgecv,0.780917,0.80382
7,CMJ,global_gaussian,GS,0.754729,0.77961
8,CMJ,global_gaussian,IG,0.882593,1.0
9,CMJ,global_gaussian,lime_mrseql,0.416366,0.196402


  dataset         best worst
0     CMJ  lime_mrseql    IG


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,ECG200,GS,0.542628,0.666293
1,ECG200,IG,0.609927,0.775463
2,ECG200,lime_mrseql,0.400619,0.435931
3,ECG200,lime_rocket,0.131885,0.0
4,ECG200,mrseql,0.690164,0.905621
5,ECG200,random2020,0.748345,1.0
6,ECG200,ridgecv,0.466789,0.54327


Unnamed: 0,dataset,noise_type,XAI_method,average_scaled_auc,scaled_ranking
0,ECG200,local_gaussian,GS,0.375673,0.267804
1,ECG200,local_gaussian,IG,0.622104,0.683998
2,ECG200,local_gaussian,lime_mrseql,0.362572,0.245678
3,ECG200,local_gaussian,lime_rocket,0.217105,0.0
4,ECG200,local_gaussian,mrseql,0.809211,1.0
5,ECG200,local_gaussian,random2020,0.560246,0.579526
6,ECG200,local_gaussian,ridgecv,0.488061,0.457615
7,ECG200,global_gaussian,GS,0.62516,0.966748
8,ECG200,global_gaussian,IG,0.635619,0.983335
9,ECG200,global_gaussian,lime_mrseql,0.462082,0.708099


  dataset         best       worst
0  ECG200  lime_rocket  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,CBF,GS,0.423965,0.300633
1,CBF,IG,0.320908,0.174623
2,CBF,lime_mrseql,0.38456,0.252452
3,CBF,lime_rocket,0.178094,0.0
4,CBF,mrseql,0.39917,0.270316
5,CBF,random2020,0.995936,1.0
6,CBF,ridgecv,0.508384,0.403855


Unnamed: 0,dataset,noise_type,XAI_method,average_scaled_auc,scaled_ranking
0,CBF,local_gaussian,GS,0.434717,0.33582
1,CBF,local_gaussian,IG,0.277501,0.1511
2,CBF,local_gaussian,lime_mrseql,0.43549,0.336729
3,CBF,local_gaussian,lime_rocket,0.1489,0.0
4,CBF,local_gaussian,mrseql,0.220233,0.083813
5,CBF,local_gaussian,random2020,1.0,1.0
6,CBF,local_gaussian,ridgecv,0.463792,0.369983
7,CBF,global_gaussian,GS,0.459401,0.292167
8,CBF,global_gaussian,IG,0.236263,0.0
9,CBF,global_gaussian,lime_mrseql,0.370643,0.17595


  dataset         best       worst
0     CBF  lime_rocket  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,Coffee,GS,0.393789,0.336389
1,Coffee,IG,0.291673,0.15271
2,Coffee,lime_mrseql,0.375961,0.30432
3,Coffee,lime_rocket,0.498391,0.524538
4,Coffee,mrseql,0.297727,0.163601
5,Coffee,random2020,0.762725,1.0
6,Coffee,ridgecv,0.206773,0.0


Unnamed: 0,dataset,noise_type,XAI_method,average_scaled_auc,scaled_ranking
0,Coffee,local_gaussian,GS,0.32381,0.219244
1,Coffee,local_gaussian,IG,0.3125,0.206186
2,Coffee,local_gaussian,lime_mrseql,0.495833,0.417869
3,Coffee,local_gaussian,lime_rocket,0.409524,0.318213
4,Coffee,local_gaussian,mrseql,0.133929,0.0
5,Coffee,local_gaussian,random2020,1.0,1.0
6,Coffee,local_gaussian,ridgecv,0.303571,0.195876
7,Coffee,global_gaussian,GS,0.309874,0.59446
8,Coffee,global_gaussian,IG,0.407121,0.868192
9,Coffee,global_gaussian,lime_mrseql,0.383514,0.801743


  dataset     best       worst
0  Coffee  ridgecv  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,GunPoint,GS,0.625253,1.0
1,GunPoint,IG,0.585425,0.889385
2,GunPoint,lime_mrseql,0.392889,0.35464
3,GunPoint,lime_rocket,0.557401,0.811552
4,GunPoint,mrseql,0.343995,0.218844
5,GunPoint,random2020,0.2652,0.0
6,GunPoint,ridgecv,0.404436,0.386712


Unnamed: 0,dataset,noise_type,XAI_method,average_scaled_auc,scaled_ranking
0,GunPoint,local_gaussian,GS,0.912365,1.0
1,GunPoint,local_gaussian,IG,0.80823,0.885862
2,GunPoint,local_gaussian,lime_mrseql,0.364295,0.399287
3,GunPoint,local_gaussian,lime_rocket,0.681391,0.74684
4,GunPoint,local_gaussian,mrseql,0.268535,0.294329
5,GunPoint,local_gaussian,random2020,0.0,0.0
6,GunPoint,local_gaussian,ridgecv,0.480157,0.526277
7,GunPoint,global_gaussian,GS,0.274936,0.0
8,GunPoint,global_gaussian,IG,0.400689,0.42715
9,GunPoint,global_gaussian,lime_mrseql,0.484497,0.711821


    dataset        best worst
0  GunPoint  random2020    GS


In [33]:
# Result CSVs under ./output, additionally requesting the ranking broken down
# by perturbation method.
ds_list = ['CMJ', 'ECG200', 'CBF', 'Coffee', 'GunPoint']
for ds in ds_list:
    get_comparison_result(
        ds=ds,
        result_path='output',
        include_ranking_by_perturbation_method=True,
    )

Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,CMJ,GS,0.746029,0.920722
1,CMJ,IG,0.803519,1.0
2,CMJ,cam,0.666211,0.810652
3,CMJ,lime_mrseql,0.078362,0.0
4,CMJ,lime_rocket,0.415703,0.465197
5,CMJ,mrseql,0.132448,0.074586
6,CMJ,random2020,0.602808,0.723218
7,CMJ,ridgecv,0.541272,0.638359


Unnamed: 0,dataset,noise_type,XAI_method,average_scaled_auc,scaled_ranking
0,CMJ,local_gaussian,GS,0.786553,0.866548
1,CMJ,local_gaussian,IG,0.907509,1.0
2,CMJ,local_gaussian,cam,0.601887,0.662806
3,CMJ,local_gaussian,lime_mrseql,0.077642,0.084403
4,CMJ,local_gaussian,lime_rocket,0.397247,0.437026
5,CMJ,local_gaussian,mrseql,0.001142,0.0
6,CMJ,local_gaussian,random2020,0.564412,0.621459
7,CMJ,local_gaussian,ridgecv,0.564887,0.621983
8,CMJ,global_gaussian,GS,0.78056,0.919062
9,CMJ,global_gaussian,IG,0.838762,1.0


  dataset         best worst
0     CMJ  lime_mrseql    IG


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,ECG200,GS,0.56285,0.676215
1,ECG200,IG,0.545982,0.646341
2,ECG200,cam,0.667466,0.861485
3,ECG200,lime_mrseql,0.22829,0.083723
4,ECG200,lime_rocket,0.181014,0.0
5,ECG200,mrseql,0.474681,0.52007
6,ECG200,random2020,0.745681,1.0
7,ECG200,ridgecv,0.341902,0.284926


Unnamed: 0,dataset,noise_type,XAI_method,average_scaled_auc,scaled_ranking
0,ECG200,local_gaussian,GS,0.674545,1.0
1,ECG200,local_gaussian,IG,0.601161,0.85287
2,ECG200,local_gaussian,cam,0.532278,0.714761
3,ECG200,local_gaussian,lime_mrseql,0.175781,0.0
4,ECG200,local_gaussian,lime_rocket,0.281285,0.211532
5,ECG200,local_gaussian,mrseql,0.50188,0.653814
6,ECG200,local_gaussian,random2020,0.613762,0.878134
7,ECG200,local_gaussian,ridgecv,0.324385,0.297944
8,ECG200,global_gaussian,GS,0.575042,0.646329
9,ECG200,global_gaussian,IG,0.435231,0.452561


  dataset         best       worst
0  ECG200  lime_rocket  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,CBF,GS,0.239632,0.08473
1,CBF,IG,0.20902,0.04726
2,CBF,cam,0.326997,0.191665
3,CBF,lime_mrseql,0.39523,0.275182
4,CBF,lime_rocket,0.170408,0.0
5,CBF,mrseql,0.510962,0.416838
6,CBF,random2020,0.987402,1.0
7,CBF,ridgecv,0.314792,0.176726


Unnamed: 0,dataset,noise_type,XAI_method,average_scaled_auc,scaled_ranking
0,CBF,local_gaussian,ridgecv,0.0,
1,CBF,global_gaussian,ridgecv,0.0,
2,CBF,global_mean,GS,0.079803,0.055285
3,CBF,global_mean,IG,0.025953,0.0
4,CBF,global_mean,cam,0.387265,0.370939
5,CBF,global_mean,lime_mrseql,0.349253,0.331915
6,CBF,global_mean,lime_rocket,0.285501,0.266464
7,CBF,global_mean,mrseql,0.524822,0.512161
8,CBF,global_mean,random2020,1.0,1.0
9,CBF,global_mean,ridgecv,0.625727,0.615755


  dataset         best       worst
0     CBF  lime_rocket  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,Coffee,GS,0.44965,0.530313
1,Coffee,IG,0.371433,0.396716
2,Coffee,cam,0.148113,0.015282
3,Coffee,lime_mrseql,0.349563,0.359362
4,Coffee,lime_rocket,0.527073,0.662551
5,Coffee,mrseql,0.139166,0.0
6,Coffee,random2020,0.724641,1.0
7,Coffee,ridgecv,0.361495,0.379741


Unnamed: 0,dataset,noise_type,XAI_method,average_scaled_auc,scaled_ranking
0,Coffee,local_gaussian,GS,0.42834,0.337353
1,Coffee,local_gaussian,IG,0.449295,0.361644
2,Coffee,local_gaussian,cam,0.137308,0.0
3,Coffee,local_gaussian,lime_mrseql,0.439215,0.349959
4,Coffee,local_gaussian,lime_rocket,0.352909,0.249917
5,Coffee,local_gaussian,mrseql,0.193944,0.065651
6,Coffee,local_gaussian,random2020,1.0,1.0
7,Coffee,local_gaussian,ridgecv,0.570775,0.502459
8,Coffee,global_gaussian,GS,0.30102,0.460026
9,Coffee,global_gaussian,IG,0.228569,0.263412


  dataset    best       worst
0  Coffee  mrseql  random2020


Unnamed: 0,dataset,XAI_method,average_scaled_auc,scaled_ranking
0,GunPoint,GS,0.3458,0.263226
1,GunPoint,IG,0.192482,0.0
2,GunPoint,cam,0.226206,0.0579
3,GunPoint,lime_mrseql,0.274115,0.140153
4,GunPoint,lime_rocket,0.77494,1.0
5,GunPoint,mrseql,0.244799,0.089822
6,GunPoint,random2020,0.404533,0.364063
7,GunPoint,ridgecv,0.473403,0.482303


Unnamed: 0,dataset,noise_type,XAI_method,average_scaled_auc,scaled_ranking
0,GunPoint,local_gaussian,GS,0.349828,0.1856
1,GunPoint,local_gaussian,IG,0.202887,0.001542
2,GunPoint,local_gaussian,cam,0.245836,0.05534
3,GunPoint,local_gaussian,lime_mrseql,0.369937,0.210788
4,GunPoint,local_gaussian,lime_rocket,1.0,1.0
5,GunPoint,local_gaussian,mrseql,0.201656,0.0
6,GunPoint,local_gaussian,random2020,0.281557,0.100083
7,GunPoint,local_gaussian,ridgecv,0.56405,0.453932
8,GunPoint,global_gaussian,GS,0.219125,0.187668
9,GunPoint,global_gaussian,IG,0.173142,0.106541


    dataset best        worst
0  GunPoint   IG  lime_rocket
