Code Available:       https://github.com/amparore/lime-stratified<br>
Examples Available:   https://github.com/rashidrao-pk/lime-stratified-examples <br>
If you use this code, please cite us: <br>

In [None]:
# Widen the notebook container so that it uses 98% of the screen width
from IPython.display import HTML, display

_WIDE_CONTAINER_CSS = "<style>.container { width:98% !important; }</style>"
display(HTML(_WIDE_CONTAINER_CSS))

In [None]:
# !git clone https://github.com/amparore/lime-stratified lime_stratified
# cd lime_stratified
# Changes to the downloaded LIME code:
# make any changes to the LIME image code after downloading it and before running the build/install commands below

# python setup.py build
# python setup.py install

In [None]:
# This notebook uses the modified LIME code downloaded from
# https://github.com/amparore/lime-stratified (cloned as lime_stratified)

import lime
from lime import lime_image
# Show the location the `lime` package was imported from
lime.__file__

### Import Libraries

In [None]:
import utils
import pandas as pd
import os
import sys
# NOTE: `utils` is imported a second time under the alias `ut`; the code cells call it through `ut`
import utils as ut
import importlib
import glob
import numpy as np
from tensorflow.keras.applications.resnet50 import preprocess_input
# Do not truncate the columns when a DataFrame is displayed
pd.set_option('display.max_columns', None)

### Setting Path

In [None]:
# Resolve the data folder, the result folder and the class-index file
# relative to the current working directory
Main_dir = os.getcwd()
DS_path, result_folder = (os.path.join(Main_dir, sub) for sub in ("data", "result"))
json_file = os.path.join(DS_path, "imagenet_class_index.json")

In [None]:
# Get the ImageNet class names from the class-index JSON file (`json_file`)
class_names = ut.get_ImageNet_ClassLabels(json_file) 

## BlackBox Model

Load the black-box model; here the ResNet50 model is used.

In [None]:
# Name of the black-box model; it is also part of the segmentation-table file name
model_name = 'ResNet50'
# Load the model through the project helper
model = ut.load_model(model_name)

In [None]:
# Black-box prediction function: preprocesses the passed image batch for
# ResNet-50 and returns the model predictions
def bb_predict(image):
    """Return the model predictions for a batch of images.

    Parameters
    ----------
    image : array-like
        Image batch handed to ``preprocess_input`` with
        ``data_format='channels_last'``.

    Returns
    -------
    The result of ``model.predict`` on the preprocessed batch.
    """
    # Keras' preprocess_input may write its result over a NumPy input when the
    # dtypes are compatible; preprocess a copy so the caller's image (which is
    # plotted and explained afterwards) is never modified.
    batch = preprocess_input(np.copy(image), data_format='channels_last')
    return model.predict(batch, verbose=0)

# Hyperparameters

In [None]:
# ---------------------------------------------------------------------------
# Hyperparameters used to create the LIME image explanations
# ---------------------------------------------------------------------------
num_samples = 1000
batch_size = 600
top_labels = 2

# ---------------------------------------------------------------------------
# Experiment grid: the experiments are run over these combinations
# ---------------------------------------------------------------------------
# Segmentation parameters
seg_algo = 'quickshift'
# Consecutive segment-count ranges of width 50:
# [[0,50],[50,100],[100,150],[150,200]]
# (alternative grid: [[0,100],[100,200],[200,300]])
segs_range_list = [[low, low + 50] for low in range(0, 200, 50)]

use_stratification = [False, True]
hide_color = [None]             # alternative: [None, 0]
distance_function = ['cosine']  # alternative: ['cosine', 'l1']

# ---------------------------------------------------------------------------
# Output of the experiments
# ---------------------------------------------------------------------------
images_to_test = 150
compute_segments = False

# Switches selecting which plots are produced
plot_prediction = True            # plot the black-box model prediction
plot_segments = True
plot_explanation = True
plot_classification_score = True
plot_heatmap = True
plot_image_mask = True
# Set to True if the explanation plots should also be saved
save_explanations_as_plot = True


### Create Segments for Each Range in `segs_range_list` (0-50, 50-100, 100-150, 150-200)

In [None]:
importlib.reload(ut)
# Run this cell with `compute_segments = True` when the segmentation table has
# to be generated again; otherwise the next cell loads the existing CSV file.
segs_param_table_sucess = []

# Result folders: <result_folder>/<segment ranges>/<num_samples>
sub_results__ = os.path.join(result_folder, str(segs_range_list))
sub_results_ = os.path.join(sub_results__, str(num_samples))

ut.check_folders(sub_results__)

if compute_segments:
    files = range(images_to_test)
    time_stamp = ut.time_stamp()
    # The output path does not change inside the loops, so build it once
    seg_table_csv = os.path.join(
        DS_path,
        'Segmentation_Table_' + str(model_name) + str(segs_range_list) + '.csv',
    )
    for f in files:
        # Image files are numbered from 1 with eight zero-padded digits
        file_name = f'{f+1:08}'
        file = os.path.join(DS_path, 'ILSVRC2012_test_' + file_name + '.JPEG')
        print('Filename:',file_name)
        image = ut.read_process_image(file, model)
        for srl in segs_range_list:
            # The upper bound of the range is used as the target segment count
            target_seg_no = srl[1]
            md, ks, random_seed, ratio = ut.search_segment_number(
                image, target_seg_no=target_seg_no, init_max_dist=100,
                init_kernel_size=4, seg_algo=seg_algo)
            segments, segs, segmenter_fn = ut.own_seg(
                image, md=md, ks=ks, random_seed=random_seed, ratio=ratio)
            segs_param_table = {'filename':file_name,'seg_algo':seg_algo, 'max_distance':md,'kernal_size':ks,'random_seed':random_seed,'ratio':ratio,'segments':segs,'seg_range':srl}
            segs_param_table_sucess.append(segs_param_table)
            print('File:',file_name,' Creating ',segs, ' seg_range : ',srl,' [ ',md,' - ',ks,' ] \n','+'*120)
            # The table is rewritten after every entry (presumably so that
            # partial results are kept if the run is interrupted)
            df_seg = pd.DataFrame(segs_param_table_sucess)
            df_seg.to_csv(seg_table_csv, sep=';', index=False)
    # A bare expression inside an `if` block is not rendered by the notebook,
    # so show the table explicitly
    display(df_seg)

### LOAD SEGMENTS FILE

In [None]:
# Load the hyperparameter file used to create the segments
# (time-stamped variant, if needed:
#  sub_results__+'//Segmentation_Table_'+str(model_name)+str(segs_range_list)+'_'+str(time_stamp)+'.csv')
seg_table_file = DS_path+'//Segmentation_Table_'+str(model_name)+str(segs_range_list)+'.csv'
df_seg = pd.read_csv(seg_table_file, sep=';')
# To inspect the rows of a single image:
# df_n = df_seg.loc[(df_seg['filename'] == int(f'{125:08}'))]

df_seg

### Running Experiments and Save Data File

In [None]:
importlib.reload(ut)
files = range(images_to_test)
results_csv = []
# Data file that collects the evaluation results of every run
data_csv_path = os.path.join(sub_results_, f'{files.start+1}_{files.stop}_data.csv')

# For every image: predict with the black-box model, then create and evaluate
# one LIME explanation per stored segmentation and per combination of
# hide_color / use_stratification / distance_function.
for f in files:
    file_name = f'{f+1:08}'
    file = os.path.join(DS_path, 'ILSVRC2012_test_' + file_name + '.JPEG')
    file_name = ut.get_file_name(file)
    sub_results = os.path.join(sub_results_, file_name)
    ut.check_folders(sub_results)

    # Read and resize the image according to the model input layer
    image = ut.read_process_image(file, model)
    image_arr = np.expand_dims(image, axis=0)
    predicted = bb_predict(image_arr)
    # Convert the prediction into Predicted Class Index (PDI), class
    # probability and Predicted Class Label (PDL)
    (PDI, class_prob, PDL) = ut.get_class_idx_label_score(predicted, class_names)

    # Plot the black-box model prediction
    if plot_prediction:
        ut.plot_save_prediction(image, PDL, class_prob, sub_results, file_name,
                                plot_everything=save_explanations_as_plot, save_image=True)

    # Segmentation parameters stored for this image (one row per segment range)
    df_n = df_seg.loc[df_seg['filename'] == int(file_name)]
    # DataFrame.iteritems() was removed in pandas 2.0; iterrows() yields the
    # same (index, row) pairs as the former df_n.T.iteritems()
    for _, row in df_n.iterrows():
        md, ks = row.max_distance, row.kernal_size
        # NOTE(review): the table also stores random_seed and ratio, but they
        # are not passed to own_seg here — confirm the defaults reproduce the
        # segmentation that was recorded.
        segments, segs, segmenter_fn = ut.own_seg(image, md=md, ks=ks)
        sr = ut.segs_sections(segs, segs_range_list)

        # Plot the created segments; the call does not depend on the settings
        # looped over below, so it is done once per segmentation
        if plot_segments:
            ut.plot_seg_image(image, segments, md, ks, sub_results, file_name, save_image=True)

        for hc in hide_color:
            for us in use_stratification:
                for dist_fn in distance_function:
                    if dist_fn == 'l1':
                        # NOTE(review): kernel_l1_fn is not defined in the visible
                        # notebook — confirm where it comes from before enabling 'l1'.
                        kernel = (lambda d, kernel_width: kernel_l1_fn(d, kernel_width, segs))
                    else:
                        kernel = None

                    # Fix the random seed to make the benchmark deterministic and reproducible
                    explainer_lime = lime_image.LimeImageExplainer(random_state=1234, kernel=kernel)
                    # Create the explanation
                    explanation_ret = explainer_lime.explain_instance(image,
                                                                      bb_predict,
                                                                      hide_color=hc,
                                                                      distance_metric=dist_fn,
                                                                      top_labels=top_labels,
                                                                      batch_size=batch_size,
                                                                      use_stratification=us,
                                                                      num_samples=num_samples,
                                                                      segmentation_fn=segmenter_fn)
                    # explain_instance may return either the explanation alone or a
                    # (data, labels, explanation) tuple; split the tuple if present
                    if isinstance(explanation_ret, tuple):
                        data, labels, explanation = explanation_ret
                    else:
                        explanation = explanation_ret
                        data, labels = None, None

                    # Without data and labels the explanation cannot be evaluated
                    if data is None or labels is None:
                        print('Data and Labels needs to be returned from lime_image.py function')
                        continue

                    # ---------------- Evaluating the explanation ----------------
                    hcc = 'mean-filled' if hc is None else 'zero-filled'
                    ttl = str(segs)+'_'+hcc+'_'+str(us)+'_'+str(num_samples)
                    # Dictionary with the keys and values written into the data file
                    data_to_csv = {'filename':str(file_name),'hide_color':str(hcc),'use_stratification':str(us),'num_samples':str(num_samples),
                                   'dist_fn':str(dist_fn),'segments':str(segs),'max_dist':str(md),'kernal_size':str(ks)}

                    # Evaluate the explanation; data_to_csv is passed in to receive
                    # the evaluation results (e.g. the 'cv_beta' entry printed below)
                    ut.evaluate_explanation(explanation, data, labels, class_prob, data_to_csv, model_name, sr)

                    # ---------------- Plotting the classification score ----------------
                    # Classification score of the linear regressor
                    if plot_classification_score:
                        ut.plot_classification_score(explanation, data, labels, class_prob, sub_results, ttl,
                                                     plot_everything=save_explanations_as_plot, draw_quantile=False, save_image=True)

                    # NOTE(review): `TopLabel` and `maxval` are not defined anywhere in
                    # the visible notebook; they must exist in the session before the
                    # heatmap / mask plots below can run — confirm their source.

                    # ---------------- Plotting the heatmap ----------------
                    # Heatmap based on the feature importances of the LIME explanation;
                    # guarded by its own switch (was plot_classification_score)
                    if plot_heatmap:
                        heatmap = ut.fun_create_heatmap_lime(image, explanation, TopLabel, segments)
                        ut.plot_heatmap_lime(heatmap, maxval, sub_results, ttl, save_result=True,
                                             show_color_bar=False, color_bar_location='right')

                    # ---------------- Image and mask by LIME ----------------
                    if plot_image_mask:
                        ut.get_img_mask_lime(explanation, TopLabel, sub_results, ttl, save_image=True, positive_only=True, num_features=5, hide_rest=True)

                    # The data file is rewritten after every run
                    results_csv.append(data_to_csv)
                    df_data = pd.DataFrame(results_csv)
                    df_data.to_csv(data_csv_path, sep=';', index=False)
                    print('ImageNo: ',file_name,'distance:',dist_fn,', Segs:',str(segs),', Max_Dist: ',md,', Kernal Size: ',ks, ', Hide_Color: ',str(hc),', Use_Stratification: ',str(us),', CV: ',str(data_to_csv['cv_beta']))

In [None]:
# Show the result folder of this run (<result_folder>/<segment ranges>/<num_samples>)
sub_results_

In [None]:
# Show the evaluation results collected by the experiment loop
df_data

### Load Existing Data File to Plot Results

In [None]:
# Load the data file written by the experiment loop
# (<sub_results_>/<first image>_<last image>_data.csv, ';'-separated)
df_ld = pd.read_csv(os.path.join(sub_results_, f'{files.start+1}_{files.stop}_data.csv'), sep=';')

In [None]:
# Rows of the runs made with use_stratification == False
no_strat_mask = df_ld['use_stratification'].eq(False)
df_monte = df_ld[no_strat_mask]
df_monte

In [None]:
# Rows of the runs made with use_stratification == True
strat_mask = df_ld['use_stratification'].eq(True)
df_strat = df_ld[strat_mask]
df_strat

In [None]:
# Rows of a single image: file number 125 (int of the zero-padded '00000125')
image_no = 125
df_sub = df_ld[df_ld['filename'].eq(image_no)]
df_sub