In [None]:
# Install the explanation libraries that the next cell imports (dalex, lime).
! pip3 install dalex
! pip install lime

# Mount Google Drive at /content/gdrive; the dataset and output paths used
# later in this notebook live under that mount point.
from google.colab import drive
drive.mount('/content/gdrive')

In [None]:
import csv
import dalex as dx
import numpy as np
import pandas as pd
from imblearn.under_sampling import RandomUnderSampler 
from lime import lime_tabular
from numpy import where
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report
from sklearn.model_selection import KFold
from sklearn.utils import resample
np.random.seed(1)

def isfloat(value):
  """Tell whether ``float(value)`` succeeds.

  Returns True when the conversion works and False when it raises
  ValueError. Any other exception raised by ``float`` propagates.
  """
  try:
    float(value)
  except ValueError:
    return False
  else:
    return True

def get_rank_diff(list1, list2):
  """Return the average rank displacement of list1's items within list2.

  For every item of ``list1`` the displacement is the absolute difference
  between its position in ``list1`` and the position of its first occurrence
  in ``list2``. An item that does not occur in ``list2`` is charged the
  maximum penalty ``len(list1)``.

  Args:
    list1: reference ranking (sequence of comparable items).
    list2: ranking to compare against; must support ``.index``.

  Returns:
    The mean displacement over ``list1``. 0 means every item of ``list1``
    sits at the same position in ``list2``. An empty ``list1`` yields 0.0
    instead of raising ZeroDivisionError.
  """
  size = len(list1)
  if size == 0:
    # Nothing to compare: avoid dividing by zero.
    return 0.0

  total_diff = 0
  for position, item in enumerate(list1):
    try:
      total_diff += abs(list2.index(item) - position)
    except ValueError:
      # item is absent from list2 -> worst-case displacement
      total_diff += size
  return total_diff / size

def get_hit_rate(list1, list2):
  """Return the fraction of list1's items that also occur in list2.

  Args:
    list1: reference sequence; every element (duplicates included) counts.
    list2: container tested with ``in``.

  Returns:
    ``hits / len(list1)`` as a float in [0, 1]. An empty ``list1`` yields
    0.0 instead of raising ZeroDivisionError.
  """
  size = len(list1)
  if size == 0:
    # No items to look up: avoid dividing by zero.
    return 0.0
  hits = sum(1 for item in list1 if item in list2)
  return hits / size

# Base names (without the '.csv' extension) of the dataset files to process,
# grouped by project.
data_list = [
  # activemq
  'activemq-5.0.0',
  'activemq-5.1.0',
  'activemq-5.2.0',
  'activemq-5.3.0',
  'activemq-5.8.0',
  # camel
  'camel-1.4.0',
  'camel-2.9.0',
  'camel-2.10.0',
  'camel-2.11.0',
  # derby
  'derby-10.2.1.6',
  'derby-10.3.1.4',
  'derby-10.5.1.1',
  # groovy
  'groovy-1_5_7',
  'groovy-1_6_BETA_1',
  'groovy-1_6_BETA_2',
  # hbase
  'hbase-0.94.0',
  'hbase-0.95.0',
  'hbase-0.95.2',
  # hive
  'hive-0.9.0',
  'hive-0.10.0',
  'hive-0.12.0',
  # jruby
  'jruby-1.1',
  'jruby-1.4.0',
  'jruby-1.5.0',
  'jruby-1.7.0.preview1',
  # lucene
  'lucene-2.3.0',
  'lucene-2.9.0',
  'lucene-3.0.0',
  'lucene-3.1',
  # wicket
  'wicket-1.3.0-beta2',
  'wicket-1.3.0-incubating-beta-1',
  'wicket-1.5.3',
]


def _lime_feature_ranking(explanation):
  """Return the feature names of a LIME explanation, in the order LIME lists them.

  Each entry of ``explanation.as_list()`` is a ``(rule, score)`` pair. The rule
  text is split on whitespace: when its first token parses as a number
  (e.g. ``'0.50 < loc <= 1.00'``) the feature name is the third token,
  otherwise (e.g. ``'loc <= 0.50'``) it is the first token.
  NOTE(review): this assumes feature names contain no whitespace.
  """
  names = []
  for rule, _score in explanation.as_list():
    tokens = rule.split()
    names.append(tokens[2] if isfloat(tokens[0]) else tokens[0])
  return names


def _breakdown_feature_ranking(breakdown, top_k):
  """Return the first ``top_k`` variable names of a dalex break-down result.

  The first and last rows of ``breakdown.result`` are dropped before
  truncating (in dalex's break_down output these are the intercept and the
  final prediction rows, not features).
  """
  names = breakdown.result['variable_name'].tolist()
  return names[1:-1][:top_k]


# Writes one CSV row per (dataset, fold) comparing the explanations of a
# baseline logistic regression with those of the same classifier trained on
# randomly under-sampled data. Only test instances that BOTH models predict
# as buggy are explained. The context manager guarantees the output file is
# closed even if a dataset fails half-way; newline='' is what the csv module
# asks for when handing a file object to csv.writer.
with open('/content/gdrive/MyDrive/out/rq2_rus_top_15.csv', 'w', newline='') as f_write:
  csv_writer = csv.writer(f_write)
  csv_writer.writerow(['dataset', 'lime_hit_rate', 'lime_rank_diff', 'dalex_hit_rate', 'dalex_rank_diff', 'lime_order', 'dalex_order', 'total', 'base_shape', 'target_shape'])

  # Label columns that must not be used as model inputs.
  label_columns = ['HeuBug', 'HeuBugCount', 'RealBug', 'RealBugCount']

  for data_files in data_list:
    print(data_files)
    input_df = pd.read_csv('/content/gdrive/MyDrive/dataset/' + data_files + '.csv')
    input_df = input_df.drop('File', axis=1)
    n_fold = 10
    # shuffle / random_state are keyword-only in current scikit-learn.
    kfold = KFold(n_splits=n_fold, shuffle=True, random_state=1)

    ########## KFold Loop ##########
    for train, test in kfold.split(input_df):
      # KFold yields positional indices, hence .iloc rather than .loc.
      train_data, test_data = input_df.iloc[train], input_df.iloc[test]

      #### Separate x and y for both train and test set ####
      train_x = train_data.drop(label_columns, axis=1)
      train_real_y = train_data['RealBug'].astype('bool')

      test_x = test_data.drop(label_columns, axis=1)
      test_real_y = test_data['RealBug'].astype('bool')

      #### apply Spearman Corr based FS on baseline ####
      # For every pair with |rho| > 0.7, drop the later column of the pair.
      corr_mat = train_x.corr('spearman')
      corr_features = set()
      for cor_i in range(len(corr_mat.columns)):
        for cor_j in range(cor_i):
          if abs(corr_mat.iloc[cor_i, cor_j]) > 0.7:
            corr_features.add(corr_mat.columns[cor_i])

      train_x = train_x.drop(labels=corr_features, axis=1)
      test_x = test_x.drop(labels=corr_features, axis=1)

      # Number of top-ranked features compared per explanation.
      num_of_feature = 15

      #### Apply Data Sampling Method ####
      data_sampler = RandomUnderSampler(random_state=42)
      data_sampled_x, data_sampled_y = data_sampler.fit_resample(train_x, train_real_y)
      data_sampled_x = pd.DataFrame(data_sampled_x, columns=train_x.columns)

      #### Train Classifiers (Logistic Regression) ####
      lr = LogisticRegression(random_state=0, solver='liblinear')
      lr_pred = lr.fit(train_x, train_real_y).predict(test_x)

      # NOTE(review): unlike the baseline model above, this one uses
      # scikit-learn's default solver instead of 'liblinear' -- confirm the
      # difference is intended.
      lr_ds = LogisticRegression(random_state=0)
      lr_ds_pred = lr_ds.fit(data_sampled_x, data_sampled_y).predict(test_x)

      # Positions of the test instances that both models predict as buggy;
      # if there are none, skip this fold.
      buggy_idx = np.flatnonzero(np.logical_and(lr_pred, lr_ds_pred))
      buggy_cnt = len(buggy_idx)
      if buggy_cnt == 0:
        continue

      explainer_lime = lime_tabular.LimeTabularExplainer(train_x.to_numpy(),
                                      mode='classification',
                                      feature_names=train_x.columns.tolist(),
                                      discretize_continuous=True,
                                      random_state=42)
      explainer_lime_ds = lime_tabular.LimeTabularExplainer(data_sampled_x.to_numpy(),
                                      mode='classification',
                                      feature_names=data_sampled_x.columns.tolist(),
                                      discretize_continuous=True,
                                      random_state=42)

      exp_dalex = dx.Explainer(lr, train_x, train_real_y, verbose=False)
      exp_dalex_ds = dx.Explainer(lr_ds, data_sampled_x, data_sampled_y, verbose=False)

      # Convert once per fold instead of twice per explained instance.
      test_x_np = test_x.to_numpy()

      sum_hit_rate_lime = 0
      sum_rank_diff_lime = 0
      sum_hit_rate_dx = 0
      sum_rank_diff_dx = 0
      same_order_lime = 0
      same_order_dx = 0

      for i in buggy_idx:
        #### Dalex (BreakDown's new package) ####
        # explanation from the baseline model
        breakdown = exp_dalex.predict_parts(test_x.iloc[i], type='break_down', label=str(i))
        # explanation from the data-sampled model
        breakdown_ds = exp_dalex_ds.predict_parts(test_x.iloc[i], type='break_down', label=str(i))

        #### LIME ####
        exp_lime = explainer_lime.explain_instance(test_x_np[i], lr.predict_proba, num_features=num_of_feature)
        exp_lime_ds = explainer_lime_ds.explain_instance(test_x_np[i], lr_ds.predict_proba, num_features=num_of_feature)

        # feature rankings: baseline vs data-sampled model, per technique
        rank_list = _breakdown_feature_ranking(breakdown, num_of_feature)
        rank_list_ds = _breakdown_feature_ranking(breakdown_ds, num_of_feature)
        rank_list_lime = _lime_feature_ranking(exp_lime)
        rank_list_ds_lime = _lime_feature_ranking(exp_lime_ds)

        # ranking difference for lime and dalex (computed once, reused below)
        rank_diff_lime = get_rank_diff(rank_list_lime, rank_list_ds_lime)
        rank_diff_dx = get_rank_diff(rank_list, rank_list_ds)
        sum_rank_diff_lime += rank_diff_lime
        sum_rank_diff_dx += rank_diff_dx

        # hit rate for lime and dalex
        sum_hit_rate_lime += get_hit_rate(rank_list_lime, rank_list_ds_lime)
        sum_hit_rate_dx += get_hit_rate(rank_list, rank_list_ds)

        # count instances whose ranking is identical under both models
        if rank_diff_lime == 0:
          same_order_lime += 1
        if rank_diff_dx == 0:
          same_order_dx += 1

      # Per-fold averages over the explained instances.
      avg_hit_rate_lime = sum_hit_rate_lime / buggy_cnt
      avg_rank_diff_lime = sum_rank_diff_lime / buggy_cnt
      avg_hit_rate_dx = sum_hit_rate_dx / buggy_cnt
      avg_rank_diff_dx = sum_rank_diff_dx / buggy_cnt
      avg_order_lime = same_order_lime / buggy_cnt
      avg_order_dx = same_order_dx / buggy_cnt

      row = [data_files, avg_hit_rate_lime, avg_rank_diff_lime, avg_hit_rate_dx, avg_rank_diff_dx, avg_order_lime, avg_order_dx, buggy_cnt, train_x.shape, data_sampled_x.shape]
      csv_writer.writerow(row)
      # Flush after every fold so partial results survive an interrupted run.
      f_write.flush()
      print(row)
    ########## End of KFold Loop ##########

print('Done!')