## Benchmarking on anchor

Using the anchor package as provided on https://github.com/marcotcr/anchor/tree/master

The package works with numpy arrays (instead of data pandas) and can be used for encoded datasets. Since we use our own encoder, some bits of the code had to be adapted.

1) dataset loading, splitting as in the other cases
2) learn ML model (XGB for REASONX case c), compute accuracy
3) pick instance from the test set, generate neighborhood, learn surrogate, compute explanation, compute fidelity and measures
4) compute mean fidelity, compute mean of the measures

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

from anchor import utils
from anchor import anchor_tabular

import sys
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import tree
from sklearn.tree import DecisionTreeClassifier
import sklearn.ensemble 
import matplotlib.pyplot as plt
import pydotplus
from IPython.display import Image
from xgboost import XGBClassifier

# local imports
sys.path.append('../src/') # local path
import dautils

from helper_functions import read_adult, read_give_me_some_credit, read_south_german_credit

In [2]:
# dataset execution, copied
simplified=False
continuous_only=False

dataset = "adult"

# read dataset
if dataset == "gmsc":
    df, pred_atts, target, df_code = read_give_me_some_credit(continuous_only=continuous_only, simplified=simplified)
if dataset == "sgc":
    df, pred_atts, target, df_code = read_south_german_credit(continuous_only=continuous_only, simplified=simplified)
if dataset == "adult":
    df, pred_atts, target, df_code = read_adult(continuous_only=continuous_only, simplified=simplified)

In [3]:
# encode df
df_encoded_onehot = df_code.fit_transform(df)
# encoded atts names
encoded_pred_atts = df_code.encoded_atts(pred_atts)
df_encoded_onehot.head()

Unnamed: 0,race_AmerIndianEskimo,race_AsianPacIslander,race_Black,race_Other,race_White,sex_Female,sex_Male,workclass_Federalgov,workclass_Localgov,workclass_Neverworked,...,workclass_Selfempinc,workclass_Selfempnotinc,workclass_Stategov,workclass_Withoutpay,education,age,capitalgain,capitalloss,hoursperweek,class
0,0,0,0,0,1,0,1,0,0,0,...,0,0,1,0,13,39,2174,0,40,0
1,0,0,0,0,1,0,1,0,0,0,...,0,1,0,0,13,50,0,0,13,0
2,0,0,0,0,1,0,1,0,0,0,...,0,0,0,0,9,38,0,0,40,0
3,0,0,1,0,0,0,1,0,0,0,...,0,0,0,0,7,53,0,0,40,0
4,0,0,1,0,0,1,0,0,0,0,...,0,0,0,0,13,28,0,0,40,0


In [4]:
novel = df_encoded_onehot * 1
novel.head()

Unnamed: 0,race_AmerIndianEskimo,race_AsianPacIslander,race_Black,race_Other,race_White,sex_Female,sex_Male,workclass_Federalgov,workclass_Localgov,workclass_Neverworked,...,workclass_Selfempinc,workclass_Selfempnotinc,workclass_Stategov,workclass_Withoutpay,education,age,capitalgain,capitalloss,hoursperweek,class
0,0,0,0,0,1,0,1,0,0,0,...,0,0,1,0,13,39,2174,0,40,0
1,0,0,0,0,1,0,1,0,0,0,...,0,1,0,0,13,50,0,0,13,0
2,0,0,0,0,1,0,1,0,0,0,...,0,0,0,0,9,38,0,0,40,0
3,0,0,1,0,0,0,1,0,0,0,...,0,0,0,0,7,53,0,0,40,0
4,0,0,1,0,0,1,0,0,0,0,...,0,0,0,0,13,28,0,0,40,0


In [5]:
# dataset partition, convert datasets to numpy, train DT

X, y = novel[encoded_pred_atts], novel[target]
# retain test sets
X1, XT1, y1, yt1 = train_test_split(X, y, test_size=0.3, random_state=42)
X2, XT2, y2, yt2 = train_test_split(X, y, test_size=0.3, random_state=24)

X1_numpy = X1.to_numpy()
y1_numpy = y1.to_numpy()

tree_numpy = DecisionTreeClassifier(max_depth = 3)
tree_numpy.fit(X1_numpy, y1_numpy)

print("accuracy tree            ", tree_numpy.score(XT1.to_numpy(), yt1.to_numpy()))

xgb = XGBClassifier(random_state = 0)
xgb.fit(X1_numpy, y1_numpy)
xgb_label = xgb.predict(XT1.to_numpy())

xgb_ = XGBClassifier(random_state = 0)
xgb_.fit(X1,y1)

print("accuracy XGB             ", xgb.score(XT1.to_numpy(), yt1.to_numpy()))
print("accuracy XGB (pd)        ", xgb_.score(XT1, yt1))

ml_model = xgb

accuracy tree             0.801610591687709
accuracy XGB              0.850406060192452
accuracy XGB (pd)         0.850406060192452


In [6]:
# initialize the explanator on encoded data

explainer = anchor_tabular.AnchorTabularExplainer(
    ["0", "1"],
    encoded_pred_atts,
    X1_numpy)
    
#dataset.class_names
#dataset.feature_names
#dataset.train
#dataset.categorical_names
#encoder_fn=encoder.transform

In [7]:
# convert dataset to numpy, define lambda function, predict and initialize single explanation

# THRESHOLD PARAMETER
# "Note that we set threshold to 0.95, so we guarantee (with high probability) that precision will be above 0.95 - that is, that predictions on 
# instances where the anchor holds will be the same as the original prediction at least 95% of the time." (github)

instances = 100
threshold = [0.9, 0.95, 0.99]

numpy_test = XT1.to_numpy()

rule_length = np.empty((len(threshold),instances))
rule_length[:] = np.nan
precision = np.empty((len(threshold),instances))
precision[:] = np.nan
coverage = np.empty((len(threshold),instances))
coverage[:] = np.nan

predict_fn = lambda x: ml_model.predict(x)

for j in range(len(threshold)):
    for i in range(instances):
        # prediction
        # uses assigned labels from above (initialization of explainer)
        print("Prediction: ", explainer.class_names[predict_fn(numpy_test[i].reshape(1,-1))[0]])
        
        # generate explanation
        #exp = explainer.explain_instance(numpy_test[i], ml_model.predict, threshold=threshold[j])
        exp = explainer.explain_instance(numpy_test[i], predict_fn, threshold=threshold[j])
        # results
        print('Anchor: %s' % (' AND '.join(exp.names())))
        
        # EVALUATION
        # length of the rules
        rule_length[j, i] = len(exp.names())
        precision[j, i] = exp.precision()
        coverage[j, i] = exp.coverage()

Prediction:  0
Anchor: age <= 28.00
Prediction:  0
Anchor: age <= 28.00
Prediction:  0
Anchor: age <= 28.00
Prediction:  0
Anchor: age <= 28.00
Prediction:  0
Anchor: education <= 12.00
Prediction:  0
Anchor: age <= 37.00 AND race_Black > 0.00
Prediction:  0
Anchor: education <= 10.00
Prediction:  0
Anchor: age <= 28.00
Prediction:  0
Anchor: sex_Female > 0.00
Prediction:  0
Anchor: education <= 9.00
Prediction:  1
Anchor: capitalloss > 0.00 AND education > 12.00 AND age > 37.00 AND sex_Female <= 0.00 AND 0.00 < sex_Male <= 1.00 AND workclass_Federalgov <= 0.00 AND workclass_Stategov <= 0.00 AND workclass_Selfempnotinc <= 0.00 AND race_White <= 1.00 AND race_AmerIndianEskimo <= 0.00 AND workclass_Neverworked <= 0.00 AND workclass_Withoutpay <= 0.00 AND race_Other <= 0.00 AND race_AsianPacIslander <= 0.00
Prediction:  0
Anchor: age <= 37.00
Prediction:  0
Anchor: sex_Female > 0.00
Prediction:  0
Anchor: education <= 9.00
Prediction:  0
Anchor: age <= 37.00
Prediction:  0
Anchor: educati

In [8]:
print("length of rules  ", np.mean(rule_length, axis = 1))
print("precision        ", np.mean(precision, axis = 1))
print("coverage         ", np.mean(coverage, axis = 1))

length of rules   [2.06 3.04 4.55]
precision         [0.90999151 0.94360567 0.96644717]
coverage          [0.433322 0.351893 0.272606]


In [10]:
print(precision[1])

[0.9801444  0.9875     0.98465473 0.97887324 0.96551724 0.96153846
 0.96650718 1.         0.9676259  0.98823529 0.5963734  0.97797357
 0.9731405  0.97292724 0.98694517 0.9505814  0.96244131 0.62371487
 0.96048632 0.9829932  0.97749196 0.95303867 0.99579832 0.95595238
 0.96098563 0.96233522 0.96837945 0.62451513 0.958159   1.
 0.9601227  0.96428571 0.6744044  0.9704918  0.97560976 0.96210526
 0.96078431 0.98697068 0.96787149 0.95383275 0.97399527 0.81453634
 0.95820896 0.95748988 0.95692308 1.         0.95681063 0.98203593
 0.98051948 0.96145125 0.95086705 0.97647059 0.98370672 0.95488722
 0.95850622 0.97844828 0.96127946 0.98025135 0.99324324 1.
 0.96813725 0.96828358 0.96046512 0.97376543 0.77923628 0.96242775
 0.98677249 0.93846154 0.98550725 0.98543689 0.95575221 0.97909408
 0.96267496 0.97628458 0.97345133 0.97455471 0.96721311 0.95705521
 0.95307918 0.34886818 0.97154472 0.95588235 0.96581197 0.95481928
 0.96901408 0.96101365 0.97158322 0.99270073 0.95618153 0.95938104
 0.96363636