In [6]:
import pandas as pd
import pickle
from sklearn.preprocessing import LabelEncoder, StandardScaler, OneHotEncoder
from IPython.display import display
from XAI.nn import BinaryClassifier
import random
import xgboost as xgb
from alibi.explainers import AnchorTabular
from alibi.explainers import CounterFactualProto
import numpy as np


ImportError: cannot import name 'TypeAliasType' from 'typing_extensions' (/Users/eq81tw/Developer/xai-hackathon/venv/lib/python3.9/site-packages/typing_extensions.py)

In [None]:
# Load the pre-split feature and label tables (train first, then test).
X_train, y_train, X_test, y_test = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../data/X_train.csv',
        '../data/y_train.csv',
        '../data/X_test.csv',
        '../data/y_test.csv',
    )
)

In [None]:
def _load_pickled_model(path):
    """Unpickle and return the object stored at ``path``.

    The file is opened in a ``with`` block so the handle is closed as soon as
    the object has been read (the bare ``pickle.load(open(...))`` form leaves
    the handle open until garbage collection).

    SECURITY: ``pickle.load`` can execute arbitrary code contained in the
    file. Only load model files that come from a trusted source.
    """
    with open(path, 'rb') as model_file:
        return pickle.load(model_file)


# Previously trained models, one per model family.
model_rf = _load_pickled_model('../models/model_rf.sav')
model_xgb = _load_pickled_model('../models/model_xgb.sav')
# NOTE(review): presumably this pickle needs XAI.nn.BinaryClassifier to be
# importable (it is imported at the top of the notebook) - confirm.
model_nn = _load_pickled_model('../models/model_nn.sav')

In [None]:
# First step in explainable AI: rank the features by the importance scores
# exposed by the random-forest model and plot the ten largest as horizontal bars.
feat_importances = pd.Series(
    data=model_rf.feature_importances_,
    index=X_train.columns,
)
feat_importances.nlargest(10).sort_values().plot.barh()

## Explanation through Anchors

In [None]:
def create_rf_explainer(model):
    """Build an ``AnchorTabular`` explainer for ``model`` and fit it on the training data.

    Args:
        model: object exposing ``predict_proba``; it is called lazily through
            a wrapper every time the explainer needs predictions.

    Returns:
        The fitted ``AnchorTabular`` instance.

    Reads the notebook-level ``X_train`` for both the feature names and the
    data the explainer is fitted on.
    """
    def predict_fn(x):
        return model.predict_proba(x)

    feature_names = list(X_train.columns)
    rf_anchor = AnchorTabular(predict_fn, feature_names)
    rf_anchor.fit(X_train.values)
    return rf_anchor

def explain_rf(explainer, id_to_explain, threshold=0.85):
    """Display one test record, its prediction, and the anchor explaining it.

    Args:
        explainer: fitted explainer exposing ``predictor`` and ``explain``
            (as returned by ``create_rf_explainer``).
        id_to_explain: index label of the row in the notebook-level ``X_test``.
        threshold: precision threshold forwarded to ``explainer.explain``.

    Returns:
        None. Everything is rendered via ``display``/``print``.
    """
    record = X_test.loc[id_to_explain]
    features = record.values

    # Render the record as a one-row table, in two pieces: the first ten
    # columns, then the remaining ones.
    record_frame = pd.DataFrame(record).T
    for column_slice in (slice(None, 10), slice(10, None)):
        display(record_frame.iloc[:, column_slice])

    # The predictor works on batches, so wrap the record as a single-row batch.
    prediction = explainer.predictor(features.reshape(1, -1))[0]
    print(f'Prediction: {prediction}')

    explanation = explainer.explain(features, threshold=threshold, seed=42)
    print('Anchor: %s' % (' AND\r\n\t'.join(explanation.anchor)))
    print('Precision: %.3f' % explanation.precision)
    print('Coverage: %.3f' % explanation.coverage)
    print('\n-----------------------------------------------------------------------------------------------\n')
    
def random_id(df):
    """Return a uniformly random valid row position of ``df``.

    Args:
        df: any object whose ``shape[0]`` is its number of rows.

    Returns:
        An int in ``[0, df.shape[0] - 1]``.

    Raises:
        ValueError: if ``df`` has no rows, since no valid position exists.
    """
    n_rows = df.shape[0]
    if n_rows == 0:
        raise ValueError('cannot pick a random row id from an empty frame')
    # randrange excludes its upper bound. randint(0, n_rows) is inclusive and
    # could return n_rows, one past the last valid position.
    return random.randrange(n_rows)

In [None]:
# Fit the anchor explainer for the random-forest model.
# NOTE(review): the name `explainer` is rebound to the XGBoost explainer further
# down in this notebook, so re-running the random-forest cells after that point
# would silently use the wrong explainer. Re-run this cell first.
explainer = create_rf_explainer(model_rf)

### Threshold of 0.85

In [None]:
print("Use threshold of 0.85")
# No threshold argument is passed, so explain_rf falls back to its default.
for record_id in (3378, 4786, 3594, 7996, 12814):
    explain_rf(explainer, record_id)

### Threshold of 0.6

In [None]:
print("Use threshold of 0.6")
# Explain a fixed set of test records with a lowered precision threshold.
for record_id in (3378, 4786, 3594, 7996, 12814):
    explain_rf(explainer, record_id, threshold=0.6)

### Threshold of 0.99

In [None]:
print("Use threshold of 0.99")
# Explain a fixed set of test records with a raised precision threshold.
for record_id in (3378, 4786, 3594, 7996, 12814):
    explain_rf(explainer, record_id, threshold=0.99)
# Disabled: explain a randomly chosen record instead.
#explain_rf(explainer, random_id(X_test))

In [None]:
def create_xgb_explainer(model):
    """Build an ``AnchorTabular`` explainer for the XGBoost model and fit it.

    Args:
        model: object whose ``predict`` accepts an ``xgb.DMatrix``
            (presumably an ``xgboost.Booster`` - confirm against how
            ``model_xgb`` was trained and saved).

    Returns:
        The fitted ``AnchorTabular`` instance.

    Reads the notebook-level ``X_train`` for both the feature names and the
    data the explainer is fitted on.
    """
    # Create an anchor explainer for the XGBoost model. Inputs are wrapped in
    # a DMatrix because that is what model.predict is called with here.
    anchor_explainer = AnchorTabular(
        lambda x: model.predict(xgb.DMatrix(x)),
        list(X_train.columns)
    )
    # DataFrame.as_matrix() was removed in pandas 1.0. `.values` yields the
    # same ndarray and matches what create_rf_explainer does.
    anchor_explainer.fit(X_train.values)
    return anchor_explainer

def explain_xgb(explainer, id_to_explain, threshold=0.85):
    """Display one test record, its XGBoost prediction, and the anchor explaining it.

    Args:
        explainer: fitted explainer exposing ``predictor`` and ``explain``
            (as returned by ``create_xgb_explainer``).
        id_to_explain: index label of the row in the notebook-level ``X_test``.
        threshold: precision threshold forwarded to ``explainer.explain``.
            Defaults to 0.85, the value that used to be hard-coded here, so
            existing two-argument calls behave as before.

    Returns:
        None. Everything is rendered via ``display``/``print``.
    """
    record = X_test.loc[id_to_explain]
    df_id = pd.DataFrame(record).T
    display(df_id.iloc[:,:10])
    display(df_id.iloc[:,10:])
    # Pass a (1, n_features) array, as explain_rf does. The bare Series used
    # before is 1-D and is not a single-row batch for the predictor.
    prediction = explainer.predictor(record.values.reshape(1, -1))[0]
    print(f'Prediction: {prediction}')

    anchor_explanation = explainer.explain(
        record.values,  # features of the record to explain
        threshold=threshold,
        seed=42
    )
    print('Anchor: %s' % (' AND\r\n\t'.join(anchor_explanation.anchor)))
    print('Precision: %.3f' % anchor_explanation.precision)
    print('Coverage: %.3f' % anchor_explanation.coverage)
    print('\n-----------------------------------------------------------------------------------------------\n')
    
# Fit the XGBoost explainer (this rebinds `explainer`) and explain four
# randomly drawn test records.
explainer = create_xgb_explainer(model_xgb)
for sample_no in range(4):
    explain_xgb(explainer, random_id(X_test))

## Counterfactuals

In [None]:
# Per-feature lower and upper bounds taken from the training data, each as a
# float32 row vector of shape (1, n_features). reshape(1, -1) infers the
# feature count instead of hard-coding 18, so this keeps working if the
# number of columns changes.
feature_range = (
    X_train.values.min(axis=0).reshape(1, -1).astype(np.float32),
    X_train.values.max(axis=0).reshape(1, -1).astype(np.float32),
)
print(feature_range)

In [None]:
# Counterfactual explainer for the random-forest model.
# - The predictor must close over `model_rf`. The previous lambda referenced
#   `model`, which is only a parameter name inside the create_*_explainer
#   helpers and is not defined at notebook level (NameError when called).
# - `shape` describes a single instance with a leading batch dimension of 1,
#   matching the (1, n_features) arrays in `feature_range`. It is not the
#   shape of the whole test set.
cf_rf_explainer = CounterFactualProto(
    lambda x: model_rf.predict_proba(x),
    (1, X_test.shape[1]),
    max_iterations=100,
    feature_range=feature_range,
    beta=.1, theta=5,
    use_kdtree=True
)
# NOTE(review): the explainer is fitted on X_test while feature_range comes
# from X_train. Confirm that fitting on the test split is intended.
cf_rf_explainer.fit(X_test.values, d_type='abdm-mvdm')