In [1]:
from emutils.imports import *
from emutils.model.evaluate.metrics import xgboost_complexity

from utils import *

Python  3.6.13
Python Executable: /home/ubuntu/anaconda3/envs/cfshap22/bin/python
CWD =  /home/ubuntu/air/xai/cfshap-release/cf-shap-facct22
NumPy 1.19.2 | Pandas 1.1.5 | SciPy 1.5.2 | NetworkX 2.5.1 | StatsModels 0.12.2
scikit-learn 0.24.2 | xgboost 1.3.3 
MatPlotLib 3.3.4 | Seaborn 0.11.1 | 





In [2]:
from constants import DATA_DIR, MODEL_DIR, EXPLANATIONS_DIR

parser = ArgumentParser(sys.argv)

# General
parser.add_argument('--data_path', type=str, default=DATA_DIR, required=False)
parser.add_argument('--model_path', type=str, default=MODEL_DIR)
parser.add_argument('--explanations_path', type=str, default=EXPLANATIONS_DIR)

args, unknown = parser.parse_known_args()
args = attrdict(vars(args))

# Data & Models
## Load

In [3]:
MODELS = [
    ('heloc', 'v2', 'v5'), 
    ('lendingclub', 'v2', 'v5'), 
    ('wines', 'v2', 'v5'), 
]

In [4]:
data = [
    attrdict({
        **{
            "Dataset": f"{dataset} ({data_version})",
            "Model": f"XGB ({model_version})"
        },
        **load_data_and_model(dataset, data_version, model_version, args),
    })
    for dataset, data_version, model_version in tqdm(MODELS)
]

  0%|          | 0/3 [00:00<?, ?it/s]

## Information

In [5]:
from sklearn.metrics import roc_auc_score, recall_score, accuracy_score, f1_score

datasets = pd.DataFrame([
    {
        'Dataset': dataset.Dataset,
        'Model': dataset.Model,
        'Features': dataset.X.shape[1],
        'Tot': dataset.X.shape[0],
        'Train': dataset.X_train.shape[0],
        'Test': dataset.X_test.shape[0],
        # 'Bad': dataset.X_bad.shape[0],
        # 'Good': dataset.X_good.shape[0],
        'Bad Train': dataset.y_train.values.sum() / dataset.y_train.shape[0] * 100,
        'Bad Test': dataset.y_test.values.sum() / dataset.y_test.shape[0] * 100,
        'Bad Tot': dataset.y.values.sum() / dataset.y.shape[0] * 100,
        'Bad Model': dataset.model.predict(dataset.X.values).sum() / dataset.y.shape[0] * 100,
        'Complexity (#leaves)': xgboost_complexity(dataset.model),
        'Threshold': np.round(dataset.model.threshold, 4),
        'ROC-AUC': roc_auc_score(dataset.y_test.values.flatten(), dataset.model.predict_proba(dataset.X_test.values)[:,1]) * 100,
        'Recall': recall_score(dataset.y_test.values.flatten(), dataset.model.predict(dataset.X_test.values)) * 100,
        'Fall-out': 1 - recall_score(1 - dataset.y_test.values.flatten(), 1 - dataset.model.predict(dataset.X_test.values)) * 100,
        'Accuracy': accuracy_score(dataset.y_test.values.flatten(), dataset.model.predict(dataset.X_test.values)) * 100,
        'F1': f1_score(dataset.y_test.values.flatten(), dataset.model.predict(dataset.X_test.values)) * 100,
    }
    for dataset in tqdm(data)
]).sort_values(['Model', 'Dataset']).set_index(['Model', 'Dataset'], drop=True)
datasets['ΔBad'] = datasets['Bad Model'] - datasets['Bad Tot']
for col in datasets.columns.values:
    if col in ['Bad Train', 'Bad Test', 'Bad Tot', 'ΔBad', 'Bad Model', 'Recall', 'Fall-out', 'ROC-AUC']:
        datasets[col] = datasets[col].apply(lambda x : f'{x:0.1f}%')
datasets

  0%|          | 0/3 [00:00<?, ?it/s]

Unnamed: 0_level_0,Unnamed: 1_level_0,Features,Tot,Train,Test,Bad Train,Bad Test,Bad Tot,Bad Model,Complexity (#leaves),Threshold,ROC-AUC,Recall,Fall-out,Accuracy,F1,ΔBad
Model,Dataset,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1
XGB (v5),heloc (v2),23,9871,6909,2962,52.0%,52.0%,52.0%,59.6%,446,0.3985,79.6%,81.6%,-62.5%,72.957461,75.851673,7.6%
XGB (v5),lendingclub (v2),20,1373324,961326,411998,19.1%,27.0%,21.5%,56.9%,1747,0.3824,69.6%,79.8%,-46.2%,56.04566,49.534473,35.4%
XGB (v5),wines (v2),11,4898,3428,1470,66.5%,66.5%,66.5%,61.2%,372,0.4614,83.2%,80.7%,-72.4%,78.231293,83.140148,-5.3%


# Explanations
Note: this can be run only after the geration of the explanations

## Load

In [6]:
regex = re.compile("([a-z]+)_D([a-z0-9]+)M([a-z0-9]+)_([a-z]+)_([a-z]+)_([QLa-z0-9_]+)(_B)*([v][0-9a-z_]+)\.pkl")

expl = []
for filename in os.listdir(args.explanations_path):
    match = regex.match(filename)
    if match:
        # print(match.groups())
        dataset, data_version, model_version, model_type, what, explanation, raw_background, version = match.groups()
        expl.append(attrdict({
                "Dataset": f"{dataset} ({data_version})",
                "Model": f"{model_type} ({model_version})",
                "Explanation": f"{explanation}",
                "Part": f"{what}{'_BACK' if raw_background is not None else ''}",
                "Version": version,
        }))

## What explanations have already been computed?

In [7]:
df = pd.DataFrame(expl)
df['Exists'] = '$\times$'
df = df.pivot(columns=['Explanation'], index = ['Dataset', 'Model', 'Part', 'Version'], values = ['Exists'])
df = df.fillna('')
df

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Exists,Exists,Exists,Exists,Exists,Exists,Exists,Exists,Exists,Exists,Exists,Exists,Exists,Exists,Exists,Exists,Exists
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,Explanation,all,diff_label,diff_label_100,diff_pred,diff_pred_100,knn1000_qL1,knn100_qL1,knn10_qL1,knn1_qL1,knn20_qL1,knn250_qL1,knn3_qL1,knn500_qL1,knn50_qL1,knn5_qL1,training,training_100
Dataset,Model,Part,Version,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2
heloc (v2),xgb (v5),backgrounds,v5_close,$\times$,,,,,,,,,,,,,,,,
heloc (v2),xgb (v5),backgrounds,v5_far,$\times$,,,,,,,,,,,,,,,,
heloc (v2),xgb (v5),meta,v5_close,$\times$,,,,,,,,,,,,,,,,
heloc (v2),xgb (v5),meta,v5_far,$\times$,,,,,,,,,,,,,,,,
heloc (v2),xgb (v5),trends,v5_close,$\times$,,,,,,,,,,,,,,,,
heloc (v2),xgb (v5),trends,v5_far,$\times$,,,,,,,,,,,,,,,,
heloc (v2),xgb (v5),values,v5_close,$\times$,,,,,,,,,,,,,,,,
heloc (v2),xgb (v5),values,v5_far,$\times$,,,,,,,,,,,,,,,,
heloc (v2),xgb (v5),xps,v5_close,,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$
heloc (v2),xgb (v5),xps,v5_far,,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$,$\times$
