Run Evaluation functions

In [None]:
import os
import project
from project import Project 
import functions
from functions import *
import importlib

In [None]:
# Re-import the local `functions` module so that edits to it take effect
# without restarting the kernel, then repeat the star import so the names
# bound in this notebook point at the reloaded objects instead of the old ones.
importlib.reload(functions)
from functions import *

In [None]:
# Reload the local `project` module after editing it, then repeat the star
# import so names bound in this notebook refer to the reloaded definitions.
importlib.reload(project)
from project import *

In [None]:
# Names of the projects to evaluate; each one is handed to Project(...) below.
project_names = [
    'ANT',
    'CAMEL',
    'JEDIT',
]

# Keys of the global models that are trained and explained for every project.
models = [
    'LR',
    'SVM',
    'KNN',
    'NB',
]

In [None]:
# Build one Project per entry in `project_names`, call set_train_test() on it
# (presumably prepares the train/test data read by the metric cells -- confirm),
# and train every global model listed in `models`.
# The prepared objects are stored in `projects`, keyed by project name.
projects = dict()

for project_name in project_names:
    proj = Project(project_name)
    print(proj.name)  # show which project is currently being prepared
    proj.set_train_test()

    # Register the project before training, exactly as the metric cells expect
    # to find it: projects[<name>] -> Project.
    projects[project_name] = proj

    for model in models:
        proj.train_global_model(model)
        

In [None]:
# METRIC 1 : local prediction fidelity
# For every (project, model) pair, gather the prediction of the global model
# and the predictions derived from the LIME, PyExplainer and SHAP explanations,
# then pass all four lists to prediction_fidelity().
# Output: eval_results/metric1.csv

metric_frames = []  # one frame per (project, model); concatenated once after the loop
for proj, curr_project in projects.items():
    X_train = curr_project.X_train
    y_train = curr_project.y_train

    for model in models:
        global_model = curr_project.models[model]

        limeExp, lime_explanations = get_explanations(curr_project, 'lime', model, X_train, y_train, global_model)
        pyExp, pyexplanations = get_explanations(curr_project, 'pyExp', model, X_train, y_train, global_model)
        shapExp, shap_explanations = get_explanations(curr_project, 'shap', model, X_train, y_train, global_model)

        # LIME local prediction: first element of the first value stored in `local_pred`.
        lime_preds = [list(exp['rule'].local_pred.values())[0][0] for exp in lime_explanations]
        # Global model prediction: index 1 of the probabilities stored on the LIME
        # explanation (presumably the positive class -- confirm).
        global_preds = [exp['rule'].predict_proba[1] for exp in lime_explanations]
        # PyExplainer: probability at index 1 from the local model, for the explained row.
        py_preds = [exp['local_model'].predict_proba(exp['X_explain'].values)[0][1] for exp in pyexplanations]
        # SHAP: explainer expected value plus the sum of the instance's SHAP values.
        shap_preds = [shapExp.expected_value + sum(exp['shap_values']) for exp in shap_explanations]

        model_df = pd.DataFrame(prediction_fidelity(global_preds, lime_preds, py_preds, shap_preds))
        model_df['model'] = model
        model_df['project'] = proj
        metric_frames.append(model_df)

# Concatenate once: avoids re-copying the accumulated frame on every iteration
# and avoids concatenating onto an empty seed DataFrame.
df = pd.concat(metric_frames) if metric_frames else pd.DataFrame()

print(df)
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('eval_results', exist_ok=True)
df.to_csv('eval_results/metric1.csv')


In [None]:
# METRIC 2 : internal fidelity
# Evaluated for a single global model ('LR') on every project: the sampled
# test data and the three sets of explanations are passed to internal_fidelity().
# Output: eval_results/metric2.csv
model = 'LR'

metric_frames = []  # one frame per project; concatenated once after the loop
for proj, curr_project in projects.items():
    X_train = curr_project.X_train
    y_train = curr_project.y_train

    global_model = curr_project.models[model]

    test_data_x, test_data_y, _ = curr_project.get_sampled_data(model)

    limeExp, lime_explanations = get_explanations(curr_project, 'lime', model, X_train, y_train, global_model)
    pyExp, pyexplanations = get_explanations(curr_project, 'pyExp', model, X_train, y_train, global_model)
    shapExp, shap_explanations = get_explanations(curr_project, 'shap', model, X_train, y_train, global_model)

    model_df = pd.DataFrame(
        internal_fidelity(global_model, test_data_x, test_data_y, lime_explanations, pyexplanations, shap_explanations)
    )
    # Drop the last column before saving (positional; presumably the 'recalls'
    # column -- confirm against internal_fidelity()). The explicit copy makes
    # the column assignments below act on an independent frame rather than on
    # a slice, which can otherwise raise SettingWithCopyWarning.
    model_df = model_df.iloc[:, :-1].copy()
    model_df['model'] = model
    model_df['project'] = proj
    metric_frames.append(model_df)

# Concatenate once instead of growing the frame inside the loop.
df = pd.concat(metric_frames) if metric_frames else pd.DataFrame()

print(df)
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('eval_results', exist_ok=True)
df.to_csv('eval_results/metric2.csv')

In [None]:
# METRIC 3 : faithfulness
# For every (project, model) pair, faithfulness() is evaluated on the sampled
# test data with the LIME, PyExplainer and SHAP explanations.
# Outputs:
#   eval_results/metric3.csv -- the frames returned by faithfulness(), all columns kept
#   eval_results/fs.csv      -- long format, one row per individual score

metric_frames = []  # per-(project, model) result frames
score_frames = []   # long-format score frames, one per result row

for proj, curr_project in projects.items():
    X_train = curr_project.X_train
    y_train = curr_project.y_train

    for model in models:
        test_data_x, test_data_y, _ = curr_project.get_sampled_data(model)
        global_model = curr_project.models[model]

        limeExp, lime_explanations = get_explanations(curr_project, 'lime', model, X_train, y_train, global_model)
        pyExp, pyexplanations = get_explanations(curr_project, 'pyExp', model, X_train, y_train, global_model)
        shapExp, shap_explanations = get_explanations(curr_project, 'shap', model, X_train, y_train, global_model)

        model_df = pd.DataFrame(
            faithfulness(global_model, test_data_x, lime_explanations, pyexplanations, shap_explanations)
        )

        # Expand each result row into one row per score.
        # NOTE(review): columns are read by position (0 -> method label,
        # 2 -> scores); presumably these are 'method' and 'faithfulness_scores'
        # -- confirm against the column order returned by faithfulness().
        for i in range(len(model_df)):
            # pd.Series() accepts a scalar as well as a list/array, so a single
            # score still yields one row instead of an empty frame.
            scores = pd.Series(model_df.iloc[i, 2])
            score_frames.append(
                pd.DataFrame({'faithfulness_score': scores}).assign(
                    method=model_df.iloc[i, 0],
                    model=model,
                    project=proj,
                )
            )

        model_df['model'] = model
        model_df['project'] = proj
        metric_frames.append(model_df)

# Concatenate once instead of growing the frames inside the loops.
df = pd.concat(metric_frames) if metric_frames else pd.DataFrame()
fs = pd.concat(score_frames) if score_frames else pd.DataFrame()

print(df)
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('eval_results', exist_ok=True)
df.to_csv('eval_results/metric3.csv')
fs.to_csv('eval_results/fs.csv')

In [None]:
# METRIC 4 : monotonicity
# For every (project, model) pair, monotonicity() is evaluated on the sampled
# test data with the LIME, PyExplainer and SHAP explanations.
# Output: eval_results/metric4.csv

metric_frames = []  # one frame per (project, model); concatenated once after the loop
for proj, curr_project in projects.items():
    X_train = curr_project.X_train
    y_train = curr_project.y_train

    for model in models:
        global_model = curr_project.models[model]
        test_data_x, test_data_y, _ = curr_project.get_sampled_data(model)

        limeExp, lime_explanations = get_explanations(curr_project, 'lime', model, X_train, y_train, global_model)
        pyExp, pyexplanations = get_explanations(curr_project, 'pyExp', model, X_train, y_train, global_model)
        shapExp, shap_explanations = get_explanations(curr_project, 'shap', model, X_train, y_train, global_model)

        model_df = pd.DataFrame(
            monotonicity(global_model, test_data_x, lime_explanations, pyexplanations, shap_explanations)
        )
        # Drop the last column before saving (positional; presumably the
        # 'monotonicity_scores' column -- confirm against monotonicity()).
        # The explicit copy makes the column assignments below act on an
        # independent frame rather than on a slice, which can otherwise raise
        # SettingWithCopyWarning.
        model_df = model_df.iloc[:, :-1].copy()
        model_df['model'] = model
        model_df['project'] = proj
        metric_frames.append(model_df)

# Concatenate once instead of growing the frame inside the loop.
df = pd.concat(metric_frames) if metric_frames else pd.DataFrame()

print(df)
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('eval_results', exist_ok=True)
df.to_csv('eval_results/metric4.csv')

In [None]:
# METRIC 5 : uniqueness
# For every (project, model) pair, uniqueness() is evaluated on the sampled
# test data with the LIME, PyExplainer and SHAP explanations.
# Output: eval_results/metric5.csv (all columns returned by uniqueness() are kept)

metric_frames = []  # one frame per (project, model); concatenated once after the loop
for proj, curr_project in projects.items():
    X_train = curr_project.X_train
    y_train = curr_project.y_train

    for model in models:
        global_model = curr_project.models[model]
        test_data_x, test_data_y, _ = curr_project.get_sampled_data(model)

        limeExp, lime_explanations = get_explanations(curr_project, 'lime', model, X_train, y_train, global_model)
        pyExp, pyexplanations = get_explanations(curr_project, 'pyExp', model, X_train, y_train, global_model)
        shapExp, shap_explanations = get_explanations(curr_project, 'shap', model, X_train, y_train, global_model)

        model_df = pd.DataFrame(
            uniqueness(global_model, test_data_x, lime_explanations, pyexplanations, shap_explanations)
        )
        model_df['model'] = model
        model_df['project'] = proj
        metric_frames.append(model_df)

# Concatenate once instead of growing the frame inside the loop.
df = pd.concat(metric_frames) if metric_frames else pd.DataFrame()

print(df)
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('eval_results', exist_ok=True)
df.to_csv('eval_results/metric5.csv')

In [None]:
# METRIC 6 : similarity
# For every (project, model) pair, similarity() is evaluated on the sampled
# test data with the LIME and PyExplainer explanations only (SHAP is not used
# by this metric).
# Output: eval_results/metric6.csv (all columns returned by similarity() are kept)

metric_frames = []  # one frame per (project, model); concatenated once after the loop
for proj, curr_project in projects.items():
    X_train = curr_project.X_train
    y_train = curr_project.y_train

    for model in models:
        global_model = curr_project.models[model]
        test_data_x, test_data_y, _ = curr_project.get_sampled_data(model)

        limeExp, lime_explanations = get_explanations(curr_project, 'lime', model, X_train, y_train, global_model)
        pyExp, pyexplanations = get_explanations(curr_project, 'pyExp', model, X_train, y_train, global_model)

        model_df = pd.DataFrame(similarity(test_data_x, lime_explanations, pyexplanations))
        model_df['model'] = model
        model_df['project'] = proj
        metric_frames.append(model_df)

# Concatenate once instead of growing the frame inside the loop.
df = pd.concat(metric_frames) if metric_frames else pd.DataFrame()

print(df)
# to_csv does not create missing directories, so make sure the target exists.
os.makedirs('eval_results', exist_ok=True)
df.to_csv('eval_results/metric6.csv')