In [61]:
# Enable IPython's autoreload extension in mode 2, which re-imports modules
# before each cell runs so edits to imported .py files are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [62]:
import os
import pandas as pd
import numpy as np

from math import isnan
from pathlib import Path
from datetime import datetime
from scipy.stats import bootstrap
from carla.data.catalog import OnlineCatalog

In [63]:
# --- Dataset ---------------------------------------------------------------
dataset_name = 'give_me_some_credit'
data = OnlineCatalog(data_name=dataset_name)
# Count of continuous features; the L1 distance is later divided by this.
num_features = len(data.continuous)

# --- Models and recourse methods under evaluation --------------------------
model_type = "forest"
model_type_dict = "forest"
rms_array = ["acdc", "dice", "gs", "proto", "roar"]
# Name of the results sub-directory that holds the run for the methods above.
rms = "acdc_dice_gs_proto_roar_retrained"
vaes = ['crud', 'revise', 'clue']

# --- Switches --------------------------------------------------------------
remove_acdc = False
remove_vaes = False
sess_results_file = False

# --- Input / output locations ----------------------------------------------
# Alternative input location used for remote runs: 'results_remote/results'
result_path = os.path.join("~", "carla", "results")
# Output tables are grouped by the ISO date (YYYY-MM-DD) of the run.
date = datetime.now().date().isoformat()
save_results_path = f'tables/exp_1_CI/{date}/'
Path(save_results_path).mkdir(parents=True, exist_ok=True)

# --- Metrics to summarise (later cells append further columns) -------------
metrics = ['L1_distance', 'LOF', 'trade_off']

## Read results

In [64]:
# Load the raw benchmark results for the selected dataset / model / method set.
recourse_results = pd.read_csv(
    f'{result_path}/{dataset_name}/{model_type_dict}/{rms}/results.csv'
)


In [65]:
# Keep only the rows of the model under study. The explicit .copy() makes
# result_table an independent frame, so the column assignments below neither
# trigger SettingWithCopyWarning nor depend on pandas' view/copy heuristics.
result_table = recourse_results[recourse_results['ML_Model'] == model_type].copy()

# Normalise the L1 distance by the number of continuous features.
result_table['L1_distance'] = result_table['L1_distance'] / num_features
# Trade-off score: Euclidean norm of (LOF, normalised L1 distance).
result_table['trade_off'] = np.sqrt(
    result_table['LOF'] * result_table['LOF']
    + result_table['L1_distance'] * result_table['L1_distance']
)

# `models` still ends up holding all 60 success-column names, as before.
models = []
for retrained_type in ['ann', 'linear', 'forest']:
    # Columns of the 20 retrained models of THIS type only. Previously the
    # list was accumulated across iterations, so the "linear" average also
    # contained the ann columns and the "forest" average contained all 60.
    type_models = [
        "success_model_{}_{}".format(retrained_type, i) for i in range(20)
    ]
    models.extend(type_models)

    success_column = "Success_retrained_{}_models".format(retrained_type)
    # Row-wise mean over the 20 per-model success columns of this type.
    result_table[success_column] = result_table[type_models].mean(axis=1)

    # Guard the append so re-running this cell does not duplicate metrics.
    if success_column not in metrics:
        metrics.append(success_column)

In [66]:
# Build one row per recourse method with "mean (CI low, CI high)" strings for
# every metric. Confidence intervals come from scipy.stats.bootstrap on the mean.

# Fixed seed so the bootstrap intervals are reproducible across re-runs.
bootstrap_rng = np.random.default_rng(0)

result_rows = []
for rm in rms_array:
    rm_results = result_table[result_table['Recourse_Method'] == rm]
    row = {'Recourse_Method': rm}
    for metric in metrics:
        print("----------------{}_________{}--------------".format(rm, metric))
        # Named metric_values (not `data`) so the OnlineCatalog object created
        # in the configuration cell is not overwritten.
        metric_values = rm_results[metric]
        if metric_values.empty or metric_values.min() == metric_values.max():
            # Degenerate sample (no rows, or all values identical): bootstrap
            # cannot produce a meaningful interval, so collapse it to a point.
            # For an empty sample min/max/mean are NaN and are reported as such.
            low = metric_values.min()
            high = metric_values.max()
            print("{},{}".format(metric_values.mean(), metric_values.mean()))
        else:
            res = bootstrap((metric_values,), np.mean, random_state=bootstrap_rng)
            low = res.confidence_interval.low
            high = res.confidence_interval.high
            print(res.confidence_interval)
        row[metric] = '{:.2f} ({:.2f}, {:.2f})'.format(metric_values.mean(), low, high)
    result_rows.append(row)

# Create the frame once from the collected rows instead of growing it with
# pd.concat inside the loop. Columns: 'Recourse_Method' followed by the metrics.
result = pd.DataFrame(result_rows)

----------------acdc_________L1_distance--------------
ConfidenceInterval(low=0.10099850021393601, high=0.11543635105807845)
----------------acdc_________LOF--------------
1.0,1.0
----------------acdc_________trade_off--------------
ConfidenceInterval(low=1.0056665831817262, high=1.0074263592917045)
----------------acdc_________Success_retrained_ann_models--------------
1.0,1.0
----------------acdc_________Success_retrained_linear_models--------------
1.0,1.0
----------------acdc_________Success_retrained_forest_models--------------
1.0,1.0
----------------dice_________L1_distance--------------
ConfidenceInterval(low=0.060135734640518676, high=0.07556060215590901)
----------------dice_________LOF--------------
ConfidenceInterval(low=0.48, high=0.67)
----------------dice_________trade_off--------------
ConfidenceInterval(low=0.5160528151774276, high=0.7016959829774932)
----------------dice_________Success_retrained_ann_models--------------
ConfidenceInterval(low=0.41550000000000004, hig

In [67]:
# Success rate per recourse method with a bootstrap confidence interval,
# joined onto the per-metric table `result`.

# Number of factuals each method was run on; the rate is (rows present) / n.
# NOTE(review): 100 is taken from the original hard-coded value -- confirm it
# matches the benchmark setup.
n_factuals = 100
# Fixed seed so the bootstrap intervals are reproducible across re-runs.
success_rng = np.random.default_rng(0)

success_rows = []
for rm in rms_array:
    successful = len(result_table[result_table['Recourse_Method'] == rm])
    # 1 for every row present in result_table, 0 for the remaining factuals.
    # max(..., 0) keeps the padding well-defined if more rows than expected exist.
    success_flags = np.array([1] * successful + [0] * max(n_factuals - successful, 0))
    success_rate = success_flags.mean()
    if success_flags.min() == success_flags.max():
        # All successes or all failures: bootstrap on constant data yields NaN
        # bounds, so report the point value as a zero-width interval instead.
        low = high = success_rate
    else:
        res = bootstrap((success_flags,), np.mean, random_state=success_rng)
        low = res.confidence_interval.low
        high = res.confidence_interval.high
        print(res.confidence_interval)
    success_rows.append({
        'Recourse_Method': rm,
        'Success_Rate': '{:.2f} ({:.2f}, {:.2f})'.format(success_rate, low, high),
    })

success_result = pd.DataFrame(success_rows).set_index('Recourse_Method')

# Make the join safe to re-run: only move 'Recourse_Method' into the index if
# it is still a column, and drop a 'Success_Rate' column left by an earlier run.
if 'Recourse_Method' in result.columns:
    result = result.set_index('Recourse_Method')
result = result.drop(columns='Success_Rate', errors='ignore').join(success_result)

In [68]:
#result['Success_Rate'] = result_table[['Success_Rate', "Recourse_Method"]].groupby('Recourse_Method').mean()['Success_Rate']

In [69]:
# Column layout of the exported table: success rates first, then the
# trade-off score, then the two raw metrics it is derived from.
order_of_columns = [
    'Success_Rate',
    "Success_retrained_ann_models",
    "Success_retrained_linear_models",
    "Success_retrained_forest_models",
    "trade_off",
    "LOF",
    "L1_distance",
]

# Sort rows by the index, then select the columns in the order above.
result = result.sort_index()[order_of_columns]

# Write one CSV per dataset / model type into the dated output directory.
result.to_csv(
    f'{save_results_path}{dataset_name}_{model_type}.csv',
    float_format='%.2f',
)

#small_result_table.to_csv('{}{}_{}_small.csv'.format(save_results_path, dataset_name, model_type), float_format='%.2f')