In [86]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [87]:
import os
import pandas as pd
import numpy as np

from math import isnan
from pathlib import Path
from datetime import datetime
from scipy.stats import bootstrap
from carla.data.catalog import OnlineCatalog

In [88]:
# --- Which experiment to summarise ------------------------------------------
dataset_name = 'spotify_classic'
model_type = "ann"
model_type_dict = "ann"

# Dataset catalog; only the number of continuous features is read from it here.
data = OnlineCatalog(data_name=dataset_name)
num_features = len(data.continuous)

# --- Where the raw benchmark results live -----------------------------------
# Alternative location: 'results_remote/results'
result_path = os.path.join("~", "carla", "results")

# --- Miscellaneous switches --------------------------------------------------
remove_acdc = remove_vaes = False
vaes = ['crud', 'revise', 'clue']
sess_results_file = False
suffix = ''

# --- Recourse methods ---------------------------------------------------------
# The results sub-directory name is the method names joined by "_" plus "_retrained".
rms_array = ["dice", "cchvae", "cruds", "proto", "roar", "wachter", "gs", "acdc"]
rms = "_".join(rms_array + ["retrained"])

# --- Output directory (one folder per calendar day) ---------------------------
date = datetime.now().date().isoformat()
save_results_path = f'tables/appendix_carla_CI/{date}/'
Path(save_results_path).mkdir(parents=True, exist_ok=True)

# Default metric set: datasets with constraints report 'Constraint_Violation',
# every other dataset reports 'avg_time' in the last slot.
has_constraint_metric = dataset_name in ('breast_cancer', 'give_me_some_credit')
metrics = [
    'trade_off',
    'LOF',
    'L1_distance',
    'Constraint_Violation' if has_constraint_metric else 'avg_time',
]
metrics_non_model = list(metrics)

# Default recourse-method set for forest models / the mnist dataset.
if model_type == 'forest':
    rms, rms_array = 'acdc_dice_gs_proto_roar_retrained', ['acdc', 'dice', 'gs', 'proto', 'roar']
elif dataset_name == 'mnist':
    rms, rms_array = (
        'dice_proto_roar_wachter_gs_acdc_retrained',
        ['dice', 'proto', 'roar', 'wachter', 'gs', 'acdc'],
    )

# Manual override: whatever was selected above, this run only looks at the
# 'crud' results and tags the output file name with '_crud'.
rms, rms_array = 'crud_retrained', ['crud']
suffix = '_crud'


# Norm-based distance columns shared by both lists below.
norm_distance_metrics = ["L0_distance", "L1_distance", "L2_distance", "Linf_distance"]

# Every metric column that is summarised in the output table.
metrics = [
    *norm_distance_metrics,
    "Constraint_Violation",
    "Redundancy",
    "y-Nearest-Neighbours",
    "avg_time",
]

# Columns that get divided by the number of features before summarising.
distance_metrics = [*norm_distance_metrics, "Redundancy"]


## Read results

In [89]:
# Results are stored as <result_path>/<dataset>/<model>/<recourse methods>/results.csv
results_csv = f'{result_path}/{dataset_name}/{model_type_dict}/{rms}/results.csv'
recourse_results = pd.read_csv(results_csv)


In [90]:
# Keep only the rows that belong to the configured model type.
# The explicit .copy() makes result_table an independent frame: without it the
# column assignment below writes into a boolean-mask slice of recourse_results,
# which pandas reports as SettingWithCopyWarning.
result_table = recourse_results[recourse_results['ML_Model'] == model_type].copy()

# Scale the distance-type columns by num_features (the number of continuous
# features of the dataset, see the config cell).
result_table[distance_metrics] = result_table[distance_metrics] / num_features


In [91]:
# Summarise every metric of every recourse method as "mean (CI low, CI high)",
# where the interval is scipy's bootstrap confidence interval of the mean.

# Fixed seed so that re-running the notebook reproduces the same intervals.
BOOTSTRAP_SEED = 0

summary_rows = []
for rm in rms_array:
    rm_results = result_table[result_table['Recourse_Method'] == rm]
    summary_row = {'Recourse_Method': rm}
    for metric in metrics:
        print("----------------{}_________{}--------------".format(rm, metric))
        # Drop NaNs up front: Series.mean() skips them, but np.mean inside
        # bootstrap() does not, which would give a NaN interval around a
        # non-NaN mean.
        values = rm_results[metric].dropna()
        mean = values.mean()
        if values.nunique() <= 1:
            # Constant (or empty) sample: there is nothing to resample, so the
            # interval collapses to the single observed value (NaN if empty).
            low = high = values.min()
            print("{},{}".format(mean, mean))
        else:
            res = bootstrap((values.to_numpy(),), np.mean, random_state=BOOTSTRAP_SEED)
            low = res.confidence_interval.low
            high = res.confidence_interval.high
            print(res.confidence_interval)
        summary_row[metric] = '{:.2f} ({:.2f}, {:.2f})'.format(mean, low, high)
    summary_rows.append(summary_row)

# Build the frame once instead of concatenating inside the loop; the explicit
# column list keeps 'Recourse_Method' available even when rms_array is empty.
result = pd.DataFrame(summary_rows, columns=['Recourse_Method'] + list(metrics))

----------------crud_________L0_distance--------------
1.0,1.0
----------------crud_________L1_distance--------------
ConfidenceInterval(low=0.2983116684457314, high=0.32447693842739916)
----------------crud_________L2_distance--------------
ConfidenceInterval(low=0.15254645471089573, high=0.1729537907087135)
----------------crud_________Linf_distance--------------
ConfidenceInterval(low=0.06879662856553051, high=0.07354017824550092)
----------------crud_________Constraint_Violation--------------
0.0,0.0
----------------crud_________Redundancy--------------
ConfidenceInterval(low=0.9190909090909087, high=0.9318181818181815)
----------------crud_________y-Nearest-Neighbours--------------
1.0,1.0
----------------crud_________avg_time--------------
2.265896280538291,2.265896280538291


In [92]:
# Success rate per recourse method, formatted as "rate (CI low, CI high)".
# The aggregated rate is expanded into 100 pseudo-observations (1 = success,
# 0 = failure) and the mean of that sample is bootstrapped.

# Fixed seed so that re-running the notebook reproduces the same intervals.
SUCCESS_BOOTSTRAP_SEED = 0

success_rows = []
for rm in rms_array:
    rate = result_table.loc[result_table['Recourse_Method'] == rm, 'Success_Rate'].mean()
    if np.isnan(rate):
        # No success rate recorded for this method: report NaN instead of
        # failing on int(nan).
        mean = low = high = float('nan')
    else:
        # round() rather than bare int(): int() truncates, so a rate such as
        # 0.29 (0.29 * 100 == 28.999999999999996) used to be counted as 28.
        successful = int(round(rate * 100))
        if successful in (0, 100):
            # All-failure / all-success samples are constant, so the bootstrap
            # distribution is degenerate; report the point value directly.
            mean = low = high = successful / 100
        else:
            outcomes = np.array([1] * successful + [0] * (100 - successful))
            res = bootstrap((outcomes,), np.mean, random_state=SUCCESS_BOOTSTRAP_SEED)
            low = res.confidence_interval.low
            high = res.confidence_interval.high
            mean = outcomes.mean()
            print(res.confidence_interval)
    success_rows.append({
        'Recourse_Method': rm,
        'Success_Rate': '{:.2f} ({:.2f}, {:.2f})'.format(mean, low, high),
    })

success_result = pd.DataFrame(
    success_rows, columns=['Recourse_Method', 'Success_Rate']
).set_index('Recourse_Method')

# Attach the success rate to the metric table. The two guards make the cell
# safe to re-run: `result` may already be indexed by method and may already
# carry a 'Success_Rate' column from a previous execution.
if 'Recourse_Method' in result.columns:
    result = result.set_index('Recourse_Method')
result = result.drop(columns='Success_Rate', errors='ignore').join(success_result)
#success_table = pd.DataFrame([1]*successful + [0] * (100-successful))
#result_table[['Success_Rate', "Recourse_Method"]].groupby('Recourse_Method').mean()

In [93]:
#result['Success_Rate'] = result_table[['Success_Rate', "Recourse_Method"]].groupby('Recourse_Method').mean()['Success_Rate']

In [94]:
#order_of_columns = ['Success_Rate', "Success_retrained_ann_models", "Success_retrained_linear_models", "Success_retrained_forest_models", "trade-off", "LOF", "L1_Distance"]
# Success rate goes first, followed by the metric columns in their configured order.
order_of_columns = ['Success_Rate', *metrics]
#result = result.join(result_table[['Success_Rate', "Recourse_Method"]].groupby('Recourse_Method').mean(), on='Recourse_Method').set_index('Recourse_Method')
# Sort the rows by recourse method, then select/reorder the columns.
result = result.sort_index()[order_of_columns]

# File name: <dataset>_<model type><suffix>.csv inside the dated output folder.
output_csv = f'{save_results_path}{dataset_name}_{model_type + suffix}.csv'
result.to_csv(output_csv, float_format='%.2f')

#small_result_table.to_csv('{}{}_{}_small.csv'.format(save_results_path, dataset_name, model_type), float_format='%.2f')