In [1]:
import numpy as np
import pandas as pd
from time import time
from sklearn.metrics import f1_score
from sklearn.ensemble import IsolationForest
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings('ignore')

import sys
sys.path.append('../')

from ract import Action, RecourseTreeClassifier
from datasets import Dataset

In [2]:
# Dataset keys the experiment is run on, in execution order.
DATASETS = ['c', 'f', 'b', 'p']

# Cost function name forwarded to Action, and the cost budgets swept over.
COST_TYPE = 'MPS'
COST_BUDGETS = [0.1, 0.2, 0.4, 0.5]

# Depth limit handed to every RecourseTreeClassifier.
MAX_DEPTH = 64

# Per-dataset grid of alpha values; every dataset currently shares the same
# grid, and each key gets its own list object.
ALPHAS = {
    key: [0.0, 0.025, 0.05, 0.075, 0.1]
    for key in ('f', 'c', 'p', 'b')
}

# Delta values tried for the relabeling (RACT) models.
DELTAS = [0.2, 0.25, 0.3, 0.35, 0.4]

# Column groups of the results table.
# NOTE(review): not referenced in the visible cells -- presumably used by
# later aggregation/reporting code; confirm.
KEYS_GROUPBY = ['Budget', 'Method', 'Alpha', 'Delta']
KEYS_MODEL = ['Accuracy', 'F1', 'Time [s]']
KEYS_ACTION = [
    'Cost', 'Validity', 'Budget-Validity', 'Valid-Cost',
    'Recourse', 'Sparsity', 'Plausibility',
]

In [3]:
def _fit_and_evaluate(clf, action, alpha, iforest, X_tr, y_tr, X_ts, y_ts):
    """Fit one recourse tree and collect its predictive and recourse metrics.

    Parameters
    ----------
    clf : RecourseTreeClassifier
        Unfitted classifier; ``clf.action.alpha`` is set to ``alpha`` before
        fitting.
    action : Action
        Supplies ``y_target``; test instances predicted as anything else are
        the ones explanations are requested for.
    alpha : float
        Value assigned to ``clf.action.alpha``.
    iforest : IsolationForest
        Fitted outlier model used to score the returned counterfactuals.
    X_tr, y_tr, X_ts, y_ts
        Training and test split.

    Returns
    -------
    (dict, array-like)
        The metric columns of one results row, and ``clf.feature_importances_``.
    """
    clf.action.alpha = alpha

    # Only the fit itself is timed.
    start = time()
    clf = clf.fit(X_tr, y_tr)
    runtime = time() - start

    # Predictions are computed once and reused for target selection and F1.
    y_pred = clf.predict(X_ts)
    X_target = X_ts[y_pred != action.y_target]
    result = clf.explain_action(X_target)

    # With no instance to explain there is nothing to score; report 0.0.
    if X_target.shape[0] != 0:
        plausibility = -1 * iforest.score_samples(result['counterfactual']).mean()
    else:
        plausibility = 0.0

    metrics = {
        'Accuracy': clf.score(X_ts, y_ts),
        'F1': f1_score(y_ts, y_pred),
        'Time [s]': runtime,
        'Cost': result['cost'].mean(),
        'Validity': result['validity'].mean(),
        'Budget-Validity': result['cost-validity'].mean(),
        'Valid-Cost': result['valid-cost'].mean(),
        # One minus the number of explained instances whose 'cost-validity'
        # flag is false, divided by the size of the whole test set.
        'Recourse': 1 - (~result['cost-validity']).sum() / X_ts.shape[0],
        'Sparsity': result['valid-sparsity'].mean(),
        'Plausibility': plausibility,
    }
    return metrics, clf.feature_importances_


def run_holdout_tree(dataset='g', causal=False, n_split=30):
    """Run the repeated hold-out budget experiment for one dataset.

    For each of ``n_split`` random train/test splits and each budget in
    ``COST_BUDGETS``, the function fits

    * a plain tree and a ``feature_masking=True`` tree (only when alpha is 0.0,
      recorded as 'Baseline' and 'Baseline (OAF)' with Delta 1.0), and
    * one ``relabeling=True`` tree per value in ``DELTAS`` (recorded as 'RACT'),

    for every alpha in ``ALPHAS[dataset]``. One row per fitted model is written
    to ``../res/appendix/budget/<dataset>_tree[_causal].csv`` and the matching
    feature importances to ``../res/appendix/budget/fi_<dataset>_tree[_causal].csv``.

    Parameters
    ----------
    dataset : str
        Key passed to ``Dataset`` and used to look up ``ALPHAS``.
        NOTE(review): the default 'g' has no entry in ``ALPHAS`` as defined in
        this notebook, so calling without an argument raises ``KeyError``; the
        default is kept unchanged for interface compatibility.
    causal : bool
        Forwarded to ``Action``; also adds the '_causal' suffix to file names.
    n_split : int
        Number of random train/test splits.

    Raises
    ------
    KeyError
        If ``dataset`` has no entry in ``ALPHAS`` (raised before any training).

    Side effects
    ------------
    Seeds NumPy's global RNG with 0, prints progress, creates the output
    directory if it is missing, and writes the two CSV files.
    """
    import os

    np.random.seed(0)

    D = Dataset(dataset=dataset)
    X, y = D.get_dataset()
    params = D.params

    # Resolve everything that can fail cheaply *before* the long sweep: the
    # alpha grid lookup and the output location.
    alphas = ALPHAS[dataset]
    suffix = '_causal' if causal else ''
    results_path = '../res/appendix/budget/{}_tree{}.csv'.format(dataset, suffix)
    importance_path = '../res/appendix/budget/fi_{}_tree{}.csv'.format(dataset, suffix)
    os.makedirs(os.path.dirname(results_path), exist_ok=True)

    columns = [
        'Fold', 'Method', 'Alpha', 'Delta', 'Budget',
        'Accuracy', 'F1', 'Time [s]',
        'Cost', 'Validity', 'Budget-Validity', 'Valid-Cost',
        'Recourse', 'Sparsity', 'Plausibility',
    ]
    records = []
    importance = []

    for k in range(1, n_split + 1):
        X_tr, X_ts, y_tr, y_ts = train_test_split(X, y)

        iforest = IsolationForest(n_estimators=100).fit(X_tr)
        action = Action(X_tr, y_target=0, cost_type=COST_TYPE, causal=causal, **params)

        print('- k = {}'.format(k))
        for cost_budget in COST_BUDGETS:
            action.cost_budget = cost_budget

            print('\t- budget: {}'.format(cost_budget))
            for alpha in alphas:
                # (method name, progress label, recorded delta, extra kwargs)
                plan = []
                if alpha == 0.0:
                    # Baselines are fitted only once per budget, at alpha 0.
                    plan.append(('Baseline', 'Baseline (alp.: {})'.format(alpha), 1.0, {}))
                    plan.append(('Baseline (OAF)', 'Baseline (OAF)', 1.0, {'feature_masking': True}))
                for delta in DELTAS:
                    plan.append((
                        'RACT',
                        'RACT (alp.: {} | del.: {})'.format(alpha, delta),
                        delta,
                        {'relabeling': True, 'delta': delta},
                    ))

                for method, label, delta, extra in plan:
                    clf = RecourseTreeClassifier(action, max_depth=MAX_DEPTH, **extra)
                    metrics, fi = _fit_and_evaluate(
                        clf, action, alpha, iforest, X_tr, y_tr, X_ts, y_ts)

                    record = {
                        'Fold': k, 'Method': method, 'Alpha': alpha,
                        'Delta': delta, 'Budget': cost_budget,
                    }
                    record.update(metrics)
                    records.append(record)
                    importance.append(fi)

                    print('\t\t\t- Method: {} | Acc.: {:.4} | Val.: {:.4} | Rec.: {:.4}'.format(
                        label, record['Accuracy'], record['Budget-Validity'], record['Recourse']))

    results = pd.DataFrame(records, columns=columns)
    results.to_csv(results_path, index=False)

    # Feature importances share the row order of `results`, so the identifying
    # columns can be attached positionally.
    importance = pd.DataFrame(np.array(importance), columns=D.feature_names)
    importance = pd.concat([results[['Fold', 'Method', 'Alpha', 'Budget', 'Delta']], importance], axis=1)
    importance.to_csv(importance_path, index=False)


In [4]:
from datasets import DATASET_FULLNAMES

# Run the hold-out experiment once per configured dataset, printing the
# dataset's full name as a header line before each run.
for dataset in DATASETS:
    header = '# {}'.format(DATASET_FULLNAMES[dataset])
    print(header)
    run_holdout_tree(dataset)

# COMPAS
- k = 1
	- budget: 0.1
			- Method: Baseline (alp.: 0.0) | Acc.: 0.6667 | Val.: 0.2717 | Rec.: 0.6783
			- Method: Baseline (OAF) | Acc.: 0.6641 | Val.: 0.2531 | Rec.: 0.6518
			- Method: RACT (alp.: 0.0 | del.: 0.2) | Acc.: 0.5538 | Val.: 0.4295 | Rec.: 0.9449
			- Method: RACT (alp.: 0.0 | del.: 0.25) | Acc.: 0.5538 | Val.: 0.4295 | Rec.: 0.9449
			- Method: RACT (alp.: 0.0 | del.: 0.3) | Acc.: 0.5538 | Val.: 0.4295 | Rec.: 0.9449
			- Method: RACT (alp.: 0.0 | del.: 0.35) | Acc.: 0.5538 | Val.: 0.4295 | Rec.: 0.9449
			- Method: RACT (alp.: 0.0 | del.: 0.4) | Acc.: 0.6667 | Val.: 0.2717 | Rec.: 0.6783
			- Method: RACT (alp.: 0.025 | del.: 0.2) | Acc.: 0.6556 | Val.: 0.4547 | Rec.: 0.8048
			- Method: RACT (alp.: 0.025 | del.: 0.25) | Acc.: 0.6556 | Val.: 0.4547 | Rec.: 0.8048
			- Method: RACT (alp.: 0.025 | del.: 0.3) | Acc.: 0.6654 | Val.: 0.4492 | Rec.: 0.7575
			- Method: RACT (alp.: 0.025 | del.: 0.35) | Acc.: 0.6654 | Val.: 0.4492 | Rec.: 0.7575
			- Method: RACT (al