In [1]:
import os
from time import time

import numpy as np
import pandas as pd
from sklearn.metrics import f1_score, roc_auc_score
from tqdm import tqdm
np.random.seed(0)

from rabit import Action, RecourseBoostingClassifier, RecourseExplainer
from rabit.datasets import FicoDataset, CompasDataset, AdultDataset, BailDataset

## Exp 1. Baseline Comparison and Trade-off Analysis

In [2]:
def run_comparison(dataset, n_iter=10, n_estimators=100, cost_budget=0.2):
    """Benchmark Vanilla, OAF and RABIT classifiers on repeated splits of one dataset.

    Every repetition draws a fresh train/test split, fits an ``Action`` on the
    training part and then trains one ``RecourseBoostingClassifier`` per
    configuration:

    * ``Vanilla`` -- ``gamma = 0``.
    * ``OAF``     -- ``gamma = 0`` with ``only_actionable_features=True``.
    * ``RABIT``   -- each non-zero ``gamma`` of the sweep.

    Parameters
    ----------
    dataset : object
        Dataset wrapper exposing ``name``, ``params`` and ``get_dataset``; for
        the datasets named 'COMPAS' and 'Adult' it must also expose
        ``get_sensitive_indices``.
    n_iter : int
        Number of repetitions (one fresh split each).
    n_estimators : int
        Forwarded to ``RecourseBoostingClassifier``.
    cost_budget : float
        Forwarded to ``Action``.

    Returns
    -------
    pandas.DataFrame
        One row per (repetition, configuration). ``time`` is the wall-clock
        duration in seconds of constructing and fitting the classifier.
        ``unfairness`` is a ``0.0`` placeholder for every dataset other than
        'COMPAS' and 'Adult'.
    """
    columns = [
        'dataset', 'method', 'n_estimators', 'gamma', 'cost_budget', 'time',
        'accuracy', 'f1', 'AUC',
        'recourse', 'validity', 'cost', 'sparsity', 'plausibility', 'unfairness',
    ]

    # gamma = 0 is trained twice on purpose: once as Vanilla, once as OAF.
    rabit_gammas = [0.0005, 0.001, 0.0015, 0.002, 0.0025, 0.003, 0.0035, 0.004, 0.0045, 0.005]
    configurations = [('Vanilla', 0.0), ('OAF', 0.0)]
    configurations += [('RABIT', g) for g in rabit_gammas]

    measures_unfairness = dataset.name in ['COMPAS', 'Adult']
    rows = []

    print('Running {} dataset'.format(dataset.name))
    for _ in tqdm(range(n_iter)):
        X_tr, X_ts, y_tr, y_ts = dataset.get_dataset(split=True)
        action = Action(dataset.params, cost_budget=cost_budget).fit(X_tr, y_tr)

        for method, gamma in configurations:
            # The timed span covers construction and fitting only.
            tic = time()
            estimator = RecourseBoostingClassifier(
                action,
                n_estimators=n_estimators,
                gamma=gamma,
                only_actionable_features=(method == 'OAF'),
            ).fit(X_tr, y_tr)
            fit_seconds = time() - tic

            accuracy = estimator.score(X_ts, y_ts)
            f1 = f1_score(y_ts, estimator.predict(X_ts))
            auc = roc_auc_score(y_ts, estimator.predict_proba(X_ts)[:, 1])

            recourse = RecourseExplainer(estimator, action).explain_recourse(X_ts)
            row = {
                'dataset': dataset.name,
                'method': method,
                'n_estimators': n_estimators,
                'gamma': gamma,
                'cost_budget': cost_budget,
                'time': fit_seconds,
                'accuracy': accuracy,
                'f1': f1,
                'AUC': auc,
                'recourse': recourse.get_recourse(),
                'validity': recourse.get_validity(),
                'cost': recourse.get_cost(),
                'sparsity': recourse.get_sparsity(),
                'plausibility': recourse.get_plausibility(),
            }

            if measures_unfairness:
                row['unfairness'] = recourse.get_unfairness(dataset.get_sensitive_indices())
            else:
                row['unfairness'] = 0.0
            rows.append(row)

    return pd.DataFrame(rows, columns=columns)

In [3]:
# Exp 1 driver: run the baseline comparison on every dataset with the default
# settings of run_comparison and persist the stacked results.
results = []

for dataset in [FicoDataset(), CompasDataset(), AdultDataset(), BailDataset()]:
    result = run_comparison(dataset)
    results.append(result)

results_comparison = pd.concat(results)

# The runs above take hours; make sure the output directory exists so the
# write cannot fail on a fresh checkout and discard them.
os.makedirs('./results', exist_ok=True)
results_comparison.to_csv('./results/results_comparison.csv', index=False)

Running FICO dataset


100%|██████████| 10/10 [27:40<00:00, 166.06s/it]


Running COMPAS dataset


100%|██████████| 10/10 [06:36<00:00, 39.70s/it]


Running Adult dataset


100%|██████████| 10/10 [1:17:20<00:00, 464.02s/it]


Running Bail dataset


100%|██████████| 10/10 [08:52<00:00, 53.24s/it]


## Exp 2. Leaf Refinement

In [4]:
def run_weights(dataset, n_iter=10, n_estimators=100, cost_budget=0.2):
    """Evaluate leaf-weight refinement (``optimize_weights``) over a sweep of ``eta``.

    Every repetition draws a train/validation/test split (25% validation,
    25% test), fits an ``Action`` on the training part and trains the three
    methods ``Vanilla`` (gamma = 0), ``OAF`` (gamma = 0 with
    ``only_actionable_features=True``) and ``RABIT`` (gamma = 0.002). Each
    fitted model is then refined on the validation split for every ``eta`` and
    scored on the test split.

    Parameters
    ----------
    dataset : object
        Dataset wrapper exposing ``name``, ``params`` and ``get_dataset``.
    n_iter : int
        Number of repetitions (one fresh split each).
    n_estimators : int
        Forwarded to ``RecourseBoostingClassifier``.
    cost_budget : float
        Forwarded to ``Action``.

    Returns
    -------
    pandas.DataFrame
        One row per (repetition, method, eta) with predictive metrics
        (``accuracy``, ``f1``, ``AUC``) and recourse metrics (``recourse``,
        ``validity``, ``cost``).
    """
    columns = [
        'dataset', 'method', 'n_estimators', 'eta', 'cost_budget',
        'accuracy', 'f1', 'AUC',
        'recourse', 'validity', 'cost',
    ]
    etas = [0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64, 1.28]
    rows = []

    print('Running {} dataset'.format(dataset.name))
    for _ in tqdm(range(n_iter)):

        X_tr, X_vl, X_ts, y_tr, y_vl, y_ts = dataset.get_dataset(split=True, test_size=0.25, validation_size=0.25)
        # No sensitive-attribute lookup here: this experiment records no
        # fairness metric, so the indices were fetched but never used.
        action = Action(dataset.params, cost_budget=cost_budget)
        action = action.fit(X_tr, y_tr)

        for method in ['Vanilla', 'OAF', 'RABIT']:
            gamma = 0.002 if method == 'RABIT' else 0.0
            estimator = RecourseBoostingClassifier(action, n_estimators=n_estimators, gamma=gamma, only_actionable_features=(method == 'OAF'))
            estimator = estimator.fit(X_tr, y_tr)
            explainer = RecourseExplainer(estimator, action)
            X_cf = explainer.generate_recourse_calibration_samples(X_vl)

            # NOTE(review): the same estimator object is refined once per eta
            # without being reset in between. Whether each call starts from
            # the originally fitted weights or from the previous eta's result
            # depends on optimize_weights -- confirm.
            # NOTE(review): `explainer` was built from the estimator before
            # refinement, so this relies on optimize_weights updating that
            # object in place (i.e. returning self) -- confirm.
            for eta in etas:
                estimator = estimator.optimize_weights(X_vl, y_vl, X_cf, eta=eta)
                recourse = explainer.explain_recourse(X_ts)
                rows.append({
                    'dataset': dataset.name,
                    'method': method,
                    'n_estimators': n_estimators,
                    'eta': eta,
                    'cost_budget': cost_budget,
                    'accuracy': estimator.score(X_ts, y_ts),
                    'f1': f1_score(y_ts, estimator.predict(X_ts)),
                    'AUC': roc_auc_score(y_ts, estimator.predict_proba(X_ts)[:, 1]),
                    'recourse': recourse.get_recourse(),
                    'validity': recourse.get_validity(),
                    'cost': recourse.get_cost(),
                })

    return pd.DataFrame(rows, columns=columns)

In [5]:
# Exp 2 driver: run the leaf-refinement sweep on every dataset and persist
# the stacked results.
results = []

for dataset in [FicoDataset(), CompasDataset(), AdultDataset(), BailDataset()]:
    result = run_weights(dataset)
    results.append(result)

results_weights = pd.concat(results)

# Make sure the output directory exists so a long run is not lost at the
# final write.
os.makedirs('./results', exist_ok=True)
results_weights.to_csv('./results/results_weights.csv', index=False)

Running FICO dataset


100%|██████████| 10/10 [12:22<00:00, 74.30s/it]


Running COMPAS dataset


100%|██████████| 10/10 [06:57<00:00, 41.77s/it]


Running Adult dataset


100%|██████████| 10/10 [42:25<00:00, 254.51s/it]


Running Bail dataset


100%|██████████| 10/10 [05:52<00:00, 35.28s/it]


## Exp 3. Sensitivity Analyses (Appendix)

In [8]:
# Exp 3a driver: sensitivity to the number of trees.
# n_estimators=100 is not re-run here; it is the default of run_comparison,
# so those rows are taken from `results_comparison` (Exp 1). This cell
# therefore requires the Exp 1 driver cell to have been executed in the
# current kernel.
results = []

for dataset in [FicoDataset(), CompasDataset(), AdultDataset(), BailDataset()]:
    for n_estimators in [50, 150, 200, 250]:
        result = run_comparison(dataset, n_estimators=n_estimators)
        results.append(result)

results_trees = pd.concat(results)
results_trees = pd.concat([results_trees, results_comparison])

# Make sure the output directory exists so a long run is not lost at the
# final write.
os.makedirs('./results', exist_ok=True)
results_trees.to_csv('./results/results_trees.csv', index=False)

Running FICO dataset


100%|██████████| 10/10 [16:20<00:00, 98.08s/it]


Running FICO dataset


100%|██████████| 10/10 [41:54<00:00, 251.41s/it]


Running FICO dataset


100%|██████████| 10/10 [54:34<00:00, 327.49s/it]


Running FICO dataset


100%|██████████| 10/10 [1:09:52<00:00, 419.22s/it]


Running COMPAS dataset


100%|██████████| 10/10 [04:48<00:00, 28.90s/it]


Running COMPAS dataset


100%|██████████| 10/10 [10:08<00:00, 60.89s/it]


Running COMPAS dataset


100%|██████████| 10/10 [12:29<00:00, 74.99s/it]


Running COMPAS dataset


100%|██████████| 10/10 [14:40<00:00, 88.07s/it]


Running Adult dataset


100%|██████████| 10/10 [44:31<00:00, 267.13s/it]


Running Adult dataset


100%|██████████| 10/10 [1:50:06<00:00, 660.69s/it]


Running Adult dataset


100%|██████████| 10/10 [2:25:08<00:00, 870.89s/it] 


Running Adult dataset


100%|██████████| 10/10 [2:59:20<00:00, 1076.09s/it] 


Running Bail dataset


100%|██████████| 10/10 [05:05<00:00, 30.58s/it]


Running Bail dataset


100%|██████████| 10/10 [11:40<00:00, 70.09s/it]


Running Bail dataset


100%|██████████| 10/10 [15:15<00:00, 91.51s/it]


Running Bail dataset


100%|██████████| 10/10 [19:45<00:00, 118.55s/it]


In [9]:
# Exp 3b driver: sensitivity to the cost budget.
# cost_budget=0.2 is not re-run here; it is the default of run_comparison,
# so those rows are taken from `results_comparison` (Exp 1). This cell
# therefore requires the Exp 1 driver cell to have been executed in the
# current kernel.
results = []

for dataset in [FicoDataset(), CompasDataset(), AdultDataset(), BailDataset()]:
    for cost_budget in [0.1, 0.3, 0.4, 0.5]:
        result = run_comparison(dataset, cost_budget=cost_budget)
        results.append(result)

results_budget = pd.concat(results)
results_budget = pd.concat([results_budget, results_comparison])

# Make sure the output directory exists so a long run is not lost at the
# final write.
os.makedirs('./results', exist_ok=True)
results_budget.to_csv('./results/results_budget.csv', index=False)

Running FICO dataset


100%|██████████| 10/10 [29:25<00:00, 176.58s/it]


Running FICO dataset


100%|██████████| 10/10 [28:25<00:00, 170.59s/it]


Running FICO dataset


100%|██████████| 10/10 [28:00<00:00, 168.08s/it]


Running FICO dataset


100%|██████████| 10/10 [28:39<00:00, 171.98s/it]


Running COMPAS dataset


100%|██████████| 10/10 [07:13<00:00, 43.38s/it]


Running COMPAS dataset


100%|██████████| 10/10 [07:03<00:00, 42.39s/it]


Running COMPAS dataset


100%|██████████| 10/10 [06:58<00:00, 41.90s/it]


Running COMPAS dataset


100%|██████████| 10/10 [06:54<00:00, 41.46s/it]


Running Adult dataset


100%|██████████| 10/10 [1:17:04<00:00, 462.49s/it]


Running Adult dataset


100%|██████████| 10/10 [1:15:09<00:00, 450.93s/it]


Running Adult dataset


100%|██████████| 10/10 [1:15:57<00:00, 455.72s/it]


Running Adult dataset


100%|██████████| 10/10 [1:17:28<00:00, 464.83s/it]


Running Bail dataset


100%|██████████| 10/10 [08:37<00:00, 51.77s/it]


Running Bail dataset


100%|██████████| 10/10 [08:33<00:00, 51.36s/it]


Running Bail dataset


100%|██████████| 10/10 [08:33<00:00, 51.39s/it]


Running Bail dataset


100%|██████████| 10/10 [08:30<00:00, 51.03s/it]


## Exp 4. Intercept Adjustment (Appendix)

In [6]:
def run_intercept(dataset, n_iter=10, n_estimators=100, cost_budget=0.2):
    """Evaluate intercept adjustment (``optimize_intercept``) over a sweep of ``epsilon``.

    Every repetition draws a train/validation/test split (25% validation,
    25% test), fits an ``Action`` on the training part and trains the three
    methods ``Vanilla`` (gamma = 0), ``OAF`` (gamma = 0 with
    ``only_actionable_features=True``) and ``RABIT`` (gamma = 0.002). For each
    fitted model the intercept is adjusted for every ``epsilon`` using
    calibration samples generated from the validation split; the model is
    scored on the test split and its intercept is put back before the next
    ``epsilon``.

    Parameters
    ----------
    dataset : object
        Dataset wrapper exposing ``name``, ``params`` and ``get_dataset``.
    n_iter : int
        Number of repetitions (one fresh split each).
    n_estimators : int
        Forwarded to ``RecourseBoostingClassifier``.
    cost_budget : float
        Forwarded to ``Action``.

    Returns
    -------
    pandas.DataFrame
        One row per (repetition, method, epsilon) with predictive metrics
        (``accuracy``, ``f1``, ``AUC``) and recourse metrics (``recourse``,
        ``validity``, ``cost``).
    """
    columns = [
        'dataset', 'method', 'n_estimators', 'epsilon', 'cost_budget',
        'accuracy', 'f1', 'AUC',
        'recourse', 'validity', 'cost',
    ]
    epsilons = [0.05, 0.1, 0.15, 0.2, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5]
    rows = []

    print('Running {} dataset'.format(dataset.name))
    for _ in tqdm(range(n_iter)):

        X_tr, X_vl, X_ts, y_tr, y_vl, y_ts = dataset.get_dataset(split=True, test_size=0.25, validation_size=0.25)
        action = Action(dataset.params, cost_budget=cost_budget)
        action = action.fit(X_tr, y_tr)

        for method in ['Vanilla', 'OAF', 'RABIT']:
            gamma = 0.002 if method == 'RABIT' else 0.0
            estimator = RecourseBoostingClassifier(action, n_estimators=n_estimators, gamma=gamma, only_actionable_features=(method == 'OAF'))
            estimator = estimator.fit(X_tr, y_tr)
            explainer = RecourseExplainer(estimator, action)
            X_cf = explainer.generate_recourse_calibration_samples(X_vl)

            # Remember the intercept the model has before any adjustment so
            # every epsilon starts from the same state. Resetting to a
            # hard-coded 0.0 is only equivalent when the fitted intercept is
            # zero; the 0.0 fallback keeps that behaviour if the attribute is
            # not set yet.
            base_intercept = getattr(estimator, 'intercept_', 0.0)

            # NOTE(review): `explainer` was built before the adjustment, so
            # this relies on optimize_intercept updating the estimator in
            # place (i.e. returning self) -- confirm.
            for epsilon in epsilons:
                estimator = estimator.optimize_intercept(X_cf, epsilon=epsilon)
                recourse = explainer.explain_recourse(X_ts)
                rows.append({
                    'dataset': dataset.name,
                    'method': method,
                    'n_estimators': n_estimators,
                    'epsilon': epsilon,
                    'cost_budget': cost_budget,
                    'accuracy': estimator.score(X_ts, y_ts),
                    'f1': f1_score(y_ts, estimator.predict(X_ts)),
                    'AUC': roc_auc_score(y_ts, estimator.predict_proba(X_ts)[:, 1]),
                    'recourse': recourse.get_recourse(),
                    'validity': recourse.get_validity(),
                    'cost': recourse.get_cost(),
                })
                # Undo this epsilon's adjustment before trying the next one.
                estimator.intercept_ = base_intercept

    return pd.DataFrame(rows, columns=columns)

In [7]:
# Exp 4 driver: run the intercept-adjustment sweep on every dataset and
# persist the stacked results.
results = []

for dataset in [FicoDataset(), CompasDataset(), AdultDataset(), BailDataset()]:
    result = run_intercept(dataset)
    results.append(result)

results_intercept = pd.concat(results)

# Make sure the output directory exists so a long run is not lost at the
# final write.
os.makedirs('./results', exist_ok=True)
results_intercept.to_csv('./results/results_intercept.csv', index=False)

Running FICO dataset


100%|██████████| 10/10 [14:06<00:00, 84.68s/it]


Running COMPAS dataset


100%|██████████| 10/10 [07:46<00:00, 46.61s/it]


Running Adult dataset


100%|██████████| 10/10 [43:41<00:00, 262.12s/it]


Running Bail dataset


100%|██████████| 10/10 [05:22<00:00, 32.30s/it]


## Exp 5. Brittleness Analysis (Appendix)

In [2]:
def get_brittleness(estimator, X, var, params, n_repeat=100, noise_scale=0.1):
    """Return the fraction of predictions that flip under random feature noise.

    Every row of ``X`` is replicated ``n_repeat`` times and perturbed with
    zero-mean Gaussian noise whose covariance is ``noise_scale * diag(var)``.
    The noise is constrained by the masks in ``params`` before it is applied:

    * ``params['is_immutable']``      -- noise is set to zero,
    * ``params['is_unincreasable']``  -- positive noise is clipped to zero,
    * ``params['is_irreducible']``    -- negative noise is clipped to zero.

    Noise is drawn from NumPy's global random state (``np.random``).

    Parameters
    ----------
    estimator : object
        Anything with a ``predict(X)`` method returning one label per row.
    X : array-like of shape (n_samples, n_features)
        Inputs to perturb.
    var : array-like of shape (n_features,)
        Per-feature variances used as the diagonal of the noise covariance.
    params : mapping
        Holds the three column selectors listed above (boolean masks or
        index arrays over the feature axis).
    n_repeat : int
        Number of noisy copies drawn per input row.
    noise_scale : float
        Multiplier applied to ``var``; the default ``0.1`` reproduces the
        previously hard-coded setting.

    Returns
    -------
    float
        Share of the ``n_samples * n_repeat`` perturbed points whose predicted
        label differs from the prediction on the unperturbed row.

    Raises
    ------
    ValueError
        If ``noise_scale`` is negative (the covariance would not be valid).
    """
    if noise_scale < 0:
        raise ValueError('noise_scale must be non-negative')

    X_rep = np.repeat(X, n_repeat, axis=0)
    # Reference labels: the unperturbed prediction, aligned with X_rep.
    y_rep = np.repeat(estimator.predict(X), n_repeat)

    covariance = noise_scale * np.diag(var)
    pert = np.random.multivariate_normal(np.zeros(X.shape[1]), covariance, size=(X_rep.shape[0],))

    # Respect the feature constraints: frozen features do not move, and
    # one-directional features only move in their allowed direction.
    pert[:, params['is_immutable']] = 0.0
    no_increase = params['is_unincreasable']
    pert[:, no_increase] = np.minimum(pert[:, no_increase], 0.0)
    no_decrease = params['is_irreducible']
    pert[:, no_decrease] = np.maximum(pert[:, no_decrease], 0.0)

    flipped = estimator.predict(X_rep + pert) != y_rep
    return flipped.mean()


def run_brittleness(dataset, n_iter=10, n_estimators=100, cost_budget=0.2):
    """Measure prediction brittleness of Vanilla, OAF and RABIT on one dataset.

    Every repetition draws a train/test split (25% test), fits an ``Action``
    on the training part and trains the three methods ``Vanilla`` (gamma = 0),
    ``OAF`` (gamma = 0 with ``only_actionable_features=True``) and ``RABIT``
    (gamma = 0.002). Each model is scored with ``get_brittleness`` on the test
    inputs, using the per-feature variance of the training inputs.

    Parameters
    ----------
    dataset : object
        Dataset wrapper exposing ``name``, ``params`` and ``get_dataset``.
    n_iter : int
        Number of repetitions (one fresh split each).
    n_estimators : int
        Forwarded to ``RecourseBoostingClassifier``.
    cost_budget : float
        Forwarded to ``Action``.

    Returns
    -------
    pandas.DataFrame
        One row per (repetition, method) with columns ``dataset``, ``method``,
        ``gamma`` and ``brittleness``.
    """
    # (method label, gamma) for the three models compared in this experiment.
    configurations = [('Vanilla', 0.0), ('OAF', 0.0), ('RABIT', 0.002)]
    rows = []

    print('Running {} dataset'.format(dataset.name))
    for _ in tqdm(range(n_iter)):
        # The test labels are returned by the split but not needed here.
        X_tr, X_ts, y_tr, y_ts = dataset.get_dataset(split=True, test_size=0.25)
        action = Action(dataset.params, cost_budget=cost_budget).fit(X_tr, y_tr)

        for method, gamma in configurations:
            estimator = RecourseBoostingClassifier(
                action,
                n_estimators=n_estimators,
                gamma=gamma,
                only_actionable_features=(method == 'OAF'),
            ).fit(X_tr, y_tr)

            rows.append({
                'dataset': dataset.name,
                'method': method,
                'gamma': gamma,
                'brittleness': get_brittleness(estimator, X_ts, X_tr.var(axis=0), dataset.params),
            })

    return pd.DataFrame(rows, columns=['dataset', 'method', 'gamma', 'brittleness'])

In [3]:
# Exp 5 driver: run the brittleness analysis on every dataset and persist
# the stacked results.
results = []

for dataset in [FicoDataset(), CompasDataset(), AdultDataset(), BailDataset()]:
    result = run_brittleness(dataset)
    results.append(result)

results_brittleness = pd.concat(results)

# Make sure the output directory exists so the run is not lost at the
# final write.
os.makedirs('./results', exist_ok=True)
results_brittleness.to_csv('./results/results_brittleness.csv', index=False)

Running FICO dataset


100%|██████████| 10/10 [07:45<00:00, 46.59s/it]


Running COMPAS dataset


100%|██████████| 10/10 [01:32<00:00,  9.25s/it]


Running Adult dataset


100%|██████████| 10/10 [15:53<00:00, 95.39s/it]


Running Bail dataset


100%|██████████| 10/10 [02:19<00:00, 13.97s/it]
