In [1]:
import sys
sys.path.append('..')

In [2]:
import tqdm
import warnings
import numpy as np
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
from copy import deepcopy

from src.data import *
from src.model import *
from src.recourse import *
from src.utils import *

warnings.filterwarnings('ignore')

In [3]:
def append_result(d, alg_name, seed, alpha, lamb, i, x_0, theta_0, beta, x_r, theta_r, J):
    d['alg'].append(alg_name)
    d['seed'].append(seed)
    d['alpha'].append(alpha)
    d['lambda'].append(lamb)
    d['i'].append(i)
    d['x_0'].append(x_0.round(4))
    d['theta_0'].append(theta_0.round(4))
    d['beta'].append(beta)
    d['x_r'].append(x_r.round(4))
    d['theta_r'].append(theta_r.round(4))
    d['J'].append(J)

In [4]:
def recourse_runner(seed: int, X_train: np.ndarray, X: np.ndarray, roar_recourse: ROAR, base_model: NN, params: dict, dataset: Dataset):
    alpha = params['alpha']
    lamb = params['lamb']
    weights_0, bias_0 = roar_recourse.weights.numpy(), roar_recourse.bias.numpy()
    theta_0 = np.hstack((weights_0, bias_0))
    
    results = {'alg': [], 'seed': [], 'alpha': [], 'lambda': [], 'i': [], 'x_0': [], 'theta_0': [], 'beta': [], 'x_r': [], 'theta_r': [], 'J': []}
    
    n = len(X)
    for i in tqdm.trange(n, desc=f'Evaluating recourse | alpha={alpha}; lambda={lamb}', colour='#0091ff'):
        x_0 = X[i]
        J = RecourseCost(x_0, lamb)
        
        if params['base_model'] == 'nn':
            # LIME approximation of original NN
            np.random.seed(i)
            weights_0, bias_0 = lime_explanation(base_model.predict, X_train, x_0)
            weights_0, bias_0 = np.round(weights_0, 4), np.round(bias_0, 4)
            theta_0 = np.hstack((weights_0, bias_0))
            
            # Initalize recourse methods with theta_0
            roar_recourse.set_weights(weights_0)
            roar_recourse.set_bias(bias_0)
        
        beta = 1.
        # ROAR with L-inf Norm Adversary
        x_r, _ = roar_recourse.get_recourse(x_0, beta=beta, w_norm='L-inf')
        weights_r, bias_r = roar_recourse.calc_theta_adv(x_r)
        theta_r = np.hstack((weights_r, bias_r))
        J_r = J.eval(x_r, weights_r, bias_r)
        append_result(results, 'ROAR L-inf', seed, alpha, lamb, i, x_0, theta_0, beta, x_r, theta_r, J_r[0])
        
        # ROAR with L-1 Norm Adversary
        x_r,_ = roar_recourse.get_recourse(x_0, beta=beta, w_norm='L-1')
        weights_r, bias_r = roar_recourse.calc_theta_adv(x_r)
        theta_r = np.hstack((weights_r, bias_r))
        J_r = J.eval(x_r, weights_r, bias_r)
        append_result(results, 'ROAR L-1', seed, alpha, lamb, i, x_0, theta_0, beta, x_r, theta_r, J_r[0])
    
    return pd.DataFrame(results)

In [5]:
def run_experiment(dataset: Dataset, params: dict, results: List):
    alpha = params['alpha']
    
    for seed in params['seeds']:
        (train_data, test_data) = dataset.get_data(seed)
        X_train, y_train = train_data
        X_test, y_test = test_data
        
        weights, bias = None, None
        if params['base_model'] == nn:
            base_model = NN(X_train.shape[1])
            base_model.train(X_train.values, y_train.values)
        else:
            base_model = LR()
            base_model.train(X_train.values, y_train.values)
            weights = base_model.model.coef_[0]
            bias = base_model.model.intercept_
        
        recourse_needed_X_train = recourse_needed(base_model.predict, X_train.values)
        recourse_needed_X_test = recourse_needed(base_model.predict, X_test.values)
        
        lar_recourse = LARRecourse(weights=weights, bias=bias, alpha=alpha)
        roar_recourse = ROAR(weights=weights, bias=bias, alpha=alpha)
        
        params['lamb'] = lar_recourse.choose_lambda(recourse_needed_X_train, base_model.predict, X_train.values)
        lar_recourse.lamb = params['lamb']
        roar_recourse.lamb = params['lamb']
        
        df_results = recourse_runner(seed, X_train.values, recourse_needed_X_test[:], roar_recourse, base_model, params, dataset)
        results.append(df_results)

In [22]:
torch.manual_seed(0)

d_results = {}
params = {}
params['alpha'] = 0.5
params['lamb'] = None
params['seeds'] = range(1)
params['base_model'] = 'nn'

datasets = [SBADataset()]
for dataset in datasets:
    results = []
    
    print(f'Running {dataset.name} data...')
    run_experiment(dataset, params, results)
    d_results[dataset.name] = pd.concat(results)
    print(f'Finished {dataset.name}\n')

Running sba data...
Choosing lambda


lambda=0.1: 100%|██████████| 150/150 [00:00<00:00, 29030.34it/s]
lambda=0.2: 100%|██████████| 150/150 [00:00<00:00, 26288.89it/s]
lambda=0.3: 100%|██████████| 150/150 [00:00<00:00, 24546.28it/s]
lambda=0.4: 100%|██████████| 150/150 [00:00<00:00, 28100.66it/s]
lambda=0.5: 100%|██████████| 150/150 [00:00<00:00, 26891.16it/s]
lambda=0.6: 100%|██████████| 150/150 [00:00<00:00, 27994.38it/s]
lambda=0.7: 100%|██████████| 150/150 [00:00<00:00, 27456.82it/s]
lambda=0.8: 100%|██████████| 150/150 [00:00<00:00, 26886.56it/s]
lambda=0.9: 100%|██████████| 150/150 [00:00<00:00, 27726.66it/s]
lambda=1.0: 100%|██████████| 150/150 [00:00<00:00, 27323.27it/s]
Evaluating recourse | alpha=0.5; lambda=1.0: 100%|[38;2;0;145;255m██████████[0m| 39/39 [00:39<00:00,  1.01s/it]

Finished sba






In [23]:
for dataset in datasets:
    print(f'{dataset.name.upper()} | {params["base_model"].upper()}')
    df_result = d_results[dataset.name]
    df_agg = df_result.groupby(['alg']).mean(True)[['J']]
    display(df_agg)

SBA | NN


Unnamed: 0_level_0,J
alg,Unnamed: 1_level_1
ROAR L-1,1.961248
ROAR L-inf,4.916656


In [28]:
df_l1 = df_result[df_result['alg']=='ROAR L-1']
df_linf = df_result[df_result['alg']=='ROAR L-inf']
for i in range(len(df_l1)):
    theta_0 = df_l1['theta_0'].iloc[i]
    theta_r = df_l1['theta_r'].iloc[i]
    assert(np.linalg.norm(theta_0-theta_r, 1).round(2) <= params['alpha'])