In [1]:
import sys
sys.path.append('..')

In [2]:
import pickle
import warnings
from copy import deepcopy
from typing import List, Callable

import numpy as np
import pandas as pd
import torch
import tqdm

from src.data import *
from src.model import *
from src.utils import *
from src.recourse import *

warnings.filterwarnings('ignore')

In [3]:
def append_result(d, alg_name, seed, alpha, lamb, beta, p, prediction, j_val, cost):
    """Append one result record to the column-oriented accumulator ``d``.

    ``d`` maps column names to lists; every list receives exactly one new
    element per call, so the lists stay the same length.

    Args:
        d: dict of lists with the keys 'alg', 'seed', 'alpha', 'lambda', 'p',
            'prediction', 'beta', 'J' and 'Cost'.
        alg_name: value stored under 'alg'.
        seed: value stored under 'seed'.
        alpha: value stored under 'alpha'.
        lamb: value stored under 'lambda'.
        beta: value stored under 'beta'.
        p: value stored under 'p'.
        prediction: value stored under 'prediction'.
        j_val: value stored under 'J'.
        cost: value stored under 'Cost'.
    """
    # (column, value) pairs, listed in the order the columns are written.
    record = (
        ('alg', alg_name),
        ('seed', seed),
        ('alpha', alpha),
        ('lambda', lamb),
        ('p', p),
        ('prediction', prediction),
        ('beta', beta),
        ('J', j_val),
        ('Cost', cost),
    )
    for column, value in record:
        d[column].append(value)

In [4]:
def _aggregate_and_save(df: pd.DataFrame, label: str, tag: str, seed: int, params: dict, dataset: Dataset) -> pd.DataFrame:
    """Optionally pickle the raw history, average it per (alg, p, beta), and optionally pickle the aggregate."""
    file_name = f'{params["base_model"]}_{dataset.name}_{tag}_{seed}.pkl'
    if params['save_history']:
        print(f'[{label}] Saving history for {dataset.name} run {seed}')
        df.to_pickle(f'../results/smoothness/history/{file_name}')
    # Non-numeric columns (e.g. the stringified 'prediction') are dropped by numeric_only.
    df_agg = df.groupby(['alg', 'p', 'beta'], as_index=False).mean(numeric_only=True)
    if params['save_results']:
        print(f'[{label}] Saving results for {dataset.name} run {seed}')
        df_agg.to_pickle(f'../results/smoothness/output/{file_name}')
    return df_agg


def recourse_runner(seed:int, X: np.ndarray, lar_recourse: LARRecourse, lar_recourse_shifted: LARRecourse, roar_recourse: ROAR, base_model: Model, shifted_model: Model, X_train: np.ndarray, params: dict, dataset: Dataset, predictions: List):
    """Sweep beta for Alg1 and/or ROAR recourse and score it against the shifted model.

    For every row ``x_0`` of ``X`` the function computes a reference recourse
    with ``lar_recourse_shifted`` (beta=0, using the shifted parameters) and
    then, for each ``(p, theta_p)`` in ``predictions`` and each beta, records
    the difference between the first element of ``J.eval`` at the algorithm's
    recourse and at that reference, both evaluated with the shifted
    weights/bias.

    Args:
        seed: run identifier stored in every record and used in output file names.
        X: instances to compute recourse for; iterated row by row.
        lar_recourse: recourse object used for 'alg1'. Its ``weights``/``bias``
            are overwritten per instance when ``base_model`` is an ``NN``.
        lar_recourse_shifted: recourse object used for the shifted reference.
        roar_recourse: recourse object used for 'roar'.
        base_model: model before the shift; selects the LR or NN code path.
        shifted_model: model after the shift.
        X_train: training data passed to ``lime_explanation`` on the NN path.
        params: reads 'alpha', 'lamb', 'algs', 'base_model', 'save_history'
            and 'save_results'.
        dataset: only ``dataset.name`` is used, for log lines and file names.
        predictions: iterable of ``(p, theta_p)`` pairs. On the NN path it is
            regenerated for every instance.

    Returns:
        The per-(alg, p, beta) averaged results of every algorithm that ran,
        concatenated into one DataFrame; an empty DataFrame if none ran.
    """
    alpha = params['alpha']
    lamb = params['lamb']
    run_alg1 = 'alg1' in params['algs']
    run_roar = 'roar' in params['algs']

    betas = np.arange(0., 1.01, 0.01).round(2)
    # ROAR is only evaluated on a coarser beta grid. Match with a tolerance:
    # np.arange(0., 1.1, 0.2) contains 0.6000000000000001, which an exact
    # membership test against the rounded betas never hits (beta=0.6 was skipped).
    roar_grid = np.arange(0., 1.1, 0.2)
    on_roar_grid = np.isclose(betas[:, None], roar_grid[None, :]).any(axis=1)

    if isinstance(base_model, LR):
        weights_s = shifted_model.model.coef_[0]
        bias_s = shifted_model.model.intercept_

    results_opt = {'alg': [], 'seed': [], 'alpha': [], 'lambda': [], 'p': [], 'prediction': [], 'beta': [], 'Cost': [], 'J': []}
    results_roar = deepcopy(results_opt)

    n = len(X)
    for i in tqdm.trange(n, desc=f'Evaluating recourse | alpha={alpha}; lambda={lamb}', colour='#0091ff'):
        x_0 = X[i]
        J = RecourseCost(x_0, lamb)

        if isinstance(base_model, NN):
            # Local linear surrogate of the base model around x_0.
            weights_0, bias_0 = lime_explanation(base_model.predict, X_train, x_0)
            weights_0, bias_0 = np.round(weights_0, 4), np.round(bias_0, 4)
            theta_0 = np.hstack((weights_0, bias_0))

            lar_recourse.weights = weights_0
            lar_recourse.bias = bias_0

            roar_recourse.set_weights(weights_0)
            roar_recourse.set_bias(bias_0)

            # Local linear surrogate of the shifted model around x_0.
            # Round the shifted explanation itself; rounding the base values
            # here would discard the shifted surrogate entirely.
            weights_s, bias_s = lime_explanation(shifted_model.predict, X_train, x_0)
            weights_s, bias_s = np.round(weights_s, 4), np.round(bias_s, 4)
            theta_s = np.hstack((weights_s, bias_s))
            lar_recourse_shifted.weights = weights_s
            lar_recourse_shifted.bias = bias_s

            predictions = generate_nn_smoothness_predictions(theta_0, theta_s, alpha)

        x_s = lar_recourse_shifted.get_recourse(x_0, beta=0., theta_p=(weights_s, bias_s))

        # Compute J value of x_s
        J_opt_shifted = J.eval(x_s, weights_s, bias_s)

        for p, theta_p in predictions:
            for beta, beta_on_roar_grid in zip(betas, on_roar_grid):
                if run_alg1:
                    x = lar_recourse.get_recourse(x_0, beta=beta, theta_p=theta_p)
                    J_val = J.eval(x, weights_s, bias_s)
                    J_norm = J_val[0] - J_opt_shifted[0]
                    # Cost is not computed here; -1 is stored as a placeholder.
                    append_result(results_opt, 'Alg1', seed, alpha, lamb, beta, str(p), str(np.hstack(theta_p).round(2)), J_norm, -1)

                if run_roar and beta_on_roar_grid:
                    x, _ = roar_recourse.get_recourse(x_0, beta=beta, theta_p=theta_p)
                    J_val = J.eval(x, weights_s, bias_s)
                    J_norm = J_val[0] - J_opt_shifted[0]
                    append_result(results_roar, 'ROAR', seed, alpha, lamb, beta, str(p), str(np.hstack(theta_p).round(2)), J_norm, -1)

    # Collect the aggregates and concatenate once, rather than seeding
    # pd.concat with an empty DataFrame.
    aggregated = []
    if run_alg1:
        aggregated.append(_aggregate_and_save(pd.DataFrame(results_opt), 'Alg1', 'alg1', seed, params, dataset))
    if run_roar:
        aggregated.append(_aggregate_and_save(pd.DataFrame(results_roar), 'ROAR', 'roar', seed, params, dataset))

    return pd.concat(aggregated) if aggregated else pd.DataFrame()
            

In [5]:
def run_experiment(dataset: Dataset, params: dict, results: List):
    """Train base and shifted models per seed and run the recourse evaluation.

    For each seed in ``params['seeds']`` this loads the original and shifted
    splits from ``dataset``, trains one model on each training split, selects
    the instances that need recourse under the base model, chooses lambda on
    the training instances, and appends the DataFrame returned by
    ``recourse_runner`` to ``results``.

    Args:
        dataset: provides ``get_data(seed, shift=True)``.
        params: reads 'alpha', 'base_model' ('lr' or 'nn') and 'seeds'.
            ``params['lamb']`` is overwritten in place with the lambda chosen
            for each seed. The dict is also forwarded to ``recourse_runner``.
        results: list that receives one result DataFrame per seed.

    Raises:
        ValueError: if ``params['base_model']`` is neither 'lr' nor 'nn'.
    """
    alpha = params['alpha']
    model_kind = params['base_model']
    # Fail fast: any other value used to fall into the LR branch while leaving
    # the recourse weights as None and the predictions empty.
    if model_kind not in ('lr', 'nn'):
        raise ValueError(f"Unsupported base_model {model_kind!r}; expected 'lr' or 'nn'")

    for seed in params['seeds']:
        (train_data, test_data), (train_data_shifted, _) = dataset.get_data(seed, shift=True)
        X_train, y_train = train_data
        X_test, _ = test_data
        X_train_shifted, y_train_shifted = train_data_shifted

        if model_kind == 'nn':
            base_model, shifted_model = NN(X_train.shape[1]), NN(X_train.shape[1])
        else:
            base_model, shifted_model = LR(), LR()

        base_model.train(X_train.values, y_train.values)
        shifted_model.train(X_train_shifted.values, y_train_shifted.values)

        recourse_needed_X_train = recourse_needed(base_model.predict, X_train.values)
        recourse_needed_X_test = recourse_needed(base_model.predict, X_test.values)

        # For 'nn' the weights stay None here; recourse_runner fills them in
        # per instance and regenerates the predictions itself.
        weights_0, bias_0 = None, None
        weights_s, bias_s = None, None
        predictions = []
        if model_kind == 'lr':
            weights_0 = base_model.model.coef_[0]
            bias_0 = base_model.model.intercept_
            theta_0 = np.hstack((weights_0, bias_0))

            weights_s = shifted_model.model.coef_[0]
            bias_s = shifted_model.model.intercept_
            theta_s = np.hstack((weights_s, bias_s))
            predictions = generate_lr_smoothness_predictions(theta_0, theta_s, alpha)

        lar_recourse = LARRecourse(weights=weights_0, bias=bias_0, alpha=alpha)
        lar_recourse_shifted = LARRecourse(weights=weights_s, bias=bias_s, alpha=alpha)
        roar_recourse = ROAR(weights=weights_0, bias=bias_0, alpha=alpha)

        # One lambda, chosen with the LAR recourse object, is shared by all three.
        params['lamb'] = lar_recourse.choose_lambda(recourse_needed_X_train, base_model.predict, X_train)
        lar_recourse.lamb = params['lamb']
        lar_recourse_shifted.lamb = params['lamb']
        roar_recourse.lamb = params['lamb']

        # params['lamb_roar'] = roar_recourse.choose_lambda(recourse_needed_X_train, base_model.predict, X_train)
        # roar_recourse.lamb = params['lamb_roar']

        df_result = recourse_runner(seed, recourse_needed_X_test, lar_recourse, lar_recourse_shifted, roar_recourse, base_model, shifted_model, X_train, params, dataset, predictions)
        results.append(df_result)

In [None]:
# Driver cell: configure the smoothness experiment and run it for each dataset.
torch.manual_seed(0)

d_results = {}  # dataset name -> concatenated per-seed results
params = {}
params['alpha'] = 0.5
params['lamb'] = None  # overwritten by run_experiment for each seed
params['lamb_roar'] = None
params['base_model'] = 'lr'  # 'lr' or 'nn'
params['seeds'] = range(4,5)
params['algs'] = ['roar']  # any of 'alg1', 'roar'
params['save_results'] = True
params['save_history'] = True
params['save_final_results'] = False


# datasets = [SyntheticDataset(), GermanDataset(), SBADataset()]
datasets = [SBADataset()]
for dataset in datasets:
    results = []

    print(f'Running {dataset.name} data...')
    run_experiment(dataset, params, results)

    d_results[dataset.name] = pd.concat(results)
    if params['save_final_results']:
        # Name the file after the configured base model, like the per-seed
        # outputs, instead of always prefixing it with 'lr_'.
        d_results[dataset.name].to_pickle(f'../results/smoothness/output/{params["base_model"]}_{dataset.name}')

    print(f'Finished {dataset.name}\n')

Running sba data...
Choosing lambda


lambda=0.1: 100%|██████████| 151/151 [00:00<00:00, 30027.49it/s]
lambda=0.2: 100%|██████████| 151/151 [00:00<00:00, 30174.85it/s]
lambda=0.3: 100%|██████████| 151/151 [00:00<00:00, 27444.64it/s]
lambda=0.4: 100%|██████████| 151/151 [00:00<00:00, 27201.82it/s]
lambda=0.5: 100%|██████████| 151/151 [00:00<00:00, 29286.04it/s]
lambda=0.6: 100%|██████████| 151/151 [00:00<00:00, 27240.43it/s]
lambda=0.7: 100%|██████████| 151/151 [00:00<00:00, 30265.69it/s]
lambda=0.8: 100%|██████████| 151/151 [00:00<00:00, 28860.33it/s]
lambda=0.9: 100%|██████████| 151/151 [00:00<00:00, 27463.68it/s]
lambda=1.0: 100%|██████████| 151/151 [00:00<00:00, 28325.95it/s]
Evaluating recourse | alpha=0.5; lambda=1.0:   0%|[38;2;0;145;255m          [0m| 0/38 [00:00<?, ?it/s]