# Input Influence + Fairness Algorithms

All of the imported packages are needed for this code.

In [None]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow.compat.v1 as tf
tf.disable_eager_execution()
tf.logging.set_verbosity(tf.logging.ERROR)

from sklearn.compose import make_column_transformer
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestClassifier

from aif360.sklearn.preprocessing import ReweighingMeta
from aif360.sklearn.inprocessing import AdversarialDebiasing, ExponentiatedGradientReduction, GridSearchReduction
from aif360.sklearn.postprocessing import CalibratedEqualizedOdds, PostProcessingMeta
from aif360.sklearn.datasets import fetch_adult, fetch_compas, fetch_german
from aif360.sklearn.metrics import disparate_impact_ratio, average_odds_error, generalized_fpr
from aif360.sklearn.metrics import generalized_fnr, difference, statistical_parity_difference,equal_opportunity_difference
import matplotlib.pyplot as plt
import seaborn as sns

import cvxpy
import statsmodels.api as sm
import shap

import results_generation as rg
import plotting as plt_aif
from load_irl import load_compas, load_census, load_german

### Results for COMPAS

In [None]:
# COMPAS, single run: load the train/test split, generate results, plot them.
(
    X_train_compas,
    X_test_compas,
    y_train_compas,
    y_test_compas,
) = load_compas(subsample=500)

# Settings shared by the results and plotting calls below (and read again by
# later COMPAS cells).
prot_attr, path = 'race', 'irl/compas/singlerun_shap'
X1, X2 = "age", "priors_count"
nsamples = 100  # presumably the SHAP sample budget -- confirm in results_generation

compas_results = rg.generate_results(
    X_train_compas,
    X_test_compas,
    y_train_compas,
    y_test_compas,
    prot_attr,
    X1,
    X2,
    path,
    nsamples,
)
plt_aif.plot_all(
    compas_results,
    X_test_compas,
    prot_attr,
    X1,
    X2,
    path,
)

In [None]:
# COMPAS, repeated trials. `path` and `num_trials` are read again by the
# following cell, so they stay module-level names.
path = 'irl/compas/compas_shap_'
num_trials = 5
compass_boot_results = rg.trial_compas(
    num_trials=num_trials,
    path=path,
    nsamples=100,
)

In [None]:
# Read the per-trial COMPAS results back via `path`/`num_trials` (set in the
# trial cell), pass them through rg.gen_errors, and plot with err_bar=True.
compass_err = rg.read_trial_results(path, num_trials)
compass_err = rg.gen_errors(compass_err)
plt_aif.plot_all(
    compass_err,
    X_test_compas,
    prot_attr,
    X1,
    X2,
    path,
    err_bar=True,
)

### Results for Census

In [None]:
# Census, single run: load the train/test split, generate results, plot them.
(
    X_train_census,
    X_test_census,
    y_train_census,
    y_test_census,
) = load_census()

# Settings shared by the results and plotting calls below (and read again by
# later Census cells).
prot_attr, path = 'sex', 'irl/census/singlerun_shap'
X1, X2 = "relationship_Wife", "relationship_Unmarried"
nsamples = 100  # presumably the SHAP sample budget -- confirm in results_generation

census_results = rg.generate_results(
    X_train_census,
    X_test_census,
    y_train_census,
    y_test_census,
    prot_attr,
    X1,
    X2,
    path,
    nsamples,
)
plt_aif.plot_all(
    census_results,
    X_test_census,
    prot_attr,
    X1,
    X2,
    path,
)

In [None]:
# Census, repeated trials. `path` and `num_trials` are read again by the
# following cell, so they stay module-level names.
path = 'irl/census/census_shap_'
num_trials = 5
census_boot_results = rg.trial_census(
    num_trials=num_trials,
    path=path,
    nsamples=100,
)

In [None]:
# Read the per-trial Census results back via `path`/`num_trials` (set in the
# trial cell), pass them through rg.gen_errors, and plot with err_bar=True.
census_err = rg.read_trial_results(path, num_trials)
census_err = rg.gen_errors(census_err)
plt_aif.plot_all(
    census_err,
    X_test_census,
    prot_attr,
    X1,
    X2,
    path,
    err_bar=True,
)

### Results for German

In [None]:
# German credit, single run: load the train/test split and generate results.
# NOTE(review): the COMPAS and Census single-run cells also call
# plt_aif.plot_all on their results; this cell does not -- confirm whether
# the omission is intentional.
(
    X_train_german,
    X_test_german,
    y_train_german,
    y_test_german,
) = load_german()

# Settings passed to rg.generate_results (and read again by later German cells).
prot_attr, path = 'sex', 'irl/german/singlerun_shap'
X1, X2 = "num_dependents", "age"
nsamples = 100  # presumably the SHAP sample budget -- confirm in results_generation

german_results = rg.generate_results(
    X_train_german,
    X_test_german,
    y_train_german,
    y_test_german,
    prot_attr,
    X1,
    X2,
    path,
    nsamples,
)

In [None]:
# German credit, repeated trials. `path` and `num_trials` are read again by
# the following cell, so they stay module-level names.
path = 'irl/german/german_shap_'
num_trials = 5
german_boot_results = rg.trial_german(
    num_trials=num_trials,
    path=path,
    nsamples=100,
)

In [None]:
# Read the per-trial German results back via `path`/`num_trials` (set in the
# trial cell), pass them through rg.gen_errors, and plot with err_bar=True.
german_err = rg.read_trial_results(path, num_trials)
german_err = rg.gen_errors(german_err)
plt_aif.plot_all(
    german_err,
    X_test_german,
    prot_attr,
    X1,
    X2,
    path,
    err_bar=True,
)

## Results for Synthetic Data

#### Scenario in main text

In [None]:
# Synthetic scenario (main text): run the synthetic trials.
# `num_trials`, `yname` and `coeff_code` are read again by the plotting cell.
path = 'synthetic/010/a_shap'
nsamples, num_trials = 100, 5
cor_code, coeff_code = '5d', '010'
yname = 'log_disc'
syn_results_a = rg.generate_results_synthetic_trials(
    path,
    nsamples,
    cor_code=cor_code,
    yname=yname,
    coeff_code=coeff_code,
    num_trials=num_trials,
)

In [None]:
# Synthetic scenario (main text): read the per-trial results back and plot.
# The prefix is the run path from the previous cell plus 'trial_'.
path = 'synthetic/010/a_shap' + 'trial_'
syn_results_err_a = rg.read_trial_results(path, num_trials)
plt_aif.plot_syn_all_err(
    syn_results_err_a,
    yname,
    coeff_code,
    path,
    num_trials,
)

#### Scenario in appendix

In [None]:
# Synthetic scenario (appendix): run the synthetic trials.
# `num_trials`, `yname` and `coeff_code` are read again by the plotting cell.
# NOTE(review): nsamples=10 and num_trials=2 here versus 100 and 5 in the
# main-text scenario -- confirm these are the intended values.
path = 'synthetic/111/b_shap'
nsamples, num_trials = 10, 2
cor_code, coeff_code = 'll', '111'
yname = 'log_disc'
syn_results_b = rg.generate_results_synthetic_trials(
    path,
    nsamples,
    cor_code=cor_code,
    yname=yname,
    coeff_code=coeff_code,
    num_trials=num_trials,
)

In [None]:
# Synthetic scenario (appendix): read the per-trial results back and plot.
# The prefix is the run path from the previous cell plus 'trial_'.
path = 'synthetic/111/b_shap' + 'trial_'
syn_results_err_b = rg.read_trial_results(path, num_trials)
plt_aif.plot_syn_all_err(
    syn_results_err_b,
    yname,
    coeff_code,
    path,
    num_trials,
)