In [32]:
import pandas as pd
import sklearn
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import random
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn import svm, ensemble
from sklearn.neural_network import MLPClassifier
from sklearn.calibration import CalibratedClassifierCV
from sklearn.tree import DecisionTreeClassifier
import warnings
warnings.filterwarnings('ignore')

# Load Data

In [33]:
# Split-size constants. N_TEST is the default length of the per-test-point
# systemic success/failure masks kept by Homogenization and ModelGroup.
N_TRAIN, N_TEST = 800, 200

# Raw table from which every experiment draws its train/test split.
german_credit = pd.read_csv("data/german_credit.csv")

In [34]:
# Columns of the credit table that are fed to the classifiers
# (format_data selects them with df[features]).
features = [
    'status_checking_account',
    'duration_in_month',
    'credit_history',
    'purpose',
    'savings',
    'employement_since',
    'installment_rate',
    'debters',
    'resident_since',
    'property',
    'age',
    'other_installments',
    'housing',
    'num_credits',
    'job',
    'num_liable',
    'telephone',
    'foreign_worker',
]

# Experiment

In [35]:
def format_data(df, application_name, seed, attribute_name="gender"):
    """Split `df` 80/20 and return the pieces as numpy arrays.

    Parameters
    ----------
    df : pandas.DataFrame
        Table containing the `features` columns plus the target and
        protected-attribute columns named below.
    application_name : str
        Column used as the prediction target (y).
    seed : int
        `random_state` for the train/test split.
    attribute_name : str
        Column used as the protected attribute (z).

    Returns
    -------
    dict
        Keys 'X_tr', 'X_test', 'y_tr', 'y_test', 'z_tr', 'z_test'.
    """
    train_part, test_part = sklearn.model_selection.train_test_split(
        df, test_size=0.2, random_state=seed
    )

    def as_arrays(columns):
        # Same column selection applied to both halves of the split.
        return train_part[columns].to_numpy(), test_part[columns].to_numpy()

    X_tr, X_te = as_arrays(features)
    y_tr, y_te = as_arrays(application_name)
    z_tr, z_te = as_arrays(attribute_name)

    return {
        'X_tr': X_tr,
        'X_test': X_te,
        'y_tr': y_tr,
        'y_test': y_te,
        'z_tr': z_tr,
        'z_test': z_te,
    }

In [36]:
def fixed_partition(data, seed, data_scale):
    """Keep one seeded, contiguous block of the shuffled training data.

    The training rows are shuffled with `seed`, cut into blocks of
    `len(train) // data_scale` rows, and one block (also chosen from `seed`)
    is returned as the new training set. The test arrays are passed through
    untouched.

    Parameters
    ----------
    data : dict
        Output of `format_data` (keys 'X_tr', 'y_tr', 'z_tr', 'X_test',
        'y_test', 'z_test').
    seed : int
        Seeds both the block choice and the row shuffle.
    data_scale : int
        Number of blocks the training set is divided into.

    Returns
    -------
    dict
        Same keys as `data`, with the '*_tr' entries reduced to the chosen block.
    """
    # Which block to keep; drawn from a dedicated generator so it depends only on `seed`.
    block_index = random.Random(seed).randint(0, int(data_scale) - 1)

    # Stack features, labels and protected attribute side by side so that a
    # single row shuffle keeps them aligned.
    label_col = np.expand_dims(data['y_tr'], axis=1)
    attr_col = np.expand_dims(data['z_tr'], axis=1)
    stacked = np.concatenate((data['X_tr'], label_col, attr_col), axis=1)
    np.random.RandomState(seed=seed).shuffle(stacked)

    rows_per_block = int(stacked.shape[0] // data_scale)
    lo = rows_per_block * block_index
    hi = lo + rows_per_block
    kept = stacked[lo:hi]

    # The last two columns are the label and the protected attribute, in that order.
    return {
        'X_tr': kept[:, :-2],
        'X_test': data['X_test'],
        'y_tr': kept[:, -2],
        'y_test': data['y_test'],
        'z_tr': kept[:, -1],
        'z_test': data['z_test'],
    }

In [37]:
def train_model(data, seed, method="logistic"):
    """Fit a calibrated classifier on the training split and score the test split.

    The chosen estimator is wrapped in a `StandardScaler` pipeline and then in
    `CalibratedClassifierCV` (default settings) before fitting.

    Parameters
    ----------
    data : dict
        Must provide 'X_tr', 'y_tr' and 'X_test'.
    seed : int
        Passed as `random_state` to the underlying estimator.
    method : str
        One of 'logistic', 'gbm', 'svm', 'nn', 'tree'.

    Returns
    -------
    list
        For every test row, the second column of `predict_proba`
        (presumably the probability of the positive class - confirm the
        label encoding of the target column).

    Raises
    ------
    ValueError
        If `method` is not one of the supported names. Previously this
        surfaced as an UnboundLocalError further down.
    """
    if method == 'logistic':
        estimator_cls = LogisticRegression
    elif method == 'gbm':
        estimator_cls = ensemble.GradientBoostingClassifier
    elif method == 'svm':
        estimator_cls = svm.SVC
    elif method == 'nn':
        estimator_cls = MLPClassifier
    elif method == 'tree':
        estimator_cls = DecisionTreeClassifier
    else:
        raise ValueError(
            f"Unknown method {method!r}; expected one of "
            "'logistic', 'gbm', 'svm', 'nn', 'tree'"
        )

    pipeline = make_pipeline(StandardScaler(), estimator_cls(random_state=seed))
    model = CalibratedClassifierCV(pipeline)
    model.fit(data['X_tr'], data['y_tr'])

    probabilities = model.predict_proba(data['X_test'])
    # Keep only the second class column, one score per test row.
    return [row[1] for row in probabilities]

In [38]:
class Homogenization:
    """Accumulate outcome statistics over repeated decisions on one test set.

    One instance corresponds to one randomisation half-width
    (`random_distance`). Every `update_metrics` call turns a vector of risk
    scores into binary decisions and folds them into running accuracy,
    acceptance, fairness and systemic success/failure statistics;
    `final_metrics` condenses everything into one flat record.
    """

    # Systemic-outcome definitions understood by `update_metrics`.
    SUPPORTED_METHODS = ("lockout", "inaccurate")

    def __init__(self, random_distance, num_models, data_scale, data_seed, model_type=None, application_type=None, model_seed=None, partition_seed=None, size=N_TEST):
        """Store the run description and initialise empty accumulators.

        Parameters
        ----------
        random_distance : float
            Half-width of the band around 0.5 inside which decisions are
            sampled rather than thresholded (see `get_predictions`).
        num_models, data_scale, data_seed, model_type, application_type, model_seed, partition_seed
            Descriptive metadata; stored and echoed by `final_metrics`, not
            otherwise used by this class.
        size : int
            Length of the systemic success/failure masks. They are combined
            element-wise with every prediction vector, so this should match
            the number of test points.
        """
        self.random_distance = random_distance
        self.num_models = num_models
        self.data_scale = data_scale
        self.data_seed = data_seed
        self.model_seed = model_seed
        self.partition_seed = partition_seed
        self.model_type = model_type
        self.application_type = application_type

        # One entry per update_metrics call.
        self.accuracy = []
        self.acceptance = []

        # Boolean masks, all True until an update proves otherwise: a test point
        # stays True only if it "succeeded" (resp. "failed") in every update.
        self.systemic_success = np.ones(size, dtype=bool)
        self.systemic_failure = np.ones(size, dtype=bool)

        # Running product of the per-update failure fractions.
        self.failure_rate = 1

        self.fairness_spd = []
        self.fairness_eop = []

    def get_predictions(self, risk_scores):
        """Convert risk scores into 0/1 decisions.

        Scores strictly inside (0.5 - random_distance, 0.5 + random_distance)
        are decided by a Bernoulli draw with success probability equal to the
        score (global numpy RNG, so results are not seeded here); all other
        scores are thresholded at 0.5.

        Returns
        -------
        numpy.ndarray
            One 0/1 decision per score.
        """
        lower = 0.5 - self.random_distance
        upper = 0.5 + self.random_distance
        pred = []
        for r in risk_scores:
            if (r > lower) and (r < upper):
                pred.append(np.random.binomial(1, r))
            elif r >= 0.5:
                pred.append(1)
            else:
                pred.append(0)
        return np.array(pred)

    @staticmethod
    def _positive_rate_gap(df):
        """Positive-prediction rate of the rows with attribute 1 minus that of attribute 0.

        `df` holds the protected attribute in column 0 and the decisions in
        'y_pred'. There is no guard against an empty group.
        """
        group_one = df[df[0] == 1]
        group_zero = df[df[0] == 0]
        return (group_one["y_pred"].sum() / len(group_one)) - (group_zero["y_pred"].sum() / len(group_zero))

    def get_fairness_metrics(self, partition, pred):
        """Compute two group-fairness gaps for the decisions `pred`.

        Parameters
        ----------
        partition : dict
            Must provide 'z_test' (protected attribute, compared against 1
            and 0) and 'y_test' (true labels).
        pred : array-like
            0/1 decisions aligned with the test set.

        Returns
        -------
        tuple
            (spd, eop): the positive-rate gap over all test rows, and the same
            gap restricted to rows whose true label is 1.
        """
        df = pd.DataFrame(partition["z_test"])
        df["y_true"] = partition["y_test"]
        df["y_pred"] = pred

        spd = self._positive_rate_gap(df)
        eop = self._positive_rate_gap(df[df["y_true"] == 1])

        return spd, eop

    def update_metrics(self, partition, scores, method="lockout"):
        """Fold one round of decisions into the running statistics.

        Parameters
        ----------
        partition : dict
            Must provide 'y_test' and 'z_test'.
        scores : iterable of float
            Risk scores for the test points.
        method : str
            'lockout': failure means a 0 decision, success a 1 decision.
            'inaccurate': failure means a wrong decision, success a correct one.

        Raises
        ------
        ValueError
            If `method` is not supported. Checked before any state is
            touched: an unknown method used to skip the systemic updates
            silently, leaving the summary with systemic_success == 1,
            systemic_failure == 1 and multiplicity == -1.
        """
        if method not in self.SUPPORTED_METHODS:
            raise ValueError(
                f"Unknown method {method!r}; expected one of {self.SUPPORTED_METHODS}"
            )

        pred = self.get_predictions(scores)
        spd, eop = self.get_fairness_metrics(partition, pred)
        y_true = partition["y_test"]

        self.accuracy.append(np.sum(pred == y_true) / len(pred))
        self.acceptance.append(np.sum(pred) / len(pred))

        self.fairness_spd.append(spd)
        self.fairness_eop.append(eop)

        if method == "lockout":
            succeeded = (pred == 1)
            failed = (pred == 0)
        else:  # "inaccurate"
            succeeded = (pred == y_true)
            failed = (pred != y_true)

        self.failure_rate *= np.sum(failed) / len(pred)
        # A point remains a systemic success/failure only if it was one in every round.
        self.systemic_success &= succeeded
        self.systemic_failure &= failed

    def final_metrics(self):
        """Summarise the accumulated statistics as one flat dict.

        Besides the stored metadata and the means of the per-update lists, the
        record contains the fraction of test points that succeeded / failed in
        every update, `multiplicity` (the remaining fraction) and two ratios
        of the systemic failure fraction: to the product of per-update failure
        fractions, and to one minus the mean acceptance. The ratios are not
        guarded against a zero denominator.
        """
        r = {}
        r["random_distance"] = self.random_distance
        r["num_models"] = self.num_models
        r["data_scale"] = self.data_scale
        r["data_seed"] = self.data_seed
        r["model_seed"] = self.model_seed
        r["partition_seed"] = self.partition_seed
        r["model_type"] = self.model_type
        r["application_type"] = self.application_type

        r["accuracy"] = np.mean(self.accuracy)
        r["acceptance"] = np.mean(self.acceptance)
        r["fairness_spd"] = np.mean(self.fairness_spd)
        r["fairness_eop"] = np.mean(self.fairness_eop)

        r["systemic_success"] = np.sum(self.systemic_success) / len(self.systemic_success)
        r["systemic_failure"] = np.sum(self.systemic_failure) / len(self.systemic_failure)
        r["multiplicity"] = 1 - r["systemic_success"] - r["systemic_failure"]

        r["failure_rate"] = self.failure_rate
        r["homogenization_expected_failure"] = r["systemic_failure"] / self.failure_rate
        r["homogenization_avg_failure"] = r["systemic_failure"] / (1 - r["acceptance"])

        return r

In [39]:
class ModelGroup:
    """Bundle of `Homogenization` trackers, one per randomisation threshold.

    Every tracker receives the same metadata and the same updates; only the
    threshold (passed as `random_distance`) differs.
    """

    def __init__(self, thresholds, method, num_models, data_scale, data_seed, model_type=None, application_type=None, model_seed=None, partition_seed=None, size=N_TEST):
        """Create one tracker per entry of `thresholds`.

        `method` is remembered and forwarded on every update; all remaining
        arguments are handed to each `Homogenization` unchanged.
        """
        self.method = method
        self.models = [
            Homogenization(
                threshold,
                num_models,
                data_scale,
                data_seed,
                model_type,
                application_type,
                model_seed,
                partition_seed,
                size,
            )
            for threshold in thresholds
        ]

    def update_metrics(self, partition, risk_scores):
        """Feed the same partition and risk scores to every tracker."""
        for tracker in self.models:
            tracker.update_metrics(partition, risk_scores, self.method)

    def final_metrics(self):
        """Return the summary record of each tracker, in threshold order."""
        return [tracker.final_metrics() for tracker in self.models]

In [40]:
def experiment_baseline(num_models=2, data_scale=1, method="lockout"):
    """Run the baseline experiment over model types, applications and data seeds.

    For each combination a single model is trained on one data partition and
    its risk scores are fed `num_models` times to a `ModelGroup`, so the
    rounds differ only through the randomised decisions made near 0.5.

    Parameters
    ----------
    num_models : int
        Number of update rounds applied with the same risk scores.
    data_scale : int
        Number of blocks the training set is divided into (see
        `fixed_partition`); one block is used for training.
    method : str
        Systemic-outcome definition forwarded to the trackers
        ('lockout' or 'inaccurate').

    Returns
    -------
    pandas.DataFrame
        One row per (model type, application, data seed, threshold).
    """
    data_seeds = list(range(10))
    partition_seed = 0
    model_seed = 0
    thresholds = [0, 0.1, 0.2, 0.3, 0.4, 0.5]

    results = []

    for model_type in tqdm(["logistic", "gbm", "nn", "svm", "tree"], leave=False, desc="model"):
        for application in ["is_good_loan", "is_high_credit"]:
            for data_seed in data_seeds:
                data = format_data(german_credit, application, data_seed)
                partition = fixed_partition(data, partition_seed, data_scale)
                risk_scores = train_model(partition, model_seed, model_type)

                # Record the application and the seeds that were actually used
                # (they were previously stored as None, which made rows from the
                # two applications indistinguishable), and size the systemic
                # masks from the real test split instead of a fixed constant.
                models = ModelGroup(
                    thresholds,
                    method,
                    num_models,
                    data_scale,
                    data_seed,
                    model_type,
                    application,
                    model_seed,
                    partition_seed,
                    len(partition["y_test"]),
                )
                for _ in range(num_models):
                    models.update_metrics(partition, risk_scores)

                results.extend(models.final_metrics())

    return pd.DataFrame(results)

In [41]:
# Homogenization As Number of Models Grows
# The same sweep over 1..10 models is run for each systemic-outcome definition,
# and each sweep is written to its own CSV.
for outcome_method, out_csv in (
    ("lockout", "german_baseline_lockout.csv"),
    ("inaccurate", "german_baseline_inaccurate.csv"),
):
    results = []
    for k in tqdm(range(1, 11)):
        exp = experiment_baseline(k, 2, outcome_method)
        results.append(exp)
    results_df = pd.concat(results)

    results_df.to_csv(out_csv, index=False)

  0%|                                                     | 0/1 [00:00<?, ?it/s]
model:   0%|                                              | 0/5 [00:00<?, ?it/s][A
model:  20%|███████▌                              | 1/5 [00:00<00:01,  3.00it/s][A
model:  40%|███████████████▏                      | 2/5 [00:04<00:07,  2.51s/it][A
model:  60%|██████████████████████▊               | 3/5 [00:18<00:15,  7.85s/it][A
model:  80%|██████████████████████████████▍       | 4/5 [00:19<00:05,  5.04s/it][A
model: 100%|██████████████████████████████████████| 5/5 [00:19<00:00,  3.33s/it][A
100%|█████████████████████████████████████████████| 1/1 [00:19<00:00, 19.61s/it][A


In [None]:
# Homogenization As Data Scale Grows
# Two models throughout; the sweep varies how many blocks the training set is
# divided into, once per systemic-outcome definition.
for outcome_method, out_csv in (
    ("lockout", "german_baseline_lockout2.csv"),
    ("inaccurate", "german_baseline_inaccurate2.csv"),
):
    results = []
    for data_scale in tqdm([20, 16, 10, 9, 8, 7, 6, 5, 4, 3, 2]):
        exp = experiment_baseline(2, data_scale, outcome_method)
        results.append(exp)
    results_df = pd.concat(results)

    results_df.to_csv(out_csv, index=False)