In [11]:
import pandas as pd
from tqdm import tqdm
from experiments import Experiment

In [12]:
# Load the German credit CSV; the path is relative to the kernel's working directory.
df = pd.read_csv(filepath_or_buffer="../data/german_credit.csv")

In [13]:
# Predictor columns selected from `df` for every task.
# NOTE(review): spellings such as "employement_since" and "debters" presumably
# mirror the CSV headers -- verify against the file before "correcting" them.
features = [
    "status_checking_account",
    "duration_in_month",
    "credit_history",
    "purpose",
    "savings",
    "employement_since",
    "installment_rate",
    "debters",
    "resident_since",
    "property",
    "age",
    "other_installments",
    "housing",
    "num_credits",
    "job",
    "num_liable",
    "telephone",
    "foreign_worker",
]

# Column name handed to Experiment as the protected attribute.
protected_attribute = "gender"

# Target columns; each one is also a key of the `data` dict built from `df`.
task_types = ["is_good_loan", "is_high_credit"]

# Model identifiers forwarded to Experiment.
model_types = ["logistic", "gbm", "nn", "svm", "tree"]

# Sizes forwarded to Experiment as n_train / n_test.
n_train, n_test = 800, 200

In [14]:
# One frame per task: the shared predictor columns, the protected attribute,
# and that task's own target column. Keys are derived from `task_types` so the
# dict cannot drift from the task list that is passed to Experiment alongside it.
data = {
    task: df[features + [protected_attribute, task]]
    for task in task_types
}

# Randomness Based on Distance to Threshold

In [15]:
# Sweep configuration for this section's runs.
random_thresholds = [0, 0.1, 0.15, 0.2, 0.25, 0.3, 0.4, 0.5]
data_scales = [5]
random_seeds = list(range(5))
conformal_pred = False  # forwarded to Experiment as its last positional argument

# Per-experiment outputs, concatenated once after the sweep finishes.
collected_results = []

for data_scale in tqdm(data_scales):
    for random_seed in tqdm(random_seeds):
        exp = Experiment(
            data, protected_attribute, task_types, model_types,
            random_thresholds, n_train, n_test, random_seed,
            data_scale, conformal_pred,
        )
        exp.pretrain_models()

        # These four experiments run for every (data_scale, random_seed) pair.
        for run_stage in (
            exp.experiment_baseline,
            exp.experiment_tasks,
            exp.experiment_models,
            exp.experiment_features,
        ):
            collected_results.append(run_stage())

        # The last two only run when data_scale is at least the number of model types.
        if data_scale >= len(model_types):
            for run_stage in (exp.experiment_partitions, exp.experiment_all):
                collected_results.append(run_stage())

results = pd.concat(collected_results)
results.to_csv("german_credit_results.csv", index=False)

  0%|                                                     | 0/1 [00:00<?, ?it/s]
  0%|                                                     | 0/5 [00:00<?, ?it/s][A

is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
is_high_credit tree
is_high_credit svm
Running Baseline Experiment
Running Tasks Experiment
Running Models Experiment
Running Features Experiment
is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
is_high_credit tree
is_high_credit svm
Running Data Partitions Experiment
is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
is_high_credit tree
is_high_credit svm
Running All Variations Experiment
is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
is_high_credit tree
is_high_credit svm
Running Models Experiment



 20%|█████████                                    | 1/5 [00:11<00:46, 11.51s/it][A

is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_good_loan nn
is_good_loan gbm
is_high_credit logistic
is_high_credit tree
is_high_credit svm
is_high_credit nn
is_high_credit gbm
Running Baseline Experiment
Running Tasks Experiment
Running Models Experiment
Running Features Experiment
is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_good_loan nn
is_good_loan gbm
is_high_credit logistic
is_high_credit tree
is_high_credit svm
is_high_credit nn
is_high_credit gbm
Running Data Partitions Experiment
is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_good_loan nn
is_good_loan gbm
is_high_credit logistic
is_high_credit tree
is_high_credit svm
is_high_credit nn
is_high_credit gbm
Running All Variations Experiment
is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_good_loan nn
is_good_loan gbm
is_high_credit logistic
is_high_credit tree
is_high_credit svm
is_high_credit nn
is_high_credit gbm
Running Models Experiment



 40%|██████████████████                           | 2/5 [00:22<00:33, 11.21s/it][A

is_good_loan svm
is_good_loan tree
is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_high_credit svm
is_high_credit tree
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
Running Baseline Experiment
Running Tasks Experiment
Running Models Experiment
Running Features Experiment
is_good_loan svm
is_good_loan tree
is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_high_credit svm
is_high_credit tree
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
Running Data Partitions Experiment
is_good_loan svm
is_good_loan tree
is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_high_credit svm
is_high_credit tree
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
Running All Variations Experiment
is_good_loan svm
is_good_loan tree
is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_high_credit svm
is_high_credit tree
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
Running Models Experiment



 60%|███████████████████████████                  | 3/5 [00:33<00:22, 11.04s/it][A

is_good_loan svm
is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_good_loan tree
is_high_credit svm
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
is_high_credit tree
Running Baseline Experiment
Running Tasks Experiment
Running Models Experiment
Running Features Experiment
is_good_loan svm
is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_good_loan tree
is_high_credit svm
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
is_high_credit tree
Running Data Partitions Experiment
is_good_loan svm
is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_good_loan tree
is_high_credit svm
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
is_high_credit tree
Running All Variations Experiment
is_good_loan svm
is_good_loan nn
is_good_loan gbm
is_good_loan logistic
is_good_loan tree
is_high_credit svm
is_high_credit nn
is_high_credit gbm
is_high_credit logistic
is_high_credit tree
Running Models Experiment



 80%|████████████████████████████████████         | 4/5 [00:44<00:11, 11.20s/it][A

is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_good_loan gbm
is_good_loan nn
is_high_credit logistic
is_high_credit tree
is_high_credit svm
is_high_credit gbm
is_high_credit nn
Running Baseline Experiment
Running Tasks Experiment
Running Models Experiment
Running Features Experiment
is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_good_loan gbm
is_good_loan nn
is_high_credit logistic
is_high_credit tree
is_high_credit svm
is_high_credit gbm
is_high_credit nn
Running Data Partitions Experiment
is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_good_loan gbm
is_good_loan nn
is_high_credit logistic
is_high_credit tree
is_high_credit svm
is_high_credit gbm
is_high_credit nn
Running All Variations Experiment
is_good_loan logistic
is_good_loan tree
is_good_loan svm
is_good_loan gbm
is_good_loan nn
is_high_credit logistic
is_high_credit tree
is_high_credit svm
is_high_credit gbm
is_high_credit nn
Running Models Experiment



100%|█████████████████████████████████████████████| 5/5 [00:55<00:00, 11.20s/it][A
100%|█████████████████████████████████████████████| 1/1 [00:55<00:00, 55.99s/it]


# Randomness Based on Conformal Prediction

In [16]:
# Sweep configuration for this section's runs.
random_thresholds = [0, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5]
data_scales = [5]
random_seeds = list(range(5))
conformal_pred = True  # forwarded to Experiment as its last positional argument

# Per-experiment outputs, concatenated once after the sweep finishes.
collected_results = []

for data_scale in tqdm(data_scales):
    for random_seed in tqdm(random_seeds):
        exp = Experiment(
            data, protected_attribute, task_types, model_types,
            random_thresholds, n_train, n_test, random_seed,
            data_scale, conformal_pred,
        )
        exp.pretrain_models()

        # These four experiments run for every (data_scale, random_seed) pair.
        for run_stage in (
            exp.experiment_baseline,
            exp.experiment_tasks,
            exp.experiment_models,
            exp.experiment_features,
        ):
            collected_results.append(run_stage())

        # The last two only run when data_scale is at least the number of model types.
        if data_scale >= len(model_types):
            for run_stage in (exp.experiment_partitions, exp.experiment_all):
                collected_results.append(run_stage())

results = pd.concat(collected_results)
results.to_csv("german_credit_results_conformal.csv", index=False)

  0%|                                                     | 0/1 [00:00<?, ?it/s]
  0%|                                                     | 0/5 [00:00<?, ?it/s][A

is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_high_credit svm
is_high_credit tree
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
Running Baseline Experiment
Running Tasks Experiment
Running Models Experiment
Running Features Experiment
is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_high_credit svm
is_high_credit tree
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
Running Data Partitions Experiment
is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_high_credit svm
is_high_credit tree
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
Running All Variations Experiment
is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_high_credit svm
is_high_credit tree
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
Running Models Experiment



 20%|█████████                                    | 1/5 [00:35<02:20, 35.21s/it][A

is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_good_loan svm
is_good_loan tree
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
is_high_credit svm
is_high_credit tree
Running Baseline Experiment
Running Tasks Experiment
Running Models Experiment
Running Features Experiment
is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_good_loan svm
is_good_loan tree
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
is_high_credit svm
is_high_credit tree
Running Data Partitions Experiment
is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_good_loan svm
is_good_loan tree
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
is_high_credit svm
is_high_credit tree
Running All Variations Experiment
is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_good_loan svm
is_good_loan tree
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
is_high_credit svm
is_high_credit tree
Running Models Experiment



 40%|██████████████████                           | 2/5 [01:09<01:44, 34.85s/it][A

is_good_loan gbm
is_good_loan nn
is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_high_credit gbm
is_high_credit nn
is_high_credit svm
is_high_credit tree
is_high_credit logistic
Running Baseline Experiment
Running Tasks Experiment
Running Models Experiment
Running Features Experiment
is_good_loan gbm
is_good_loan nn
is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_high_credit gbm
is_high_credit nn
is_high_credit svm
is_high_credit tree
is_high_credit logistic
Running Data Partitions Experiment
is_good_loan gbm
is_good_loan nn
is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_high_credit gbm
is_high_credit nn
is_high_credit svm
is_high_credit tree
is_high_credit logistic
Running All Variations Experiment
is_good_loan gbm
is_good_loan nn
is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_high_credit gbm
is_high_credit nn
is_high_credit svm
is_high_credit tree
is_high_credit logistic
Running Models Experiment



 60%|███████████████████████████                  | 3/5 [01:44<01:09, 34.86s/it][A

is_good_loan gbm
is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_good_loan nn
is_high_credit gbm
is_high_credit svm
is_high_credit tree
is_high_credit logistic
is_high_credit nn
Running Baseline Experiment
Running Tasks Experiment
Running Models Experiment
Running Features Experiment
is_good_loan gbm
is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_good_loan nn
is_high_credit gbm
is_high_credit svm
is_high_credit tree
is_high_credit logistic
is_high_credit nn
Running Data Partitions Experiment
is_good_loan gbm
is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_good_loan nn
is_high_credit gbm
is_high_credit svm
is_high_credit tree
is_high_credit logistic
is_high_credit nn
Running All Variations Experiment
is_good_loan gbm
is_good_loan svm
is_good_loan tree
is_good_loan logistic
is_good_loan nn
is_high_credit gbm
is_high_credit svm
is_high_credit tree
is_high_credit logistic
is_high_credit nn
Running Models Experiment



 80%|████████████████████████████████████         | 4/5 [02:20<00:35, 35.19s/it][A

is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_good_loan tree
is_good_loan svm
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
is_high_credit tree
is_high_credit svm
Running Baseline Experiment
Running Tasks Experiment
Running Models Experiment
Running Features Experiment
is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_good_loan tree
is_good_loan svm
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
is_high_credit tree
is_high_credit svm
Running Data Partitions Experiment
is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_good_loan tree
is_good_loan svm
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
is_high_credit tree
is_high_credit svm
Running All Variations Experiment
is_good_loan logistic
is_good_loan nn
is_good_loan gbm
is_good_loan tree
is_good_loan svm
is_high_credit logistic
is_high_credit nn
is_high_credit gbm
is_high_credit tree
is_high_credit svm
Running Models Experiment



100%|█████████████████████████████████████████████| 5/5 [02:55<00:00, 35.15s/it][A
100%|████████████████████████████████████████████| 1/1 [02:55<00:00, 175.73s/it]


### Tracking Which Risk Scores Get Random Predictions Based on Conformal P-Values

In [None]:
# Sweep configuration for this section's runs.
random_thresholds = [0, 0.1, 0.2, 0.25, 0.3, 0.35, 0.4, 0.5]
data_scales = [5]
random_seeds = list(range(5))
conformal_pred = True  # forwarded to Experiment as its last positional argument

# One experiment_risk_scores() output per (data_scale, random_seed) pair.
collected_results = []

for data_scale in tqdm(data_scales):
    for random_seed in tqdm(random_seeds):
        exp = Experiment(
            data, protected_attribute, task_types, model_types,
            random_thresholds, n_train, n_test, random_seed,
            data_scale, conformal_pred,
        )
        exp.pretrain_models()
        collected_results.append(exp.experiment_risk_scores())

results = pd.concat(collected_results)

# Keep rows where threshold == 0 or random == 1, then drop the "random" column.
keep_rows = (results["threshold"] == 0) | (results["random"] == 1)
results = results[keep_rows].drop(columns=["random"])
results.to_csv("german_credit_risk_scores.csv", index=False)