In [None]:
%load_ext autoreload
%autoreload 2
from mutabledataset import GermanSimDataset
from agent import RationalAgent
from simulation import Simulation
from learner import LogisticLearner

import plot
import numpy as np
import pandas as pd

def print_table(df):
    """Build a good/bad credit percentage table for the two age groups.

    For age == 0 ("Young") and age == 1 ("Old"), ``plot.count_df`` is asked
    for the counts of ``credit_h == 1`` and ``credit_h == 0``; the counts are
    converted to percentages of the group's total.

    Assumes ``plot.count_df`` returns an array-like with one count per
    selection dict, supporting ``.sum()`` and element-wise division --
    TODO confirm against ``plot``.

    Despite the name, nothing is printed: the table is returned as a
    ``pandas.DataFrame`` whose index labels carry each group's total in
    parentheses and whose columns are 'Good Credit' and 'Bad Credit'.
    """
    percentage_rows = []
    row_labels = []
    for group_name, age_value in (('Young', 0), ('Old', 1)):
        counts = plot.count_df(df, [{'age': age_value, 'credit_h': 1},
                                    {'age': age_value, 'credit_h': 0}])
        group_total = counts.sum()
        percentage_rows.append(counts / group_total * 100)
        row_labels.append(group_name + ' (' + str(group_total) + ')')

    return pd.DataFrame(data=percentage_rows,
                        index=row_labels,
                        columns=['Good Credit', 'Bad Credit'])
    #plot.plot_pie(sim.dataset_df, [{'age': 0, 'credit_h': 1},
    #                               {'age': 0, 'credit_h': 0}],
    #              ['Good', 'Bad'], 'Young')

    #plot.plot_pie(sim.dataset_df, [{'age': 1, 'credit_h': 1},
    #                               {'age': 1, 'credit_h': 0}],
    #              ['Good', 'Bad'], 'Old')

    #plot.plot_pie(sim.dataset_new_df, [{'age': 0, 'credit_h': 1},
    #                               {'age': 0, 'credit_h': 0}],
    #              ['Good', 'Bad'], 'Young')

    #plot.plot_pie(sim.dataset_new_df, [{'age': 1, 'credit_h': 1},
    #                               {'age': 1, 'credit_h': 0}],
    #              ['Good', 'Bad'], 'Old')



# Logistic Regression

In [None]:

# Baseline run with a plain LogisticLearner; 'savings' is the only feature
# declared mutable.
g = GermanSimDataset(mutable_features=['savings'],
                     domains={'savings': 'auto'},
                     discrete=['savings'])

# Dedicated, seeded generator: the sampled values are the same on every
# Restart & Run All, and the global NumPy RNG state is left untouched.
cost_rng = np.random.RandomState(0)

sim = Simulation(g,
                 RationalAgent,
                 LogisticLearner(),
                 # |N(loc=1.0, scale=1.0)| draws of the requested size;
                 # presumably the manipulation-cost distribution -- confirm
                 # against Simulation.
                 lambda size: np.abs(cost_rng.normal(loc=1.0, size=size)))

sim.start_simulation(include_protected=True)


In [None]:
# Count the rows of the de-dummy-coded training frame whose 'age' is 0.
df, _ = sim.train.convert_to_dataframe(de_dummy_code=True)
is_age_zero = np.array(df['age'] == 0)
print(is_age_zero.sum())

### Mutable Features

In [None]:
# Selection criteria for the two age groups (age == 0 is labelled "young",
# age == 1 "old").
young_only = {'age': 0}
old_only = {'age': 1}

print("young")
plot.plot_mutable_features(sim, selection_criteria=young_only)
print("old")
# Kept as the bare last expression of the cell so that whatever the call
# returns is still rendered as cell output.
plot.plot_mutable_features(sim, selection_criteria=old_only)

### Young, Old (Pre Simulation)

In [None]:
# print_table returns its DataFrame rather than printing it, so the call has
# to stay the last expression of the cell for the table to be rendered.
# sim.dataset_df is the frame this section labels "Pre Simulation".
print_table(sim.dataset_df)

### Young, Old (Post Simulation)

In [None]:
# print_table returns its DataFrame rather than printing it, so the call has
# to stay the last expression of the cell for the table to be rendered.
# sim.dataset_new_df is the frame this section labels "Post Simulation".
print_table(sim.dataset_new_df)

# Logistic Regression with Reweighing

In [None]:
from learner import ReweighingLogisticLearner

# 'age' is the protected attribute; the lambda marks age >= 25 as the
# privileged class (presumably encoded as age == 1, matching the group
# dicts below -- confirm against GermanSimDataset).
g = GermanSimDataset(mutable_features=['savings'],
                     domains={'savings': 'auto'},
                     discrete=['savings'],
                     protected_attribute_names=['age'],
                     privileged_classes=[lambda x: x >= 25],
                     features_to_drop=['personal_status', 'sex'])

privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

# Dedicated, seeded generator: the sampled values are the same on every
# Restart & Run All, and the global NumPy RNG state is left untouched.
cost_rng = np.random.RandomState(0)

sim = Simulation(g,
                 RationalAgent,
                 ReweighingLogisticLearner(privileged_groups,
                                           unprivileged_groups),
                 # |N(loc=1.0, scale=1.0)| draws of the requested size;
                 # presumably the manipulation-cost distribution -- confirm
                 # against Simulation.
                 lambda size: np.abs(cost_rng.normal(loc=1.0, size=size)))

sim.start_simulation(include_protected=True)

### Young, Old (Pre Simulation)

In [None]:
# print_table returns its DataFrame rather than printing it, so the call has
# to stay the last expression of the cell for the table to be rendered.
# sim.dataset_df is the frame this section labels "Pre Simulation".
print_table(sim.dataset_df)

### Young, Old (Post Simulation)

In [None]:
# print_table returns its DataFrame rather than printing it, so the call has
# to stay the last expression of the cell for the table to be rendered.
# sim.dataset_new_df is the frame this section labels "Post Simulation".
print_table(sim.dataset_new_df)

### Mutable Features

In [None]:
# Selection criteria for the two age groups (age == 0 is labelled "young",
# age == 1 "old").
young_only = {'age': 0}
old_only = {'age': 1}

print("young")
plot.plot_mutable_features(sim, selection_criteria=young_only)
print("old")
# Kept as the bare last expression of the cell so that whatever the call
# returns is still rendered as cell output.
plot.plot_mutable_features(sim, selection_criteria=old_only)

### Comments
Statistical parity difference improved.

In [None]:
# For each age group (0 first, then 1) print the distinct 'status' values in
# sim.dataset_df together with the fraction of the group holding each value.
df = sim.dataset_df
for age_value in (0, 1):
    status_in_group = df[df['age'] == age_value]['status']
    unique_elements, counts_elements = np.unique(status_in_group, return_counts=True)
    print(unique_elements, counts_elements / counts_elements.sum())

The output above shows that young people have less money in their bank accounts. The bank account balance is quite relevant for the prediction, so age can still be inferred from the bank account.

After playing around with the other fairness measures on the web application, I doubt that any of those will change this outcome for this particular dataset + protected attribute.

Other options (basically, do some actual affirmative action):
- play around with the thresholds and see what happens
- use a different cost distribution, i.e. subsidize skill investment (feature manipulation)

None of the fairness measures in AIF360 amount to affirmative action.

In [None]:
from sklearn.linear_model import LogisticRegression

print(sim.dataset.label_names)

# C is scikit-learn's inverse regularization strength, so a value this large
# leaves the fit practically unregularized; max_iter is set very high as well.
reg = LogisticRegression(solver='liblinear',
                         max_iter=10 ** 9,
                         C=1e21)
reg.fit(sim.dataset.features, sim.dataset.labels.ravel())

df = pd.DataFrame(data=sim.dataset.features, columns=sim.dataset.feature_names)
# Rows matching age == 1 (presumably -- _df_selection is a private helper of
# plot; confirm); the prediction is printed for the first such row only.
data = plot._df_selection(df, {'age': 1})
first_selected_row = data.values[0]
print(list(reg.predict([first_selected_row])))

# next steps: implement modified decision function that enforces statistical parity...

## Enforce Statistical Parity (threshold boost for unprivileged)

In [None]:
from learner import StatisticalParityLogisticLearner

# 'age' is the protected attribute; the lambda marks age >= 25 as the
# privileged class (presumably encoded as age == 1, matching the group
# dicts below -- confirm against GermanSimDataset).
# NOTE(review): 'status' is given a domain and listed as discrete, yet only
# 'savings' is declared mutable here -- confirm this asymmetry is intended.
g = GermanSimDataset(mutable_features=['savings'],
                     domains={'savings': 'auto', 'status': 'auto'},
                     discrete=['status', 'savings'],
                     protected_attribute_names=['age'],
                     privileged_classes=[lambda x: x >= 25],
                     features_to_drop=['personal_status', 'sex'])

privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

# Dedicated, seeded generator: the sampled values are the same on every
# Restart & Run All, and the global NumPy RNG state is left untouched.
cost_rng = np.random.RandomState(0)

sim = Simulation(g,
                 RationalAgent,
                 # NOTE(review): the meaning of the third positional argument
                 # (1e-7) is not visible in this notebook -- confirm against
                 # StatisticalParityLogisticLearner.
                 StatisticalParityLogisticLearner(privileged_groups,
                                                  unprivileged_groups,
                                                  1e-7),
                 # |N(loc=1.0, scale=1.0)| draws of the requested size;
                 # presumably the manipulation-cost distribution -- confirm
                 # against Simulation.
                 lambda size: np.abs(cost_rng.normal(loc=1.0, size=size)))

sim.start_simulation(include_protected=True)

### Young, Old (Pre Simulation)

In [None]:
# print_table returns its DataFrame rather than printing it, so the call has
# to stay the last expression of the cell for the table to be rendered.
# sim.dataset_df is the frame this section labels "Pre Simulation".
print_table(sim.dataset_df)

### Young, Old (Post Simulation)

In [None]:
# print_table returns its DataFrame rather than printing it, so the call has
# to stay the last expression of the cell for the table to be rendered.
# sim.dataset_new_df is the frame this section labels "Post Simulation".
print_table(sim.dataset_new_df)

### Mutable Features

In [None]:
# Selection criteria for the two age groups (age == 0 is labelled "young",
# age == 1 "old").
young_only = {'age': 0}
old_only = {'age': 1}

print("young")
plot.plot_mutable_features(sim, selection_criteria=young_only)
print("old")
# Kept as the bare last expression of the cell so that whatever the call
# returns is still rendered as cell output.
plot.plot_mutable_features(sim, selection_criteria=old_only)

# Adversarial Debiasing

In [None]:
from learner import AdversialDebiasingLogisticLearner

# Both 'status' and 'savings' are declared mutable in this run.
# 'age' is the protected attribute; the lambda marks age >= 25 as the
# privileged class (presumably encoded as age == 1, matching the group
# dicts below -- confirm against GermanSimDataset).
g = GermanSimDataset(mutable_features=['status', 'savings'],
                     domains={'savings': 'auto', 'status': 'auto'},
                     discrete=['status', 'savings'],
                     protected_attribute_names=['age'],
                     privileged_classes=[lambda x: x >= 25],
                     features_to_drop=['personal_status', 'sex'])

privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

# Dedicated, seeded generator: the sampled values are the same on every
# Restart & Run All, and the global NumPy RNG state is left untouched.
cost_rng = np.random.RandomState(0)

sim = Simulation(g,
                 RationalAgent,
                 AdversialDebiasingLogisticLearner(privileged_groups=privileged_groups,
                                                   unprivileged_groups=unprivileged_groups),
                 # |N(loc=0.5, scale=1.0)| draws of the requested size;
                 # presumably the manipulation-cost distribution -- confirm
                 # against Simulation.
                 lambda size: np.abs(cost_rng.normal(loc=0.5, size=size)))

sim.start_simulation(include_protected=True)

### Young, Old (Pre Simulation)

In [None]:
# print_table returns its DataFrame rather than printing it, so the call has
# to stay the last expression of the cell for the table to be rendered.
# sim.dataset_df is the frame this section labels "Pre Simulation".
print_table(sim.dataset_df)

### Young, Old (Post Simulation)

In [None]:
# print_table returns its DataFrame rather than printing it, so the call has
# to stay the last expression of the cell for the table to be rendered.
# sim.dataset_new_df is the frame this section labels "Post Simulation".
print_table(sim.dataset_new_df)

# EqOdds Postprocessing

In [None]:
from learner import EqOddsPostprocessingLogisticLearner

# Both 'status' and 'savings' are declared mutable in this run.
# 'age' is the protected attribute; the lambda marks age >= 25 as the
# privileged class (presumably encoded as age == 1, matching the group
# dicts below -- confirm against GermanSimDataset).
g = GermanSimDataset(mutable_features=['status', 'savings'],
                     domains={'savings': 'auto', 'status': 'auto'},
                     discrete=['status', 'savings'],
                     protected_attribute_names=['age'],
                     privileged_classes=[lambda x: x >= 25],
                     features_to_drop=['personal_status', 'sex'])

privileged_groups = [{'age': 1}]
unprivileged_groups = [{'age': 0}]

# Dedicated, seeded generator: the sampled values are the same on every
# Restart & Run All, and the global NumPy RNG state is left untouched.
cost_rng = np.random.RandomState(0)

sim = Simulation(g,
                 RationalAgent,
                 EqOddsPostprocessingLogisticLearner(privileged_groups=privileged_groups,
                                                     unprivileged_groups=unprivileged_groups),
                 # |N(loc=0.5, scale=1.0)| draws of the requested size;
                 # presumably the manipulation-cost distribution -- confirm
                 # against Simulation.
                 lambda size: np.abs(cost_rng.normal(loc=0.5, size=size)))

sim.start_simulation(include_protected=True)

### Young, Old (Pre Simulation)

In [None]:
# print_table returns its DataFrame rather than printing it, so the call has
# to stay the last expression of the cell for the table to be rendered.
# sim.dataset_df is the frame this section labels "Pre Simulation".
print_table(sim.dataset_df)

### Young, Old (Post Simulation)

In [None]:
# print_table returns its DataFrame rather than printing it, so the call has
# to stay the last expression of the cell for the table to be rendered.
# sim.dataset_new_df is the frame this section labels "Post Simulation".
print_table(sim.dataset_new_df)