In [14]:
from FLAI import data
from FLAI import causal_graph
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier



In [25]:
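# Dataset pipelines: each execute_* function below loads one pickled dataset,
# turns its one-hot columns into ordinal features, trains a classifier on them
# and returns a FLAI Data object with the predictions attached.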

def _first_match_codes(conditions, codes, index):
    """Return, for every row, the code of the first condition that holds.

    Vectorised equivalent of a row-wise ``a if c1 else b if c2 ... else None``
    chain: earlier conditions take priority and rows matching none of them
    become missing values.

    Parameters
    ----------
    conditions : list of boolean numpy arrays, one per candidate code.
    codes : list of int, same length as ``conditions``.
    index : pandas Index to attach to the result.

    Returns
    -------
    pandas.Series
        ``int64`` when every row matched a condition, otherwise ``float64``
        with NaN for the unmatched rows.
    """
    selected = pd.Series(np.select(conditions, codes, default=np.nan), index=index)
    # np.select yields floats because of the NaN default; go back to integers
    # when no row actually needed the default.
    if selected.notna().all():
        selected = selected.astype('int64')
    return selected


def _ordinal_from_onehot(df, columns):
    """Collapse one-hot ``columns`` of ``df`` into a single ordinal column.

    The code of a row is the position (0-based) of the first column in
    ``columns`` whose value equals 1; rows with no such column are missing.
    """
    conditions = [(df[column] == 1).to_numpy(dtype=bool) for column in columns]
    return _first_match_codes(conditions, list(range(len(columns))), df.index)


def _recode(values, mapping):
    """Map ``values`` through ``mapping`` (old value -> new int code).

    Values that are not a key of ``mapping`` become missing.
    """
    conditions = [(values == old).to_numpy(dtype=bool) for old in mapping]
    return _first_match_codes(conditions, list(mapping.values()), values.index)


def _train_and_score(df, data_columns, feature_columns):
    """Fit a decision tree on ``feature_columns`` and attach its predictions.

    Parameters
    ----------
    df : pandas.DataFrame holding at least ``data_columns``.
    data_columns : list of str
        Columns handed to ``data.Data(..., transform=True)``; must include
        ``'label'``. The transformation itself is whatever FLAI applies.
    feature_columns : list of str
        Model inputs, in the order the classifier should see them.

    Returns
    -------
    data.Data
        Built with ``transform=False`` from a frame whose columns are
        ``feature_columns`` followed by ``'label'``, ``'Predicted'`` and
        ``'proba'``.

    Notes
    -----
    The model is trained and evaluated on the same rows, so the predictions
    are in-sample.
    """
    flai_dataset = data.Data(df[data_columns], transform=True)
    # Explicit copy: adding columns to a bare column selection is the chained
    # assignment pattern that triggers SettingWithCopyWarning.
    scored = flai_dataset.data[feature_columns + ['label']].copy()
    features = scored[feature_columns]

    model_original = DecisionTreeClassifier(random_state=0)
    model_original.fit(features, scored['label'])

    scored['Predicted'] = model_original.predict(features)
    # Column 1 of predict_proba is the probability of the second class.
    scored['proba'] = model_original.predict_proba(features)[:, 1]
    return data.Data(scored, transform=False)


def execute_adult():
    """Load the adult dataset, train a decision tree and return scored data.

    Reads ``../../Data/adult.pickle``, encodes the age-decade and
    education-years one-hot columns as ordinal ``age`` / ``education``
    features, and fits the model on ``sex``, ``race``, ``age`` and
    ``education``.

    Returns
    -------
    data.Data
        Features plus ``label``, ``Predicted`` and ``proba`` columns.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    df = pd.read_pickle('../../Data/adult.pickle')
    df['age'] = _ordinal_from_onehot(df, [
        'Age (decade)=10',
        'Age (decade)=20',
        'Age (decade)=30',
        'Age (decade)=40',
        'Age (decade)=50',
        'Age (decade)=60',
        'Age (decade)=>=70',
    ])
    df['education'] = _ordinal_from_onehot(df, [
        'Education Years=<6',
        'Education Years=6',
        'Education Years=7',
        'Education Years=8',
        'Education Years=9',
        'Education Years=10',
        'Education Years=11',
        'Education Years=12',
        'Education Years=>12',
    ])
    return _train_and_score(
        df,
        data_columns=['sex', 'race', 'age', 'education', 'label'],
        feature_columns=['sex', 'race', 'age', 'education'],
    )


def execute_compas():
    """Load the COMPAS dataset, train a decision tree and return scored data.

    Reads ``../../Data/compas.pickle``, encodes the age-category and
    priors-count one-hot columns as ordinal ``age`` / ``priors`` features,
    flips the binary label and fits the model on ``sex``, ``race``, ``age``
    and ``priors``.

    Returns
    -------
    data.Data
        Features plus ``label``, ``Predicted`` and ``proba`` columns.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    df = pd.read_pickle('../../Data/compas.pickle')
    df['age'] = _ordinal_from_onehot(df, [
        'age_cat=Less than 25',
        'age_cat=25 to 45',
        'age_cat=Greater than 45',
    ])
    df['priors'] = _ordinal_from_onehot(df, [
        'priors_count=0',
        'priors_count=1 to 3',
        'priors_count=More than 3',
    ])
    # The charge-degree one-hot columns are not encoded here: the model never
    # receives them as a feature.
    # Flip the binary label (1 -> 0, 0 -> 1); any other value becomes missing.
    # Presumably done so that 1 is the favourable outcome - TODO confirm.
    df['label'] = _recode(df['label'], {1: 0, 0: 1})
    return _train_and_score(
        df,
        data_columns=['sex', 'race', 'age', 'priors', 'label'],
        feature_columns=['sex', 'race', 'age', 'priors'],
    )


def execute_german():
    """Load the German credit dataset, train a decision tree and return scored data.

    Reads ``../../Data/german.pickle``, encodes the credit-history, savings
    and employment one-hot columns as ordinal features, recodes the label to
    0/1 and fits the model on ``sex``, ``age``, ``credit_history``,
    ``savings`` and ``employment``.

    Returns
    -------
    data.Data
        Features plus ``label``, ``Predicted`` and ``proba`` columns.
    """
    # NOTE: unpickling can execute arbitrary code; only load trusted files.
    df = pd.read_pickle('../../Data/german.pickle')
    df['credit_history'] = _ordinal_from_onehot(df, [
        'credit_history=None/Paid',
        'credit_history=Delay',
        'credit_history=Other',
    ])
    df['savings'] = _ordinal_from_onehot(df, [
        'savings=Unknown/None',
        'savings=<500',
        'savings=500+',
    ])
    df['employment'] = _ordinal_from_onehot(df, [
        'employment=Unemployed',
        'employment=1-4 years',
        'employment=4+ years',
    ])
    # In the raw data 1 is good and 2 is bad credit risk; recode to 1 / 0.
    # Any other value becomes missing.
    df['label'] = _recode(df['label'], {1: 1, 2: 0})
    return _train_and_score(
        df,
        # Column order passed to FLAI differs from the model's feature order;
        # both are kept exactly as they were.
        data_columns=['age', 'sex', 'credit_history', 'savings', 'employment', 'label'],
        feature_columns=['sex', 'age', 'credit_history', 'savings', 'employment'],
    )


In [26]:
# Build the three scored datasets. Each call reads its pickle from ../../Data,
# fits a DecisionTreeClassifier and returns a FLAI Data object whose frame
# carries the 'label', 'Predicted' and 'proba' columns used by the cells below.
adult_dataset = execute_adult()
compas_dataset = execute_compas()
german_dataset = execute_german()

## ADULT

In [20]:
# EQI / EQA fairness summary for the adult model: the predicted probability
# ('proba') is the target, 'education' and 'age' are the features, and the
# groups being compared are split on 'sex'.
# NOTE(review): this cell's execution count (20) is lower than those of the
# cells that define execute_adult (25) and build adult_dataset (26), so the
# output shown below may predate the current definitions - re-run the
# notebook top to bottom to confirm it.
df_f,datos_f = adult_dataset.fairness_eqa_eqi(features = ['education','age'], 
                              target_column = 'proba', 
                              column_filter = ['sex'],
                              plot = False)
print('EQI & EQA Metrics')
# Last expression of the cell: display the metrics table.
df_f

EQI & EQA Metrics


Unnamed: 0,group,reference,EQI,EQA,F
0,"['sex'](0.0,)","['sex'](1.0,)",-0.06,0.14,0.15


In [21]:
# Group-fairness metrics for the adult model's hard predictions ('Predicted'
# vs. 'label'), with sex == 1 as the privileged and sex == 0 as the
# unprivileged group.
# NOTE(review): executed as In [21], i.e. before the current definition cells
# (In [25] / In [26]); the output below may be stale.
result_metrics = adult_dataset.fairness_metrics(target_column='label', predicted_column = 'Predicted',
                            columns_fair = {'sex' : {'privileged' : 1, 'unprivileged' : 0}})
# get_df_metrics yields two frames; only the fairness one is displayed here.
df_performance,df_fairness = adult_dataset.get_df_metrics(metrics_json=result_metrics)
df_fairness

Calculating metrics for : sex  the value :  1
Calculating metrics for : sex  the value :  0


Unnamed: 0,EOD,DI,SPD,OD
sex_fair_metrics,-0.461182,0.0,-0.214793,-0.568475


## COMPAS

In [29]:
# EQI / EQA fairness summary for the COMPAS model: the predicted probability
# ('proba') is the target, 'priors' and 'age' are the features, and the
# groups being compared are split on 'sex'.
# Note that df_f / datos_f are reused from the adult section and overwritten.
df_f,datos_f = compas_dataset.fairness_eqa_eqi(features = ['priors','age'], 
                              target_column = 'proba', 
                              column_filter = ['sex'],
                              plot = False)
# Last expression of the cell: display the metrics table.
df_f

Unnamed: 0,group,reference,EQI,EQA,F
0,"['sex'](0.0,)","['sex'](1.0,)",-0.09,0.09,0.13


In [30]:
# Group-fairness metrics for the COMPAS model's hard predictions ('Predicted'
# vs. 'label'), with sex == 1 as the privileged and sex == 0 as the
# unprivileged group.
result_metrics = compas_dataset.fairness_metrics(target_column='label', predicted_column = 'Predicted',
                            columns_fair = {'sex' : {'privileged' : 1, 'unprivileged' : 0}})
# get_df_metrics yields two frames; only the fairness one is displayed here.
df_performance,df_fairness = compas_dataset.get_df_metrics(metrics_json=result_metrics)
df_fairness

Calculating metrics for : sex  the value :  1
Calculating metrics for : sex  the value :  0


Unnamed: 0,EOD,DI,SPD,OD
sex_fair_metrics,-0.069058,0.826933,-0.115658,-0.145972


## GERMAN

In [31]:
# EQI / EQA fairness summary for the German credit model: the predicted
# probability ('proba') is the target, 'age', 'credit_history', 'savings' and
# 'employment' are the features, and the groups being compared are split on
# 'sex'.
# Note that df_f / datos_f are reused from the earlier sections and overwritten.
df_f,datos_f = german_dataset.fairness_eqa_eqi(features = ['age','credit_history','savings','employment'], 
                              target_column = 'proba', 
                              column_filter = ['sex'],
                              plot = False)
# Last expression of the cell: display the metrics table.
df_f

Unnamed: 0,group,reference,EQI,EQA,F
1,"['sex'](0.0,)","['sex'](1.0,)",-0.06,0.04,0.07


In [33]:
# Group-fairness metrics for the German credit model's hard predictions
# ('Predicted' vs. 'label'), with sex == 1 as the privileged and sex == 0 as
# the unprivileged group.
result_metrics = german_dataset.fairness_metrics(target_column='label', predicted_column = 'Predicted',
                            columns_fair = {'sex' : {'privileged' : 1, 'unprivileged' : 0}})
# get_df_metrics yields two frames; only the fairness one is displayed here.
df_performance,df_fairness = german_dataset.get_df_metrics(metrics_json=result_metrics)
df_fairness

Calculating metrics for : sex  the value :  1
Calculating metrics for : sex  the value :  0


Unnamed: 0,EOD,DI,SPD,OD
sex_fair_metrics,-0.066401,0.859265,-0.131557,-0.297248
