In [4]:
from numpy import loadtxt
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [5]:
import pandas as pd


In [26]:
# Load the baseline dataset and the one this notebook calls "mitigated"
# (how 'final.pickle' was produced is not shown here).
# NOTE(review): unpickling can execute arbitrary code embedded in the file --
# only load pickles that come from a trusted source.
original_path = 'original.pickle'
mitigated_path = 'final.pickle'

original = pd.read_pickle(original_path)
mitigated = pd.read_pickle(mitigated_path)

In [27]:
# Model inputs and label, spelled out once so that both datasets are sliced
# with exactly the same columns.
FEATURE_COLUMNS = ['Parch', 'Sex', 'Pclass', 'Embarked', 'SibSp']
TARGET_COLUMN = 'Survived'

# Features: one frame per dataset.
original_X = original[FEATURE_COLUMNS]
mitigated_X = mitigated[FEATURE_COLUMNS]

# Target: the list-style selection keeps it a one-column DataFrame (not a
# Series); a later cell adds a 'Predicted' column to its test split.
original_y = original[[TARGET_COLUMN]]
mitigated_y = mitigated[[TARGET_COLUMN]]

In [28]:
# Hold out 30% of each dataset for evaluation. Both splits use the same
# settings, including the fixed seed, so they are repeatable across runs.
split_options = {'test_size': 0.3, 'random_state': 54}

(original_X_train, original_X_test,
 original_y_train, original_y_test) = train_test_split(
    original_X, original_y, **split_options)

(mitigated_X_train, mitigated_X_test,
 mitigated_y_train, mitigated_y_test) = train_test_split(
    mitigated_X, mitigated_y, **split_options)

In [29]:
# Fit a classifier with default XGBoost hyperparameters on the training split
# of the original dataset. The bare `fit` call is kept as the last expression
# so the notebook displays the fitted estimator.
model_original = XGBClassifier()
model_original.fit(X=original_X_train, y=original_y_train)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, feature_types=None, gamma=0, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_bin=256, max_cat_threshold=64, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0, ...)

In [30]:
# Fit a second classifier, again with default XGBoost hyperparameters, on the
# training split of the mitigated dataset. The bare `fit` call is kept as the
# last expression so the notebook displays the fitted estimator.
model_mitigated = XGBClassifier()
model_mitigated.fit(X=mitigated_X_train, y=mitigated_y_train)

XGBClassifier(base_score=0.5, booster='gbtree', callbacks=None,
              colsample_bylevel=1, colsample_bynode=1, colsample_bytree=1,
              early_stopping_rounds=None, enable_categorical=False,
              eval_metric=None, feature_types=None, gamma=0, gpu_id=-1,
              grow_policy='depthwise', importance_type=None,
              interaction_constraints='', learning_rate=0.300000012,
              max_bin=256, max_cat_threshold=64, max_cat_to_onehot=4,
              max_delta_step=0, max_depth=6, max_leaves=0, min_child_weight=1,
              missing=nan, monotone_constraints='()', n_estimators=100,
              n_jobs=0, num_parallel_tree=1, predictor='auto', random_state=0, ...)

In [31]:
# Attach each model's predicted class to the held-out labels of its dataset.
#
# * `.assign` builds a new frame instead of writing a column into the object
#   returned by train_test_split, which can raise SettingWithCopyWarning.
# * Predicting on the training feature columns only keeps this cell
#   re-runnable: a later cell adds 'Survived' and 'Predicted' to *_X_test, and
#   feeding those extra columns to the model would no longer match the
#   features it was fitted on.
original_y_test = original_y_test.assign(
    Predicted=model_original.predict(original_X_test[original_X_train.columns])
)
mitigated_y_test = mitigated_y_test.assign(
    Predicted=model_mitigated.predict(mitigated_X_test[mitigated_X_train.columns])
)

In [32]:
# Put the true label and the model prediction next to the test features so a
# single frame per dataset can be passed on for fairness evaluation.
#
# `.assign` returns a new frame (rebound to the same name) rather than writing
# columns in place into the object returned by train_test_split, which can
# raise SettingWithCopyWarning. Values are aligned on the row index, exactly as
# the in-place column assignment did, and re-running the cell simply overwrites
# the two columns.
# Caution: from here on *_X_test also holds label columns, so it must not be
# passed to `predict` without selecting the feature columns first.
original_X_test = original_X_test.assign(
    Survived=original_y_test['Survived'],
    Predicted=original_y_test['Predicted'],
)
mitigated_X_test = mitigated_X_test.assign(
    Survived=mitigated_y_test['Survived'],
    Predicted=mitigated_y_test['Predicted'],
)

In [33]:
from FLAI import data
# Wrap each evaluation frame (features + 'Survived' + 'Predicted') in a FLAI
# Data object. Both are built with identical options.
# NOTE(review): transform=False presumably tells FLAI to use the frame as-is --
# confirm against the FLAI documentation.
flai_options = {'transform': False}

original_dataset = data.Data(original_X_test, **flai_options)
mitigated_dataset = data.Data(mitigated_X_test, **flai_options)

In [34]:
# Fairness report for the model trained on the original data.
# Groups are defined on 'Sex': value 0 is treated as privileged, value 1 as
# unprivileged. The call is the last expression so the returned metrics
# dictionary is displayed as the cell output.
sex_groups = {'Sex': {'privileged': 0, 'unprivileged': 1}}

original_dataset.fairness_metrics(
    target_column='Survived',
    predicted_column='Predicted',
    columns_fair=sex_groups,
)

Calculating metrics for : Sex  the value :  0
Calculating metrics for : Sex  the value :  1


{'model': {'ACC': 0.7649253731343284,
  'TPR': 0.67,
  'FPR': 0.17857142857142858,
  'FNR': 0.33,
  'PPP': 0.3619402985074627},
 'Sex': {'privileged': {'ACC': 0.7555555555555555,
   'TPR': 0.9538461538461539,
   'FPR': 0.76,
   'FNR': 0.046153846153846156,
   'PPP': 0.9},
  'unprivileged': {'ACC': 0.7696629213483146,
   'TPR': 0.14285714285714285,
   'FPR': 0.07692307692307693,
   'FNR': 0.8571428571428571,
   'PPP': 0.0898876404494382},
  'fair_metrics': {'Equal_Opportunity_Difference': -0.810989010989011}}}

In [35]:
# Fairness report for the model trained on the mitigated data, using the same
# group definition on 'Sex' (0 = privileged, 1 = unprivileged) so the result
# can be compared with the report for the original data.
sex_groups = {'Sex': {'privileged': 0, 'unprivileged': 1}}

mitigated_dataset.fairness_metrics(
    target_column='Survived',
    predicted_column='Predicted',
    columns_fair=sex_groups,
)

Calculating metrics for : Sex  the value :  0
Calculating metrics for : Sex  the value :  1


{'model': {'ACC': 0.6753731343283582,
  'TPR': 0.4326923076923077,
  'FPR': 0.17073170731707318,
  'FNR': 0.5673076923076923,
  'PPP': 0.27238805970149255},
 'Sex': {'privileged': {'ACC': 0.6413043478260869,
   'TPR': 0.3235294117647059,
   'FPR': 0.1724137931034483,
   'FNR': 0.6764705882352942,
   'PPP': 0.22826086956521738},
  'unprivileged': {'ACC': 0.6931818181818182,
   'TPR': 0.4857142857142857,
   'FPR': 0.16981132075471697,
   'FNR': 0.5142857142857142,
   'PPP': 0.29545454545454547},
  'fair_metrics': {'Equal_Opportunity_Difference': 0.1621848739495798}}}

In [None]:
# Background reading on AI fairness and the AIF360 toolkit:
# https://transformernlp.medium.com/ai-fairness-a-brief-introduction-to-ai-fairness-360-b2e39c96ca49
# https://github.com/Trusted-AI/AIF360/blob/master/examples/README.md

In [25]:
# NOTE(review): this cell repeats the fairness report for `mitigated_dataset`
# that already appears earlier in the notebook. Its saved execution count (25)
# is lower than that of the cells above it, so the output stored with it comes
# from an earlier run and does not match the earlier cell's output. Re-run the
# notebook from a fresh kernel, or drop this cell, before sharing results.
sex_groups = {'Sex': {'privileged': 0, 'unprivileged': 1}}

mitigated_dataset.fairness_metrics(
    target_column='Survived',
    predicted_column='Predicted',
    columns_fair=sex_groups,
)

Calculating metrics for : Sex  the value :  0
Calculating metrics for : Sex  the value :  1


{'model': {'ACC': 0.6865671641791045,
  'TPR': 0.5978260869565217,
  'FPR': 0.26704545454545453,
  'FNR': 0.40217391304347827,
  'PPP': 0.3805970149253731},
 'Sex': {'privileged': {'ACC': 0.6966292134831461,
   'TPR': 0.4,
   'FPR': 0.1875,
   'FNR': 0.6,
   'PPP': 0.24719101123595505},
  'unprivileged': {'ACC': 0.6815642458100558,
   'TPR': 0.6716417910447762,
   'FPR': 0.3125,
   'FNR': 0.3283582089552239,
   'PPP': 0.44692737430167595},
  'fair_metrics': {'Equal_Opportunity_Difference': 0.27164179104477615}}}