In [1]:
# Mount Google Drive so the project folder and its data are reachable from the Colab VM.
from google.colab import drive

drive.mount("/content/drive")

# Make the project folder the working directory for the rest of the notebook.
%cd '/content/drive/MyDrive/ckd_uae/'

# NOTE(review): `!` runs this line in a separate shell process, so sourcing the
# activate script there most likely does not change the environment of the
# running kernel; imports below would still resolve against the Colab runtime.
# TODO confirm, and install or expose the required packages explicitly if so.
!source /content/drive/MyDrive/ckd_env/bin/activate

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
/content/drive/MyDrive/ckd_uae


In [2]:
# One-time setup on a fresh runtime (left commented out so "Run all" does not reinstall):
# %pip install aequitas

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

import warnings

# Suppress every warning for the rest of the session so cell outputs stay clean.
# NOTE(review): this also hides deprecation warnings raised by the libraries used below.
warnings.filterwarnings("ignore")

from functions import *  # project helpers; move_column_before (used below) presumably comes from here
from aequitas import Audit  # library for Bias & Fairness Analysis

## Data Source

https://data.mendeley.com/datasets/ppfwfpprbc/1

In [4]:
# Google Drive folders that hold the parquet inputs read in the next cell:
#   data_eda      -> the EDA-stage frame
#   data_original -> the original frame plus the test labels and model scores
data_eda = "/content/drive/MyDrive/ckd_uae/data/df_eda/"
data_original = "/content/drive/MyDrive/ckd_uae/data/df_original/"

In [5]:
# Resolve the four parquet inputs first, then load them in one pass.
path_full = os.path.join(data_original, "df_original.parquet")
path_eda = os.path.join(data_eda, "df_eda.parquet")
path_labels = os.path.join(data_original, "y_test.parquet")
path_scores = os.path.join(data_original, "rf_score.parquet")

# The generator reads the files in the listed order; each name receives one DataFrame.
df, df_eda, y_test, rf_score = (
    pd.read_parquet(path)
    for path in (path_full, path_eda, path_labels, path_scores)
)

In [6]:
# Work on an independent (deep, the pandas default) copy so the audit column
# added in the next cell never touches `df`.
df_audit = df.copy()

In [7]:
# Readable group label for the audit: code 1 -> "Male", everything else -> "Female".
# NOTE(review): any value other than 1 (including missing values) is labelled
# "Female" — confirm this matches the dataset's encoding.
is_male = df_audit["sex"].eq(1)
df_audit["sex_cat"] = is_male.map({True: "Male", False: "Female"})

In [8]:
# Assemble the audit table keyed on "id": labels first, then the model scores,
# then the sex label. Inner joins keep only ids present in all three inputs.
labels_with_scores = y_test.join(rf_score, on="id", how="inner")
audit_sex = labels_with_scores.join(df_audit["sex_cat"], on="id", how="inner")

In [9]:
# Peek at the first rows to confirm the joined columns (outcome, grid_search_rf_score, sex_cat).
audit_sex.head()

Unnamed: 0_level_0,outcome,grid_search_rf_score,sex_cat
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2346,0,1,Male
2394,0,0,Male
692,0,0,Female
68,0,0,Female
320,1,1,Male


In [10]:
# (rows, columns) of the audit table after the inner joins.
audit_sex.shape

(238, 3)

In [11]:
# Reorder columns so the score column sits before the group attribute.
# NOTE(review): move_column_before is a project helper not shown in this notebook;
# the head() output earlier already lists grid_search_rf_score ahead of sex_cat,
# so this call may be a no-op — confirm.
audit_sex = move_column_before(
    df=audit_sex, target_column="grid_search_rf_score", before_column="sex_cat"
)

In [12]:
# Sanity check: reordering columns should leave the (rows, columns) shape unchanged.
audit_sex.shape

(238, 3)

In [13]:
# Audit with no explicit reference group: binary scores in
# "grid_search_rf_score" are compared against the labels in "outcome".
audit = Audit(
    df=audit_sex,
    score_column="grid_search_rf_score",
    label_column="outcome",
)
# Run the audit; the cells below read its confusion matrix, metrics and disparities.
audit.audit()

In [14]:
# Per-group confusion-matrix counts (pp, pn, fp, fn, tn, tp) together with group sizes.
audit.confusion_matrix

Unnamed: 0_level_0,Unnamed: 1_level_0,pp,pn,fp,fn,tn,tp,group_label_pos,group_label_neg,group_size,total_entities
attribute_name,attribute_value,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
sex_cat,Female,5,104,2,2,102,3,5,104,109,238
sex_cat,Male,14,115,6,8,107,8,16,113,129,238


In [15]:
# Per-group performance metrics, rounded to two decimals for display.
audit.metrics.round(2)

Unnamed: 0_level_0,Unnamed: 1_level_0,accuracy,tpr,tnr,for,fdr,fpr,fnr,npv,precision,ppr,pprev,prev
attribute_name,attribute_value,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
sex_cat,Female,0.96,0.6,0.98,0.02,0.4,0.02,0.4,0.98,0.6,0.26,0.05,0.05
sex_cat,Male,0.89,0.5,0.95,0.07,0.43,0.05,0.5,0.93,0.57,0.74,0.11,0.12


In [16]:
# Full audit table: per-group metrics and counts, the disparity of each metric,
# and the reference group each disparity was computed against.
audit.disparity_df.style

Unnamed: 0,model_id,score_threshold,k,attribute_name,attribute_value,accuracy,tpr,tnr,for,fdr,fpr,fnr,npv,precision,pp,pn,ppr,pprev,fp,fn,tn,tp,group_label_pos,group_label_neg,group_size,total_entities,prev,ppr_disparity,pprev_disparity,precision_disparity,fdr_disparity,for_disparity,fpr_disparity,fnr_disparity,tpr_disparity,tnr_disparity,npv_disparity,ppr_ref_group_value,pprev_ref_group_value,precision_ref_group_value,fdr_ref_group_value,for_ref_group_value,fpr_ref_group_value,fnr_ref_group_value,tpr_ref_group_value,tnr_ref_group_value,npv_ref_group_value
0,0,binary 0/1,19,sex_cat,Female,0.963303,0.6,0.980769,0.019231,0.4,0.019231,0.4,0.980769,0.6,5,104,0.263158,0.045872,2,2,102,3,5,104,109,238,0.045872,0.357143,0.422674,1.05,0.933333,0.276442,0.362179,0.8,1.2,1.035766,1.054098,Male,Male,Male,Male,Male,Male,Male,Male,Male,Male
1,0,binary 0/1,19,sex_cat,Male,0.891473,0.5,0.946903,0.069565,0.428571,0.053097,0.5,0.930435,0.571429,14,115,0.736842,0.108527,6,8,107,8,16,113,129,238,0.124031,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,Male,Male,Male,Male,Male,Male,Male,Male,Male,Male


In [17]:
# Disparity ratios only: each group's metric relative to the reference group,
# so the reference group's own row is 1.0 throughout.
audit.disparities.style

Unnamed: 0_level_0,Unnamed: 1_level_0,ppr_disparity,pprev_disparity,precision_disparity,fdr_disparity,for_disparity,fpr_disparity,fnr_disparity,tpr_disparity,tnr_disparity,npv_disparity
attribute_name,attribute_value,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
sex_cat,Female,0.357143,0.422674,1.05,0.933333,0.276442,0.362179,0.8,1.2,1.035766,1.054098
sex_cat,Male,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0


In [18]:
# Settings shared by the two fairness plots below.
# disparity_tolerance is passed to both plots as their `fairness_threshold`.
disparity_tolerance = 1.25
# Metrics to plot: false positive rate, false discovery rate, predicted prevalence.
metrics = ["fpr", "fdr", "pprev"]

In [19]:
# Second audit over the same table, this time pinning the reference group
# explicitly: disparities for sex_cat are measured relative to "Male".
sex_reference = {"sex_cat": "Male"}
audit_sex_groups = Audit(
    df=audit_sex,
    score_column="grid_search_rf_score",
    label_column="outcome",
    reference_groups=sex_reference,
)
audit_sex_groups.audit()

In [20]:
# Summary view of the selected metrics for the audit with the explicit Male
# reference group; the bare name on the last line renders the chart.
summary_plot_ckd = audit_sex_groups.summary_plot(
    metrics=metrics,
    fairness_threshold=disparity_tolerance,
)
summary_plot_ckd

In [21]:
# Disparity view of the selected metrics for the sex_cat attribute.
# Plot from audit_sex_groups (explicit "Male" reference group) rather than the
# earlier `audit` object, so this chart and the summary plot share one
# reference-group definition instead of relying on the library's default choice.
disparity_plot_ckd = audit_sex_groups.disparity_plot(
    metrics=metrics,
    attribute="sex_cat",
    fairness_threshold=disparity_tolerance,
)
disparity_plot_ckd