# Imports and Setup

In [None]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import os
import matplotlib.pyplot as plt
import numpy as np
import plotnine as p9
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelBinarizer
from sklearn.pipeline import Pipeline
import transformer
from sklearn import metrics
from fairnesTester import FairnessTester
from sklearn.pipeline import FeatureUnion
from sklearn.model_selection import train_test_split
from fairnesTester import FairnessTester

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier, plot_tree, export_graphviz
from sklearn.ensemble import RandomForestClassifier, AdaBoostClassifier, GradientBoostingClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.gaussian_process import GaussianProcessClassifier
from xgboost import XGBClassifier

#pipelines and transformer

# Numeric branch: DataSelector("number") feeds a StandardScaler.
# NOTE(review): DataSelector is project-local; presumably it selects columns
# by dtype -- confirm in the transformer module.
num_trans = Pipeline(
    steps=[
        ("selector", transformer.DataSelector("number")),
        ("scaler", StandardScaler()),
    ]
)

# Categorical branch: DataSelector("object") feeds a one-hot encoder.
cat_trans = Pipeline(
    steps=[
        ("selector", transformer.DataSelector("object")),
        ("one_hot", preprocessing.OneHotEncoder()),
    ]
)

# Both branches receive the same input; their outputs are concatenated
# column-wise, categorical block first.
pre_pipe = FeatureUnion(transformer_list=[("cat", cat_trans), ("num", num_trans)])

# Helpers shared by the dataset cells: a row filter whose missing-value marker
# is set per dataset via set_nan_char, and the label encoder for the targets.
del_nan = transformer.DeleteNAN("")
lb = LabelBinarizer()

# Importing and preparing the data


In [None]:
# Registry of prepared datasets. Every dataset cell appends one tuple:
# (test frame, dataset name, privileged value, unprivileged value,
#  protected attribute, train_data, train_labels, test_data, test_labels).
datasets = []

## Adult Income Data set

In [None]:
# Column names for the header-less adult.data / adult.test files.
names = [
    "age", "workclass", "fnlwgt", "education", "education-num",
    "marital-status", "occupation", "relationship", "race", "sex",
    "capital-gain", "capital-loss", "hours-per-week", "native-country",
    "class",
]
filename = "Datasets/adult.data"
train = pd.read_csv(filename, names=names)
test = pd.read_csv("Datasets/adult.test", names=names)

# " ?" is the missing-value marker handed to the DeleteNAN helper for this
# dataset (presumably rows containing it are removed -- confirm in transformer).
del_nan.set_nan_char(" ?")
train, test = del_nan.transform(train), del_nan.transform(test)

# Encode the target column.
# NOTE(review): the binarizer is re-fitted on the test labels instead of
# reusing the train fit, so both encodings only agree if the label ordering
# is the same in both files.
train["class"] = lb.fit_transform(train["class"])
test["class"] = lb.fit_transform(test["class"])
train_labels = train["class"]
test_labels = test["class"]

# The preprocessing is fitted on the training features and re-applied to test.
train_data = pre_pipe.fit_transform(train.drop("class", axis=1))
test_data = pre_pipe.transform(test.drop("class", axis=1))

# Attributes for the fairness tester (values keep the leading blank used in
# the raw file).
protected_att = "sex"
priv_val = " Male"
unpriv_val = " Female"
dataset_name = "Adult_Income"

datasets.append(
    (test, dataset_name, priv_val, unpriv_val, protected_att,
     train_data, train_labels, test_data, test_labels)
)

## German Credit Dataset

In [None]:
# German Credit: load, encode the target, split, preprocess and register.
filename = "Datasets/german.data"
names = [
    "status existing account", "duration", "credit history", "purpose",
    "credit amount", "savings", "employment since", "installment rate",
    "sex", "other debtors", "residence since", "property", "age",
    "installment plans", "housing", "num existing credits", "job",
    "no of pople liable", "telephone", "foreign worker", "class",
]
data = pd.read_csv(filename, sep=" ", names=names)

data["class"] = lb.fit_transform(data["class"])

# After the split `data` holds the training rows and `test` the held-out rows.
data, test, train_labels, test_labels = train_test_split(data, data["class"], random_state=42)

# Preprocessing is fitted on the training rows only.
train_data = pre_pipe.fit_transform(data.drop("class", axis=1))
test_data = pre_pipe.transform(test.drop("class", axis=1))

# Transform for the fairness tester: collapse the personal-status codes into
# two gender labels. This happens after test_data was computed, so the model
# input is unaffected. The result is assigned back to the column because
# `test["sex"].replace(..., inplace=True)` is chained assignment, which no
# longer modifies `test` under pandas Copy-on-Write.
test = test.copy()
test["sex"] = test["sex"].replace(
    {"A91": "male", "A93": "male", "A94": "male", "A92": "female", "A95": "female"}
)

# Attributes for the fairness tester
dataset_name = "German_Credit"
priv_val = "male"
unpriv_val = "female"
protected_att = "sex"

datasets.append((test, dataset_name, priv_val, unpriv_val, protected_att, train_data, train_labels, test_data, test_labels))

## Default of Credit Card Payments

In [None]:
# Default of Credit Card Payments: load, split, preprocess and register.
filename = "Datasets/default of credit.xls"
# X1 and X12..X23 are read as integers, X2..X11 as object so that the
# "object" branch of the preprocessing one-hot encodes them.
column_types = {
    "X1": int,
    **{f"X{i}": object for i in range(2, 12)},
    **{f"X{i}": int for i in range(12, 24)},
}
data_inp = pd.read_excel(filename, dtype=column_types)

data_inp = data_inp.rename(columns={"Y": "class"})

data, test, train_labels, test_labels = train_test_split(data_inp, data_inp["class"], random_state=42)

# NOTE(review): the preprocessing is fitted on ALL rows (train and test), so
# scaling statistics and encoder categories include the held-out rows. This
# presumably keeps the encoder from meeting unseen categories at transform time.
pre_pipe.fit(data_inp.drop("class", axis=1))
train_data = pre_pipe.transform(data.drop("class", axis=1))
test_data = pre_pipe.transform(test.drop("class", axis=1))

# Relabel the gender codes for the fairness tester. Assign the result back:
# the chained `test["X2"].replace(..., inplace=True)` form no longer modifies
# `test` under pandas Copy-on-Write.
test = test.copy()
test["X2"] = test["X2"].replace({1: "male", 2: "female"})

# Attributes for the fairness tester
dataset_name = "Default_Of_Credit"
priv_val = "male"
unpriv_val = "female"
protected_att = "X2"

datasets.append((test, dataset_name, priv_val, unpriv_val, protected_att, train_data, train_labels, test_data, test_labels))


## Ricci v. DeStefano Dataset

In [None]:
# Ricci dataset: load, derive the pass/fail target, split, preprocess, register.
filename = "Datasets/ricci.csv"
data_inp = pd.read_csv(filename).drop("Unnamed: 0", axis=1)
# Applicants with a combined score >= 70 pass.
data_inp.rename(columns={"Combine": "class"}, inplace=True)

# Order matters: scores below 70 become 0 first, so the second assignment
# only touches the original scores of 70 and above.
data_inp.loc[data_inp["class"] < 70, "class"] = 0
data_inp.loc[data_inp["class"] >= 70, "class"] = 1

data, test, train_labels, test_labels = train_test_split(data_inp, data_inp["class"], random_state=42)

# NOTE(review): the preprocessing is fitted on ALL rows (train and test).
pre_pipe.fit(data_inp.drop("class", axis=1))
train_data = pre_pipe.transform(data.drop("class", axis=1))
test_data = pre_pipe.transform(test.drop("class", axis=1))

# Transform for the fairness tester: collapse the race codes into two groups.
# Assign the result back: the chained `test["Race"].replace(..., inplace=True)`
# form no longer modifies `test` under pandas Copy-on-Write.
test = test.copy()
test["Race"] = test["Race"].replace({"H": "non-white", "B": "non-white", "W": "white"})

# Attributes for the fairness tester
dataset_name = "Ricci_vs_Stefano"
priv_val = "white"
unpriv_val = "non-white"
protected_att = "Race"

datasets.append((test, dataset_name, priv_val, unpriv_val, protected_att, train_data, train_labels, test_data, test_labels))

## Heart Disease Dataset

In [None]:
# Heart Disease (Cleveland): load, binarise the target, split, preprocess, register.
filename = "Datasets/processed.cleveland.data"
# Only the columns that are used by name get a label; the rest keep numbers.
names = ["age", "sex", *range(3, 14), "class"]
data_inp = pd.read_csv(filename, names=names)

# Any value >= 1 counts as existing heart disease.
data_inp.loc[data_inp["class"] >= 1, "class"] = 1

data, test, train_labels, test_labels = train_test_split(data_inp, data_inp["class"], random_state=42)

# NOTE(review): the preprocessing is fitted on ALL rows (train and test).
pre_pipe.fit(data_inp.drop("class", axis=1))
train_data = pre_pipe.transform(data.drop("class", axis=1))
test_data = pre_pipe.transform(test.drop("class", axis=1))

# Relabel the gender codes for the fairness tester. The Cleveland data codes
# sex as 1 = male, 0 = female; mapping code 2 (as before) never matched, which
# left the unprivileged group empty. The result is assigned back because the
# chained `inplace=True` form no longer modifies `test` under Copy-on-Write.
test = test.copy()
test["sex"] = test["sex"].replace({1: "male", 0: "female"})

# Attributes for the fairness tester
dataset_name = "Heart_Diseases"
priv_val = "male"
unpriv_val = "female"
protected_att = "sex"

datasets.append((test, dataset_name, priv_val, unpriv_val, protected_att, train_data, train_labels, test_data, test_labels))

## Heart Failure Dataset

In [None]:
# Heart Failure: load, rename the target, split, preprocess and register.
filename = "Datasets/heart_failure.csv"
data_inp = pd.read_csv(filename)
data_inp.rename(columns={"DEATH_EVENT": "class"}, inplace=True)

data, test, train_labels, test_labels = train_test_split(data_inp, data_inp["class"], random_state=42)

# NOTE(review): the preprocessing is fitted on ALL rows (train and test).
pre_pipe.fit(data_inp.drop("class", axis=1))
train_data = pre_pipe.transform(data.drop("class", axis=1))
test_data = pre_pipe.transform(test.drop("class", axis=1))

# Relabel the gender codes for the fairness tester. Assign the result back:
# the chained `test["sex"].replace(..., inplace=True)` form no longer modifies
# `test` under pandas Copy-on-Write.
test = test.copy()
test["sex"] = test["sex"].replace({1: "male", 0: "female"})

# Attributes for the fairness tester
dataset_name = "Heart_Failure"
priv_val = "male"
unpriv_val = "female"
protected_att = "sex"

datasets.append((test, dataset_name, priv_val, unpriv_val, protected_att, train_data, train_labels, test_data, test_labels))

## Student Performance Data Set

In [None]:
# Student Performance: load, derive the pass/fail target, split, preprocess, register.
filename = "Datasets/student-por.csv"
data_inp = pd.read_csv(filename, sep=";")

# The intermediate grades G1/G2 are dropped; the final grade G3 is the target.
data_inp.drop(["G1", "G2"], axis=1, inplace=True)
data_inp.rename(columns={"G3": "class"}, inplace=True)

# Order matters: grades below 10 become 0 (failed) first, so the second
# assignment only touches the original grades of 10 and above (passed).
data_inp.loc[data_inp["class"] < 10, "class"] = 0
data_inp.loc[data_inp["class"] >= 10, "class"] = 1

data, test, train_labels, test_labels = train_test_split(data_inp, data_inp["class"], random_state=42)

# NOTE(review): the preprocessing is fitted on ALL rows (train and test).
pre_pipe.fit(data_inp.drop("class", axis=1))
train_data = pre_pipe.transform(data.drop("class", axis=1))
test_data = pre_pipe.transform(test.drop("class", axis=1))

# Relabel the gender codes for the fairness tester. Assign the result back:
# the chained `test["sex"].replace(..., inplace=True)` form no longer modifies
# `test` under pandas Copy-on-Write.
test = test.copy()
test["sex"] = test["sex"].replace({"M": "male", "F": "female"})

# Attributes for the fairness tester
dataset_name = "Student_Performance"
priv_val = "male"
unpriv_val = "female"
protected_att = "sex"

datasets.append((test, dataset_name, priv_val, unpriv_val, protected_att, train_data, train_labels, test_data, test_labels))

## Baserates

In [None]:
# Base rates: for every registered dataset, the share of each protected group
# in the test frame and the share of positive labels inside each group.
baserates = pd.DataFrame(columns = ["dataset", "Privileged Attribute", "Unprivileged Attribute", "Total Entrys", "Total Rate Privileged", "Total Rate Unprivileged", "Rate of Positives Privileged", "Rate of Positives Unprivileged" ])

for dataset in datasets:
    # Only the first five tuple entries are needed here.
    test, dataset_name, priv_val, unpriv_val, protected_att = dataset[:5]

    total = test["class"].count()
    is_positive = test["class"] == 1
    is_priv = test[protected_att] == priv_val
    is_unpriv = test[protected_att] == unpriv_val

    priv_total = test.loc[is_priv, "class"].count()
    priv_total_rate = priv_total / total
    priv_positive_rate = test.loc[is_priv & is_positive, "class"].count() / priv_total

    unpriv_total = test.loc[is_unpriv, "class"].count()
    unpriv_total_rate = unpriv_total / total
    # The positive rate of the unprivileged group is relative to that group's
    # own size (it was previously divided by the privileged group's size).
    unpriv_positive_rate = test.loc[is_unpriv & is_positive, "class"].count() / unpriv_total

    baserates.loc[len(baserates)] = [dataset_name, priv_val, unpriv_val, total, priv_total_rate, unpriv_total_rate, priv_positive_rate, unpriv_positive_rate]
print(baserates.to_latex(index=False))


# Classifiers

In [None]:
# Set up the list of classifiers; model names are the estimator class names
# and double as the prediction column names in each test frame.
classifiers = [DecisionTreeClassifier(random_state=42),RandomForestClassifier(random_state=42),SVC(),AdaBoostClassifier(),KNeighborsClassifier(5), GaussianNB(), XGBClassifier()]
model_names = [model.__class__.__name__ for model in classifiers]

# Train every classifier on every dataset and store its predictions as a new
# column (named after the model) in that dataset's test frame.
for dataset in datasets:
    print(dataset[1])
    # Densify once per dataset instead of once per model: the conversion does
    # not depend on the classifier.
    train_data = dataset[5].toarray()
    train_labels = dataset[6]
    test_data = dataset[7].toarray()

    for model, name in zip(classifiers, model_names):
        print(name)

        # The same estimator instance is re-fitted for each dataset.
        model.fit(train_data, train_labels)
        dataset[0][name] = model.predict(test_data)

In [None]:
# Persist, per dataset, the test frame (including the prediction columns) and
# a one-row companion file with the fairness-tester attributes.
for dataset in datasets:
    dataset_name, priv_val, unpriv_val, protected_att = dataset[1:5]
    dataset[0].to_csv("results/predictions/" + dataset_name + ".csv")
    frame = pd.DataFrame(
        [[dataset_name, priv_val, unpriv_val, protected_att]],
        columns=["dataset_name", "priv_val", "unpriv_val", "protected_att"],
    )
    frame.to_csv("results/predictions/" + dataset_name + "_attributes.csv")


In [None]:
# Reload the saved predictions so the analysis can start here without
# re-training. The registry is rebuilt with five-entry tuples:
# (test frame, dataset name, privileged value, unprivileged value, protected attribute).
dataset_names = ["Adult_Income", "Default_Of_Credit", "German_Credit", "Heart_Diseases", "Heart_Failure", "Ricci_vs_Stefano", "Student_Performance"]
definitions_names = ["statistical parity", "predictive parity", "negative predictive parity", "equal opportunity", "predictive equality", "overall accuracy equality", "treatment equality"]
classifiers = [DecisionTreeClassifier(random_state=42),RandomForestClassifier(random_state=42),SVC(),AdaBoostClassifier(),KNeighborsClassifier(5), GaussianNB(), XGBClassifier()]
# The class names match the prediction column names in the saved frames.
model_names = [model.__class__.__name__ for model in classifiers]

datasets = []
for dataset in dataset_names:
    test = pd.read_csv("results/predictions/" + dataset + ".csv")
    att = pd.read_csv("results/predictions/" + dataset + "_attributes.csv")
    # .item() requires the attribute file to hold exactly one row.
    dataset_name, priv_val, unpriv_val, protected_att = (
        att["dataset_name"].item(),
        att["priv_val"].item(),
        att["unpriv_val"].item(),
        att["protected_att"].item(),
    )
    datasets.append((test, dataset_name, priv_val, unpriv_val, protected_att))


## Accuracy

In [None]:
# Accuracy of every model on every dataset, one row per (dataset, model).
from sklearn.metrics import accuracy_score

accuracy = pd.DataFrame(columns=["dataset", "model", "accuracy"])
for dataset in datasets:
    predictions, dataset_label = dataset[0], dataset[1]
    truth = predictions["class"]
    for name in model_names:
        # Each model's predictions live in the column named after the model.
        accuracy.loc[len(accuracy)] = [dataset_label, name, accuracy_score(truth, predictions[name])]

In [None]:
# Accuracy per model, one facet per dataset.
plot = p9.ggplot(data=accuracy, mapping=p9.aes(x="model", y="accuracy"))
plot = plot + p9.geom_col(position="dodge")
plot = plot + p9.facet_grid(".~dataset")
plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
plot = plot + p9.labs(x="ML Models", y="Accuracy", title="Accuracy for each dataset")
plot.save(filename="plots/accuracy_overall.png", height=4, width=17)

In [None]:
# Plot the accuracy of each model averaged over all datasets.
# Only the "accuracy" column is aggregated: averaging the whole frame would
# also hit the string "dataset" column, which raises on pandas >= 2.0.
# The cast guards against the column having been built up with object dtype.
mean_accuracy = (
    accuracy.astype({"accuracy": float})
    .groupby(["model"], as_index=False)["accuracy"]
    .mean()
)
plot = (p9.ggplot(data=mean_accuracy, mapping=p9.aes(x="model", y="accuracy"))
        + p9.geom_col(position="dodge")
        + p9.theme(axis_text_x=p9.element_text(angle=90))
        + p9.labs(x="ML Models", y="Accuracy", title=("Avarage Accuracy"))
        )
plot.save(filename="plots/"+"accuracy_avarage.png")

# Testing for Fairness


## testing classifiers list

In [None]:
# Run the fairness tester on every model's predictions. For each dataset a
# frame with two rows per model (privileged group, unprivileged group) is
# built and collected in `results`, in the order of `datasets`.
tester = FairnessTester()
results = []

for dataset in datasets:
    test, dataset_name, priv_val, unpriv_val, protected_att = dataset[:5]

    # Rows are gathered in a list and turned into a frame once;
    # DataFrame.append was removed in pandas 2.0.
    rows = []
    for name in model_names:
        tester.setup(test, protected_att, priv_val, unpriv_val, name)
        rows.append({"model": name, **tester.confusion_based_dic_priv()})
        rows.append({"model": name, **tester.confusion_based_dic_unpriv()})
    result_df = pd.DataFrame(rows)

    # Previously `results` stayed empty and only the last dataset's frame
    # survived the loop.
    results.append(result_df)

    # To persist the per-dataset results (read back by the loading cell), enable:
    # result_df.to_csv("results/fairness/"+dataset_name+".csv")

# NOTE(review): this method name is spelled differently from
# confusion_based_dic_priv/_unpriv above -- confirm it exists on FairnessTester.
definitions_names = list(tester.confuison_based_dic().keys())

## Load Fairness Results from CSV 

In [None]:
# Reload the stored fairness results, one frame per dataset, in the order of
# dataset_names (later cells pair results with dataset_names by position).
dataset_names = ["Adult_Income", "Default_Of_Credit", "German_Credit", "Heart_Diseases", "Heart_Failure", "Ricci_vs_Stefano", "Student_Performance"]
definitions_names = ["statistical parity", "predictive parity", "negative predictive parity", "equal opportunity", "predictive equality", "overall accuracy equality", "treatment equality"]
classifiers = [DecisionTreeClassifier(random_state=42),RandomForestClassifier(random_state=42),SVC(),AdaBoostClassifier(),KNeighborsClassifier(5), GaussianNB(), XGBClassifier()]
model_names = [model.__class__.__name__ for model in classifiers]

# Create the list of results.
results = [pd.read_csv("results/fairness/" + dataset + ".csv") for dataset in dataset_names]

## Prepare Results for Plotting

In [None]:
# Reshape the per-dataset result frames into one long frame with one row per
# (dataset, model, definition, group). Results are paired with dataset_names
# by position.
rows = []
for dataset_label, result_df in zip(dataset_names, results):
    for model in model_names:
        of_model = result_df["model"] == model
        for defi in definitions_names:
            # Skip treatment equality as it would destroy the scales of the full plot.
            if defi == "treatment equality":
                continue
            for group in ["priv", "unpriv"]:
                selected = result_df.loc[of_model & (result_df["group"] == group)]
                rows.append({
                    "model": model,
                    "group": group,
                    "definition": defi,
                    # .item() raises unless exactly one row matches.
                    "result": selected[defi].item(),
                    "dataset": dataset_label,
                })

# Built once from the collected rows; DataFrame.append was removed in pandas 2.0.
full_result_df = pd.DataFrame(rows)

In [None]:
# Absolute difference between the privileged and the unprivileged group for
# every (dataset, model, definition), with a Fair/Unfair verdict.
frames = []
for dataset_label, result_df in zip(dataset_names, results):
    rows = []
    for model in model_names:
        # The model name is taken from the loop; it was previously looked up
        # through `group`, a loop variable leaked from another cell.
        of_model = result_df["model"] == model
        priv_row = result_df.loc[of_model & (result_df["group"] == "priv")]
        unpriv_row = result_df.loc[of_model & (result_df["group"] == "unpriv")]
        for defi in definitions_names:
            # Skip treatment equality as it would destroy the scales of the full plot.
            if defi == "treatment equality":
                continue
            # .item() raises unless exactly one row matches.
            diff = abs(priv_row[defi].item() - unpriv_row[defi].item())
            rows.append({
                "model": model,
                "definition": defi,
                "difference": diff,
                # Threshold for considering a result fair: difference of at most 0.1.
                "fairness": "Fair" if diff <= 0.1 else "Unfair",
            })
    differences = pd.DataFrame(rows)
    differences["dataset"] = dataset_label
    frames.append(differences)

# Concatenated once; DataFrame.append was removed in pandas 2.0.
full_differences = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()


In [None]:
# Ratio unprivileged / privileged for every (dataset, model, definition),
# with a Fair/Unfair verdict.
frames = []
for dataset_label, result_df in zip(dataset_names, results):
    rows = []
    for model in model_names:
        # The model name is taken from the loop; it was previously looked up
        # through `group`, a loop variable leaked from another cell.
        of_model = result_df["model"] == model
        priv_row = result_df.loc[of_model & (result_df["group"] == "priv")]
        unpriv_row = result_df.loc[of_model & (result_df["group"] == "unpriv")]
        for defi in definitions_names:
            # Skip treatment equality as it would destroy the scales of the full plot.
            if defi == "treatment equality":
                continue
            # .item() raises unless exactly one row matches.
            x = priv_row[defi].item()
            y = unpriv_row[defi].item()
            # A privileged value of 0 yields ratio 0 (and hence "Unfair")
            # instead of a division by zero.
            rat = (y / x) if x != 0.0 else 0
            rows.append({
                "model": model,
                "definition": defi,
                "ratio": rat,
                # Fairness threshold: ratio strictly between 0.8 and 1.25.
                "fairness": "Unfair" if (rat <= 0.8 or rat >= 1.25) else "Fair",
            })
    ratio = pd.DataFrame(rows)
    ratio["dataset"] = dataset_label
    frames.append(ratio)

# Concatenated once; DataFrame.append was removed in pandas 2.0.
full_ratio = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()

In [None]:
# Average difference vs. average ratio vs. accuracy per (dataset, model).

# Only the measured column is averaged: averaging the whole frame would also
# hit the string columns "definition" and "fairness", which raises on pandas >= 2.0.
group_keys = ["dataset", "model"]
average_differences = full_differences.groupby(group_keys, as_index=False)["difference"].mean()
average_ratio = full_ratio.groupby(group_keys, as_index=False)["ratio"].mean()

accuracy_fairness = pd.DataFrame(columns=["dataset","model" ,"difference", "ratio", "accuracy"])

for dataset in dataset_names:
    for model in model_names:
        # Each lookup must match exactly one row; .item() raises otherwise.
        acc = accuracy.loc[(accuracy["dataset"] == dataset) & (accuracy["model"] == model), "accuracy"].item()
        diff = average_differences.loc[(average_differences["dataset"] == dataset) & (average_differences["model"] == model), "difference"].item()
        rat = average_ratio.loc[(average_ratio["dataset"] == dataset) & (average_ratio["model"] == model), "ratio"].item()
        accuracy_fairness.loc[len(accuracy_fairness)] = [dataset, model, diff, rat, acc]

# Plotting

In [None]:
# Overall plot per dataset: result per model, coloured by group, one facet per
# definition (treatment equality is absent from full_result_df, as it would
# destroy the scales).
for dataset_name in dataset_names:
    subset = full_result_df.loc[full_result_df["dataset"] == dataset_name]
    plot = p9.ggplot(data=subset, mapping=p9.aes(x="model", y="result", fill="group"))
    plot = plot + p9.geom_col(position="dodge")
    plot = plot + p9.facet_grid(".~definition")
    plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
    plot = plot + p9.labs(x="ML Models", y="Results", title=" Complete results for " + dataset_name)
    plot.save(filename="plots/full_results/" + dataset_name + "_complete_color.png", height=4, width=17)

## Difference

In [None]:
# Scatter plot: average fairness difference against accuracy, one point per
# (dataset, model), coloured by model.
plot = p9.ggplot(data=accuracy_fairness, mapping=p9.aes(x="difference", y="accuracy", color="model"))
plot = plot + p9.geom_point()
plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
plot = plot + p9.labs(x="Average difference", y="Accuracy", title="Average difference in fairness definitions vs Accuracy")
plot.save(filename="plots/difference/fairness_vs_accuracy_diff.png")

In [None]:
# Plot the differences for each dataset, one facet per definition; the red
# line marks the 0.1 fairness threshold.
for dataset_name in dataset_names:
    plot = (p9.ggplot(data=full_differences.loc[full_differences["dataset"] == dataset_name], mapping=p9.aes(x="model", y="difference", fill="fairness"))
        + p9.geom_hline(yintercept=0.1, color="red")
        + p9.geom_col(position="dodge")
        + p9.facet_grid(".~definition", space="free_x", scales="fixed")
        + p9.theme(axis_text_x=p9.element_text(angle=90))
        + p9.labs(x="ML Models", y="Differences", title=("Differences for " + dataset_name))
        + p9.ylim(0, 1)
        # Colours are keyed by label: with a positional tuple, a dataset that
        # has only "Unfair" rows would get them drawn in green.
        + p9.scale_fill_manual(values={"Fair": "green", "Unfair": "red"})
        )
    plot.save(filename="plots/difference/datasets/"+dataset_name+"_differences_scaled.png", height=4, width=17)

In [None]:
# Plot, for each classifier, the difference per definition averaged over all
# datasets; the red line marks the 0.1 fairness threshold.

# Aggregate once. Only "difference" is averaged, because averaging the string
# columns raises on pandas >= 2.0.
mean_differences = full_differences.groupby(["model", "definition"], as_index=False)["difference"].mean()

for model in model_names:
    # The mask must come from the aggregated frame itself: a mask built from
    # full_differences is aligned by index label and selects unrelated rows.
    model_rows = mean_differences.loc[mean_differences["model"] == model]
    # The former fill scale was dropped: no fill aesthetic is mapped here.
    plot = (p9.ggplot(data=model_rows, mapping=p9.aes(x="definition", y="difference"))
        + p9.geom_hline(yintercept=0.1, color="red")
        + p9.geom_col(position="dodge")
        + p9.theme(axis_text_x=p9.element_text(angle=90))
        + p9.labs(x="Definition", y="Differences", title=("Differences for " + model + " over all datasets"))
        + p9.ylim(0, 1)
        )
    plot.save(filename="plots/difference/models/"+model+"_differences.png", height=4, width=17)

In [None]:
# Differences per definition, one facet per dataset; treatment equality is
# left out. The red line marks the 0.1 fairness threshold.
for definition in definitions_names:
    if definition != "treatment equality":
        subset = full_differences.loc[full_differences["definition"] == definition]
        plot = p9.ggplot(data=subset, mapping=p9.aes(x="model", y="difference", fill="fairness"))
        plot = plot + p9.geom_hline(yintercept=0.1, color="red")
        plot = plot + p9.geom_col(position="dodge")
        plot = plot + p9.facet_wrap("dataset")
        plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
        plot = plot + p9.labs(x="ML Models", y="Differences", title="Differences for " + definition)
        plot = plot + p9.ylim(0, 1)
        plot = plot + p9.scale_fill_manual(values={"Fair": "green", "Unfair": "red"})
        plot.save(filename="plots/difference/definitions/" + definition + "_differences.png")

In [None]:
# Box plots of the differences per classifier over all datasets, one facet
# per definition; the red line marks the 0.1 fairness threshold.
plot = p9.ggplot(data=full_differences, mapping=p9.aes(x="model", y="difference"))
plot = plot + p9.geom_hline(yintercept=0.1, color="red")
plot = plot + p9.geom_boxplot(color="black", fill="darkgrey")
plot = plot + p9.facet_grid(".~definition", space="free_x", scales="fixed")
plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
plot = plot + p9.labs(x="ML Models", y="Differences", title="Boxplots for all ML models")
plot.save(filename="plots/difference/boxplot_models_diff.png", height=4, width=17)

In [None]:
# Box plots of the differences per classifier, pooled over all definitions
# and datasets; the red line marks the 0.1 fairness threshold.
plot = p9.ggplot(data=full_differences, mapping=p9.aes(x="model", y="difference"))
plot = plot + p9.geom_hline(yintercept=0.1, color="red")
plot = plot + p9.geom_boxplot(color="black", fill="darkgrey")
plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
plot = plot + p9.labs(x="ML Models", y="Differences", title="Boxplots for all ML models")
plot.save(filename="plots/difference/boxplot_models_overall_diff.png")

In [None]:
# Mean (point) and min-max range of the differences per classifier, one facet
# per definition; the red line marks the 0.1 fairness threshold.
plot = p9.ggplot(data=full_differences, mapping=p9.aes(x="model", y="difference"))
plot = plot + p9.geom_hline(yintercept=0.1, color="red")
plot = plot + p9.stat_summary(fun_y=np.mean, fun_ymin=np.min, fun_ymax=np.max)
plot = plot + p9.facet_grid(".~definition", space="free_x", scales="fixed")
plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
plot = plot + p9.labs(x="ML Models", y="Differences", title="Average and range for all ML models")
plot.save(filename="plots/difference/average_models_diff.png", width=17, height=4)

In [None]:
# Box plots of the differences per fairness definition, pooled over all
# models and datasets; the red line marks the 0.1 fairness threshold.
plot = p9.ggplot(data=full_differences, mapping=p9.aes(x="definition", y="difference"))
plot = plot + p9.geom_hline(yintercept=0.1, color="red")
plot = plot + p9.geom_boxplot(color="black", fill="darkgrey")
plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
plot = plot + p9.labs(x="Fairness Definitions", y="Differences", title="Boxplots for all definitions")
plot.save(filename="plots/difference/boxplot_definitions_diff.png")

## Ratio

In [None]:
# Scatter plot: average fairness ratio against accuracy, one point per
# (dataset, model), coloured by model.
plot = p9.ggplot(data=accuracy_fairness, mapping=p9.aes(x="ratio", y="accuracy", color="model"))
plot = plot + p9.geom_point()
plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
plot = plot + p9.labs(x="Average ratio", y="Accuracy", title="Average Fairness (ratio) vs Accuracy")
plot.save(filename="plots/ratio/fairness_vs_accuracy_ratio.png")

In [None]:
# Plot the ratios for each dataset, one facet per definition. The red lines
# mark the 0.8 / 1.25 fairness bounds, the green line the ideal ratio of 1.
for dataset_name in dataset_names:
    plot = (p9.ggplot(data=full_ratio.loc[full_ratio["dataset"] == dataset_name], mapping=p9.aes(x="model", y="ratio", fill="fairness"))
        + p9.geom_hline(yintercept=0.8, color="red")
        + p9.geom_hline(yintercept=1, color="green")
        + p9.geom_hline(yintercept=1.25, color="red")
        + p9.geom_col(position="dodge")
        + p9.facet_grid(".~definition", space="free_x", scales="fixed")
        + p9.theme(axis_text_x=p9.element_text(angle=90))
        + p9.labs(x="ML Models", y="Ratio", title=("Ratio for " + dataset_name))
        # Colours are keyed by label: with a positional tuple, a dataset that
        # has only "Unfair" rows would get them drawn in green.
        + p9.scale_fill_manual(values={"Fair": "green", "Unfair": "red"})
        )
    plot.save(filename="plots/ratio/datasets/"+dataset_name+"_ratio.png", height=4, width=17)

In [None]:
# Plot, for each classifier, the ratio per definition averaged over all
# datasets. The red lines mark the 0.8 / 1.25 fairness bounds, the green line
# the ideal ratio of 1.

# Aggregate once. Only "ratio" is averaged, because averaging the string
# columns raises on pandas >= 2.0.
mean_ratio = full_ratio.groupby(["model", "definition"], as_index=False)["ratio"].mean()

for model in model_names:
    # The mask must come from the aggregated frame itself: a mask built from
    # full_ratio is aligned by index label and selects unrelated rows.
    model_rows = mean_ratio.loc[mean_ratio["model"] == model]
    plot = (p9.ggplot(data=model_rows, mapping=p9.aes(x="definition", y="ratio"))
        + p9.geom_hline(yintercept=0.8, color="red")
        + p9.geom_hline(yintercept=1, color="green")
        + p9.geom_hline(yintercept=1.25, color="red")
        + p9.geom_col(position="dodge")
        + p9.theme(axis_text_x=p9.element_text(angle=90))
        + p9.labs(x="Definition", y="Ratio", title=("Ratio for " + model + " over all datasets"))
        )
    plot.save(filename="plots/ratio/models/"+model+"_ratio.png", height=4, width=17)

In [None]:
# Ratios per definition, one facet per dataset; treatment equality is left
# out. The red lines mark the 0.8 / 1.25 fairness bounds, the green line the
# ideal ratio of 1.
for definition in definitions_names:
    if definition != "treatment equality":
        subset = full_ratio.loc[full_ratio["definition"] == definition]
        plot = p9.ggplot(data=subset, mapping=p9.aes(x="model", y="ratio", fill="fairness"))
        plot = plot + p9.geom_hline(yintercept=0.8, color="red")
        plot = plot + p9.geom_hline(yintercept=1, color="green")
        plot = plot + p9.geom_hline(yintercept=1.25, color="red")
        plot = plot + p9.geom_col(position="dodge")
        plot = plot + p9.facet_wrap("dataset")
        plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
        plot = plot + p9.labs(x="ML Models", y="Ratio", title="Ratio for " + definition)
        plot = plot + p9.scale_fill_manual(values={"Fair": "green", "Unfair": "red"})
        plot.save(filename="plots/ratio/definitions/" + definition + "_ratio.png")

In [None]:
# Box plots of the ratios per classifier over all datasets, one facet per
# definition. The red lines mark the 0.8 / 1.25 fairness bounds, the green
# line the ideal ratio of 1.
plot = p9.ggplot(data=full_ratio, mapping=p9.aes(x="model", y="ratio"))
plot = plot + p9.geom_hline(yintercept=0.8, color="red")
plot = plot + p9.geom_hline(yintercept=1, color="green")
plot = plot + p9.geom_hline(yintercept=1.25, color="red")
plot = plot + p9.geom_boxplot(color="black", fill="darkgrey")
plot = plot + p9.facet_grid(".~definition", space="free_x", scales="fixed")
plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
plot = plot + p9.labs(x="ML Models", y="Ratio", title="Boxplots of Ratio for all ML models")
plot.save(filename="plots/ratio/boxplot_models_ratio.png", height=4, width=17)

In [None]:
# Box plots of the ratios per classifier, pooled over all definitions and
# datasets. The red lines mark the 0.8 / 1.25 fairness bounds, the green line
# the ideal ratio of 1.
plot = p9.ggplot(data=full_ratio, mapping=p9.aes(x="model", y="ratio"))
plot = plot + p9.geom_hline(yintercept=0.8, color="red")
plot = plot + p9.geom_hline(yintercept=1, color="green")
plot = plot + p9.geom_hline(yintercept=1.25, color="red")
plot = plot + p9.geom_boxplot(color="black", fill="darkgrey")
plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
plot = plot + p9.labs(x="ML Models", y="Ratio", title="Boxplots of Ratio for all ML models over all Datasets")
plot.save(filename="plots/ratio/boxplot_models_overall_ratio.png")

In [None]:
# Box plots of the ratios per fairness definition, pooled over all models and
# datasets. The red lines mark the 0.8 / 1.25 fairness bounds, the green line
# the ideal ratio of 1.
plot = p9.ggplot(data=full_ratio, mapping=p9.aes(x="definition", y="ratio"))
plot = plot + p9.geom_hline(yintercept=0.8, color="red")
plot = plot + p9.geom_hline(yintercept=1, color="green")
plot = plot + p9.geom_hline(yintercept=1.25, color="red")
plot = plot + p9.geom_boxplot(color="black", fill="darkgrey")
plot = plot + p9.theme(axis_text_x=p9.element_text(angle=90))
plot = plot + p9.labs(x="Fairness Definitions", y="Ratio", title="Boxplots for Ratio over all definitions")
plot.save(filename="plots/ratio/boxplot_definitions_ratio.png")