In [None]:
import pandas as pd
import xlsxwriter

In [None]:
## Load the per-model metric sheets and stack them into a single frame

df_dte = pd.read_excel('dte_metrics.xlsx')
df_nn = pd.read_excel('nn_metrics.xlsx')
df_log = pd.read_excel('log_metrics.xlsx')
df_svm = pd.read_excel('svm_metrics.xlsx')

# A single concat call stacks the frames in the same order as the pairwise
# concatenation would (dte, nn, log, svm); original row labels are kept.
df = pd.concat([df_dte, df_nn, df_log, df_svm])

# Persist the combined table as a spreadsheet and as a pickle.
df.to_excel('all_metrics.xlsx')
df.to_pickle('all_metrics.pkl')

In [None]:
## Split the combined metrics by the value of the 'group' column

grouped = df.groupby(df["group"])

# One frame per group label; get_group raises KeyError if a label is absent.
df_gender = grouped.get_group("gender")
df_erstsprache = grouped.get_group("erstsprache")
df_buecher = grouped.get_group("buecher")
df_abiEltern = grouped.get_group("abiEltern")

In [None]:
"""
Function calculates fairness metrics pp, eo, sa and pe and return pivoted table
"""
def calculate_metrics(df, group_col, pp_col, eo_col, sa_col, pe_col, advantaged_group, disadvantaged_group):
    df = df.drop(columns=['group', 'Unnamed: 0', 'Accuracy'])
    df = pd.pivot_table(df, values=["Precision","Recall","AUC","FPR"], index=["model"], columns=[group_col])
    df[pp_col] = df.Precision[advantaged_group] - df.Precision[disadvantaged_group]
    df[eo_col] = df.Recall[disadvantaged_group] - df.Recall[advantaged_group]
    df[sa_col] = df.AUC[advantaged_group] - df.AUC[disadvantaged_group]
    df[pe_col] = df.FPR[disadvantaged_group] - df.FPR[advantaged_group]
    df = df.drop(columns=['AUC','Precision','Recall','FPR'])
    df.columns = df.columns.droplevel(1)
    df = pd.pivot_table(df, values=[pp_col,eo_col,sa_col,pe_col], columns=["model"])
    return df

# Each group frame is replaced by its fairness-gap table. The first label is
# passed as the advantaged subgroup, the second as the disadvantaged one.

# gender
df_gender = calculate_metrics(
    df_gender,
    group_col='subgroup',
    pp_col='PP', eo_col='EO', sa_col='SA', pe_col='PE',
    advantaged_group='girls',
    disadvantaged_group='boys',
)

# first language
df_erstsprache = calculate_metrics(
    df_erstsprache,
    group_col='subgroup',
    pp_col='PP', eo_col='EO', sa_col='SA', pe_col='PE',
    advantaged_group='deutsch',
    disadvantaged_group='migration',
)

# HLE
df_buecher = calculate_metrics(
    df_buecher,
    group_col='subgroup',
    pp_col='PP', eo_col='EO', sa_col='SA', pe_col='PE',
    advantaged_group='buch1',
    disadvantaged_group='buch0',
)

# parental education
df_abiEltern = calculate_metrics(
    df_abiEltern,
    group_col='subgroup',
    pp_col='PP', eo_col='EO', sa_col='SA', pe_col='PE',
    advantaged_group='abi',
    disadvantaged_group='keinAbi',
)


In [None]:
"""
function to format results
set two threshols: one at |0.02| in orange and one at |0.05| in red
format all negative values in bold
"""

def threshold001(v, props=''):
    """Return ``props`` when ``v`` lies strictly outside [-0.02, 0.02], else None.

    Despite the function name, the cutoff used here is 0.02.
    """
    if v < -0.02 or v > 0.02:
        return props
    return None

def threshold005(v, props=''):
    """Return ``props`` when ``v`` lies strictly outside [-0.05, 0.05], else None."""
    if v < -0.05 or v > 0.05:
        return props
    return None

def negativeValue(v, props=''):
    """Return ``props`` when ``v`` is strictly below zero, else None."""
    if v < 0:
        return props
    return None

def showTable(df):
    """Return a Styler for ``df`` that highlights notable values.

    Every cell gets black, right-aligned text on a white background. On top of
    that, three element-wise rules are applied in this order:

    1. ``threshold001`` -> orange text
    2. ``threshold005`` -> red text
    3. ``negativeValue`` -> bold text

    The red rule is registered after the orange one so that its colour
    declaration comes last for cells that match both.

    Parameters
    ----------
    df : pandas.DataFrame
        Table of numeric values to style.

    Returns
    -------
    pandas.io.formats.style.Styler
        The styled table; ``df`` itself is not changed.
    """
    styled = df.style.set_properties(**{
        'color': 'black',
        # 'align' is not a CSS property; 'text-align' is what right-aligns text.
        'text-align': 'right',
        'background-color': 'white',
    })

    rules = (
        (threshold001, 'color:orange;'),
        (threshold005, 'color:red;'),
        (negativeValue, 'font-weight:bold;'),
    )
    for rule, css in rules:
        # Styler.applymap was renamed to Styler.map in pandas 2.1 and the old
        # name is deprecated; use whichever this pandas version provides.
        apply_elementwise = getattr(styled, 'map', None) or styled.applymap
        styled = apply_elementwise(rule, props=css)
    return styled

In [None]:
# Style the gender table; the bare `s` on the last line makes it the cell output.
s = showTable(df_gender)
s

In [None]:
# Style the first-language table; the bare `s` on the last line makes it the cell output.
s = showTable(df_erstsprache)
s

In [None]:
# Style the parental-education table; the bare `s` on the last line makes it the cell output.
s = showTable(df_abiEltern)
s

In [None]:
# Style the books (HLE) table; the bare `s` on the last line makes it the cell output.
s = showTable(df_buecher)
s

In [None]:
## save as excel

# ExcelWriter.save() was removed in pandas 2.0. Using the writer as a context
# manager writes and closes the workbook on exit, including when one of the
# exports raises. Each table goes to its own sheet.
with pd.ExcelWriter('fairness_double.xlsx', engine='xlsxwriter') as writer:
    df_gender.to_excel(writer, sheet_name='Gender')
    df_abiEltern.to_excel(writer, sheet_name='AbiEltern')
    df_erstsprache.to_excel(writer, sheet_name='ErstSprache')
    df_buecher.to_excel(writer, sheet_name='Buecher')