# 0. Imports and Arguments


In [None]:
import os
import sys
sys.path.append('../')

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from scipy import stats
import seaborn as sns;

from scripts.evaluation_utils import delayed_impact_csv, immediate_impact_csv, delayed_impact_german_csv,types_csvs

In [None]:
# Root folder with the results of one dataset run; point this at another
# run to analyse a different dataset.
data_path = '../results/demo-0-lab-0/'

# Sub-folder names below data_path that the following cells iterate over.
folders = [
    'dt',
    'gnb',
    'lgr',
    'gbt',
]

# 1. Build useful CSVs

## 1.2 FP/TP/TN/FN Ratios

In [None]:
# Run the project helper scripts.evaluation_utils.types_csvs over every model folder.
# Presumably this writes the FP/TP/TN/FN ratio CSVs named in the heading above —
# confirm against evaluation_utils.
types_csvs(data_path, folders)

## 1.1 Impact

In [None]:
# Build the delayed-impact CSVs once per value of the second argument (0, then 1).
# NOTE(review): 0/1 presumably selects the demographic group — confirm in evaluation_utils.
for group in (0, 1):
    delayed_impact_csv(data_path, group, folders)

In [None]:
# Build the immediate-impact CSVs once per value of the second argument (0, then 1).
# NOTE(review): 0/1 presumably selects the demographic group — confirm in evaluation_utils.
for group in (0, 1):
    immediate_impact_csv(data_path, group, folders)

# 2. Analyzing Scores (only fico_data)

## 2.1 Extracting Scores from csv into dataframes

In [None]:
# Score tables, keyed by model folder name:
#   classifier_dfs -> every column of the scores file
#   dfs_b          -> only the columns whose name contains 'B'
#   dfs_w          -> only the columns whose name contains 'W'
# NOTE(review): 'B' / 'W' presumably mark the black / white groups — confirm
# against the column names in *_all_scores.csv.
classifier_dfs, dfs_b, dfs_w = {}, {}, {}

# Load the scores of every model into the dictionaries.
for f in folders:
    path = f'{data_path}{f}/{f}_all_scores.csv'
    # Read, make sure the index is a plain 0..n-1 range, round to whole numbers.
    df = pd.read_csv(path).reset_index(drop=True).round(0)

    classifier_dfs[f] = df
    dfs_b[f], dfs_w[f] = df.filter(like='B'), df.filter(like='W')

## 2.2 Checking for normal distributions:

If p < 0.01 (or < 0.05), the distribution differs significantly from a normal distribution. The cell below prints only the columns with p > 0.01, i.e. those for which normality is *not* rejected.

In [None]:
# Kolmogorov-Smirnov normality check for every score column of every classifier.
# Only columns for which normality is NOT rejected (p > 0.01) are printed.
for c, df in classifier_dfs.items():
    print('Classifier:', c)
    for col in df:
        data = df[col].dropna(axis=0)

        # A normal distribution cannot be fitted to fewer than two values or to
        # a constant column; report it instead of letting kstest fail or mislead.
        if len(data) < 2 or data.std() == 0:
            print(col, ',skipped (too few values or zero variance)')
            continue

        # kstest(data, "norm") alone compares against the *standard* normal
        # N(0, 1), which rejects any column that is not already standardised,
        # regardless of its shape. Compare against a normal with the sample's
        # own mean and standard deviation instead.
        # NOTE: with parameters estimated from the same sample the KS p-value is
        # only approximate (on the conservative side); a Lilliefors-type
        # correction would be needed for an exact level.
        _, p = stats.kstest(data, "norm", args=(data.mean(), data.std()))
        if p > 0.01:
            print(col, ',p:', p)
    print('Check for normal distributions->done')

## 2.3 Significance of Score Distributions with Mann Whitney U test:


In [None]:
# Output folder for the Mann-Whitney U result tables; created if missing,
# reused silently when it already exists.
mwu_path = data_path + 'mwu/'
os.makedirs(mwu_path, exist_ok=True)

### Variance of Distributions unmitigated v mitigated for each race

If p < 0.05 (the threshold applied in the code below; a stricter level such as 0.001 or 0.0005 can be used instead), the distributions are significantly different from each other.

In [None]:
def p_race_mwu(dfs, b_or_w='B', alpha=0.05, out_dir=None):
    """Compare unmitigated and mitigated scores with the Mann-Whitney U test.

    For every classifier in ``dfs`` the column ``unmit<b_or_w>`` is tested
    against each remaining column of that classifier's DataFrame. The
    p-values (rounded to 3 decimals) and a significance marker table are
    printed/saved as CSV and also returned.

    Parameters
    ----------
    dfs : dict
        Maps a classifier name to a DataFrame that contains the column
        ``unmit<b_or_w>`` plus one column per mitigated model.
    b_or_w : str
        One-character suffix used in the column names and in the output
        file names.
    alpha : float
        Significance level; a cell is marked ``'s'`` when ``p < alpha``,
        otherwise ``' '``.
    out_dir : str or None
        Folder the two CSV files are written to. ``None`` falls back to the
        notebook-level ``mwu_path``.

    Returns
    -------
    (p_vals, p_signi) : tuple of DataFrame
        Both indexed by constraint name, one column per classifier
        (named ``<classifier><b_or_w>``).
    """
    if out_dir is None:
        # Keep the original behaviour: write next to the other notebook results.
        out_dir = mwu_path

    unmit_col = f'unmit{b_or_w}'
    constraints = []
    p_columns = {}
    signi_columns = {}

    for clf, df in dfs.items():
        data_unmiti = df[unmit_col].dropna(axis=0)  # scores of the unmitigated model

        # Drop the baseline by name rather than by position, so the result does
        # not depend on the unmitigated column being the first one.
        mitigated = df.drop(columns=[unmit_col])

        # Row labels: column name without its trailing one-character suffix.
        constraints = [col[:-1] for col in mitigated]

        p_col = []
        for col in mitigated:
            data_miti = mitigated[col].dropna(axis=0)  # scores of one mitigated model
            # State the alternative explicitly: the default differs between
            # SciPy versions, which would silently change the p-values.
            _, p = stats.mannwhitneyu(data_unmiti, data_miti,
                                      alternative='two-sided')
            p_col.append(p)

        col_name = f'{clf}{b_or_w}'
        p_columns[col_name] = p_col
        signi_columns[col_name] = ['s' if p < alpha else ' ' for p in p_col]

    index = pd.Index(constraints, name='Constraints')
    p_vals = pd.DataFrame(p_columns, index=index).round(decimals=3)
    p_signi = pd.DataFrame(signi_columns, index=index)

    print(p_signi)

    # Save p-values and significance markers for all models.
    p_vals.to_csv(os.path.join(out_dir, f'p_un_vs_miti_{b_or_w}.csv'))
    p_signi.to_csv(os.path.join(out_dir, f'significanz_un_vs_miti_{b_or_w}.csv'))

    return p_vals, p_signi
   

In [None]:
# Run the unmitigated-vs-mitigated comparison for both groups, each preceded by
# its heading (the second heading starts with a blank line as a separator).
for heading, group_dfs, suffix in (
    ('Black:', dfs_b, 'B'),
    ('\nWhite:', dfs_w, 'W'),
):
    print(heading)
    p_race_mwu(group_dfs, suffix)