In [11]:
import pandas as pd
import numpy as np
import onnxruntime as rt
import matplotlib.pyplot as plt

In [12]:
data = pd.read_csv('../data/all_data.csv')

test_data = data.drop(['Ja', 'Nee'], axis=1)

session_model1 = rt.InferenceSession('../part2_models/model_1.onnx')
session_model2 = rt.InferenceSession('../part2_models/model_2.onnx')

Gender - persoon_geslacht_vrouw (binary)

Single mother - persoon_geslacht_vrouw (binary) + relatie_kind_heeft_kinderen (binary) + relatie_partner_huidige_partner___partner__gehuwd_ (binary) ?

Multiple children - relatie_kind_huidige_aantal > 1 (JY) (int)

Migrant - persoonlijke_eigenschappen_taaleis_voldaan (als proxy?) of pla_historie_werk_en_inburgering / typering_hist_inburgeringsbehoeftig



In [13]:
#simple statistical parity as explained in Lighthouse report.
def simple_statistical_parity_binary(feature_name, test_data, session):
    data_zero = test_data[test_data[feature_name] == 0]
    data_one = test_data[test_data[feature_name] == 1]
    
    X_zero = data_zero.drop(['checked'], axis=1)
    X_one = data_one.drop(['checked'], axis=1)
    
    input_name = session.get_inputs()[0].name
    
    y_pred_onnx_zero = session.run( None, {input_name: X_zero.values.astype('float32')} )[0]
    y_pred_onnx_one = session.run( None, {input_name: X_one.values.astype('float32')} )[0]
    
    parity_zero = round(y_pred_onnx_zero.sum() / len(y_pred_onnx_zero), 3)
    parity_one = round(y_pred_onnx_one.sum() / len(y_pred_onnx_one), 3)

    print('========')
    print(f'{feature_name}')
    print('Parity (0): ', parity_zero)
    print('Parity (1): ', parity_one)
    
    abs_diff = np.abs(parity_zero - parity_one)
    abs_percent_diff = np.abs(parity_zero - parity_one) / ((parity_zero + parity_one) / 2) * 100
    
    print('Absolute difference: ', round(abs_diff, 3))
    print('Absolute Percentage difference: ', round(abs_percent_diff, 1))
    
    impact_disparity = min(parity_zero, parity_one) / max(parity_zero, parity_one)
    if (impact_disparity < 0.8):
        print(f'Impact disparity: {impact_disparity} < 0.8 - BAD')
    else:
        print(f'Impact disparity: {impact_disparity} >= 0.8 - GOOD')

In [14]:
#simple statistical parity as explained in Lighthouse report.
def simple_statistical_multiple_children(test_data, session):
    data_less_than_two_children = test_data[test_data['relatie_kind_huidige_aantal'] < 2]
    data_two_or_more_children = test_data[test_data['relatie_kind_huidige_aantal'] >= 2]
    
    X_less_than_two_children  = data_less_than_two_children.drop(['checked'], axis=1)
    X_two_or_more_children = data_two_or_more_children.drop(['checked'], axis=1)
    
    input_name = session.get_inputs()[0].name
    
    y_pred_onnx_less_than_two_children  = session.run( None, {input_name: X_less_than_two_children .values.astype('float32')} )[0]
    y_pred_onnx_two_or_more_children = session.run( None, {input_name: X_two_or_more_children.values.astype('float32')} )[0]
    
    parity_less_than_two_children  = round(y_pred_onnx_less_than_two_children.sum() / len(y_pred_onnx_less_than_two_children), 3)
    parity_two_or_more_children = round(y_pred_onnx_two_or_more_children.sum() / len(y_pred_onnx_two_or_more_children), 3)

    print('========')
    print('relatie_kind_huidige_aantal')
    print('Parity (children<2): ', round(parity_less_than_two_children, 3))
    print('Parity (children>=2): ', round(parity_two_or_more_children, 3))
    
    abs_diff = np.abs(parity_less_than_two_children - parity_two_or_more_children)
    abs_percent_diff = np.abs(parity_less_than_two_children - parity_two_or_more_children) / ((parity_less_than_two_children + parity_two_or_more_children) / 2) * 100
    
    print('Absolute difference: ', round(abs_diff, 3))
    print('Absolute Percentage difference: ', round(abs_percent_diff, 1))
    
    impact_disparity = min(parity_less_than_two_children, parity_two_or_more_children) / max(parity_less_than_two_children, parity_two_or_more_children)
    if (impact_disparity < 0.8):
        print(f'Impact disparity: {impact_disparity} < 0.8 - BAD')
    else:
        print(f'Impact disparity: {impact_disparity} >= 0.8 - GOOD')

In [15]:
#simple statistical parity as explained in Lighthouse report.
def simple_statistical_parity_single_mother1(test_data, session):
    data_single_mother = test_data[(test_data['persoon_geslacht_vrouw'] == 1) & 
                                   (test_data['relatie_kind_heeft_kinderen'] == 1) &
                                   (test_data['relatie_partner_huidige_partner___partner__gehuwd_'] == 0)]
    data_other = test_data[(test_data['persoon_geslacht_vrouw'] == 0) | 
                                   (test_data['relatie_kind_heeft_kinderen'] == 0) |
                                   (test_data['relatie_partner_huidige_partner___partner__gehuwd_'] == 1)]
    
    X_single_mother = data_single_mother.drop(['checked'], axis=1)
    X_other = data_other.drop(['checked'], axis=1)
    
    input_name = session.get_inputs()[0].name
    
    y_pred_onnx_single_mother = session.run( None, {input_name: X_single_mother.values.astype('float32')} )[0]
    y_pred_onnx_other = session.run( None, {input_name: X_other.values.astype('float32')} )[0]
    
    parity_single_mother = round(y_pred_onnx_single_mother.sum() / len(y_pred_onnx_single_mother), 3)
    parity_other = round(y_pred_onnx_other.sum() / len(y_pred_onnx_other), 3)

    print('========')
    print('Single mother (using relatie_partner_huidige_partner___partner__gehuwd_)')
    print('Parity (other): ', round(parity_other, 3))
    print('Parity (SM): ', round(parity_single_mother, 3))
    
    
    abs_diff = np.abs(parity_other - parity_single_mother)
    abs_percent_diff = np.abs(parity_other - parity_single_mother) / ((parity_other + parity_single_mother) / 2) * 100
    
    print('Absolute difference: ', round(abs_diff, 3))
    print('Absolute Percentage difference: ', round(abs_percent_diff, 1))
    
    impact_disparity = min(parity_single_mother, parity_other) / max(parity_single_mother, parity_other)
    if (impact_disparity < 0.8):
        print(f'Impact disparity: {impact_disparity} < 0.8 - BAD')
    else:
        print(f'Impact disparity: {impact_disparity} >= 0.8 - GOOD')

In [16]:
def simple_statistical_parity_single_mother2(test_data, session):
    data_single_mother = test_data[(test_data['persoon_geslacht_vrouw'] == 1) & 
                                   (test_data['relatie_kind_heeft_kinderen'] == 1) &
                                   (test_data['relatie_overig_kostendeler'] == 0)]
    data_other = test_data[(test_data['persoon_geslacht_vrouw'] == 0) | 
                                   (test_data['relatie_kind_heeft_kinderen'] == 0) |
                                   (test_data['relatie_overig_kostendeler'] == 1)]
    
    X_single_mother = data_single_mother.drop(['checked'], axis=1)
    X_other = data_other.drop(['checked'], axis=1)
    
    input_name = session.get_inputs()[0].name
    
    y_pred_onnx_single_mother = session.run( None, {input_name: X_single_mother.values.astype('float32')} )[0]
    y_pred_onnx_other = session.run( None, {input_name: X_other.values.astype('float32')} )[0]
    
    parity_single_mother = round(y_pred_onnx_single_mother.sum() / len(y_pred_onnx_single_mother), 3)
    parity_other = round(y_pred_onnx_other.sum() / len(y_pred_onnx_other), 3)

    print('========')
    print('Single mother (using relatie_overig_kostendeler)')
    print('Parity (other): ', round(parity_other, 3))
    print('Parity (SM): ', round(parity_single_mother, 3))
    
    
    abs_diff = np.abs(parity_other - parity_single_mother)
    abs_percent_diff = np.abs(parity_other - parity_single_mother) / ((parity_other + parity_single_mother) / 2) * 100
    
    print('Absolute difference: ', round(abs_diff, 3))
    print('Absolute Percentage difference: ', round(abs_percent_diff, 1))
    
    impact_disparity = min(parity_single_mother, parity_other) / max(parity_single_mother, parity_other)
    if (impact_disparity < 0.8):
        print(f'Impact disparity: {impact_disparity} < 0.8 - BAD')
    else:
        print(f'Impact disparity: {impact_disparity} >= 0.8 - GOOD')

SIMPLE STATISTICAL PARITY

In [17]:
print('MODEL 1 SIMPLE STATISTICAL PARITY')
simple_statistical_parity_binary('persoon_geslacht_vrouw', test_data, session_model1)
simple_statistical_multiple_children(test_data, session_model1)
simple_statistical_parity_single_mother1(test_data, session_model1)
simple_statistical_parity_single_mother2(test_data, session_model1)
simple_statistical_parity_binary('persoonlijke_eigenschappen_taaleis_voldaan', test_data, session_model1)

MODEL 1 SIMPLE STATISTICAL PARITY
persoon_geslacht_vrouw
Parity (0):  0.145
Parity (1):  0.193
Absolute difference:  0.048
Absolute Percentage difference:  28.4
Impact disparity: 0.7512953367875647 < 0.8 - BAD
relatie_kind_huidige_aantal
Parity (children<2):  0.155
Parity (children>=2):  0.238
Absolute difference:  0.083
Absolute Percentage difference:  42.2
Impact disparity: 0.6512605042016807 < 0.8 - BAD
Single mother (using relatie_partner_huidige_partner___partner__gehuwd_)
Parity (other):  0.152
Parity (SM):  0.229
Absolute difference:  0.077
Absolute Percentage difference:  40.4
Impact disparity: 0.6637554585152838 < 0.8 - BAD
Single mother (using relatie_overig_kostendeler)
Parity (other):  0.168
Parity (SM):  0.169
Absolute difference:  0.001
Absolute Percentage difference:  0.6
Impact disparity: 0.9940828402366864 >= 0.8 - GOOD
persoonlijke_eigenschappen_taaleis_voldaan
Parity (0):  0.221
Parity (1):  0.123
Absolute difference:  0.098
Absolute Percentage difference:  57.0
Impa

In [18]:
print('MODEL 2 SIMPLE STATISTICAL PARITY')
simple_statistical_parity_binary('persoon_geslacht_vrouw', test_data, session_model2)
simple_statistical_multiple_children(test_data, session_model2)
simple_statistical_parity_single_mother1(test_data, session_model2)
simple_statistical_parity_single_mother2(test_data, session_model2)
simple_statistical_parity_binary('persoonlijke_eigenschappen_taaleis_voldaan', test_data, session_model2)

MODEL 2 SIMPLE STATISTICAL PARITY
persoon_geslacht_vrouw
Parity (0):  0.149
Parity (1):  0.143
Absolute difference:  0.006
Absolute Percentage difference:  4.1
Impact disparity: 0.959731543624161 >= 0.8 - GOOD
relatie_kind_huidige_aantal
Parity (children<2):  0.136
Parity (children>=2):  0.198
Absolute difference:  0.062
Absolute Percentage difference:  37.1
Impact disparity: 0.6868686868686869 < 0.8 - BAD
Single mother (using relatie_partner_huidige_partner___partner__gehuwd_)
Parity (other):  0.139
Parity (SM):  0.174
Absolute difference:  0.035
Absolute Percentage difference:  22.4
Impact disparity: 0.7988505747126438 < 0.8 - BAD
Single mother (using relatie_overig_kostendeler)
Parity (other):  0.15
Parity (SM):  0.125
Absolute difference:  0.025
Absolute Percentage difference:  18.2
Impact disparity: 0.8333333333333334 >= 0.8 - GOOD
persoonlijke_eigenschappen_taaleis_voldaan
Parity (0):  0.193
Parity (1):  0.105
Absolute difference:  0.088
Absolute Percentage difference:  59.1
Impa