# Imports

In [1]:
import pandas as pd
import numpy as np
import os
from sklearn import metrics
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

pd.set_option('display.max_columns', None)
pd.set_option('display.max_colwidth', None)

# Result path

In [2]:
dataset_name = 'social bias frames'
model_name = 'bert'
result_folder = f'../../results/{dataset_name}'
test_csv_filepath = os.path.join(result_folder, 'test.csv')

model_folder = f'{result_folder}/{model_name}' # for this particular model
result_filepath = os.path.join(model_folder, 'results.csv')
dp_result_filepath = os.path.join(model_folder, 'results_dp.csv')

# Dataset
The dataset can be preprocessed from the original dataset to be used here. To simplify things, I saved the preprocessed datasets during the tokenizing process as csv files and then downloaded them in the corresponding dataset folder of [`result`](../results/) directory.

You can recreate the processed datasets using the tokenize notebooks for that particular dataset. That would give you train, test and validation csv files as well as the tokenized data.

In [29]:
result = pd.read_csv(result_filepath)
dp_result = pd.read_csv(dp_result_filepath)
df = pd.read_csv(test_csv_filepath)

In [30]:
df.head(2)

Unnamed: 0,index,HITId,post,labels,male,female,white,black
0,0,3DA79LNS59PNAW62RLUHAOPS7E2T38,Fuc u say ? &#8220;@AyyyeThatsChubb: All these hoes look good on IG&#8221; http://t.co/PlsFL84cDp,1.0,0.0,0.0,0.0,0.0
1,1,3OQQD2WO8I0X1IQRX9VDWHNJWXN3I2,Yo hoe is like a tick so she hunting for a blood,1.0,0.0,0.0,0.0,0.0


## Split test result

In [31]:
# only calculate test result
result = result[result['split']=='test']
dp_result = dp_result[dp_result['split']=='test']

# drop split column
result.drop(columns=['split'], inplace=True)
dp_result.drop(columns=['split'], inplace=True)

## Prepare test data

In [32]:
# result has id column which is the same as the text ids from raw dataset
df.rename({'index': id_column}, axis=1, inplace=True)
df.drop(columns=['labels'], inplace=True)

## Merge

In [33]:
result = result.merge(df, on=id_column, how='inner').reset_index(drop=True)
dp_result = dp_result.merge(df, on=id_column, how='inner').reset_index(drop=True)

In [34]:
result.head()

Unnamed: 0,id,labels,probs,HITId,post,male,female,white,black
0,0,1,0.617603,3DA79LNS59PNAW62RLUHAOPS7E2T38,Fuc u say ? &#8220;@AyyyeThatsChubb: All these hoes look good on IG&#8221; http://t.co/PlsFL84cDp,0.0,0.0,0.0,0.0
1,1,1,0.810559,3OQQD2WO8I0X1IQRX9VDWHNJWXN3I2,Yo hoe is like a tick so she hunting for a blood,0.0,0.0,0.0,0.0
2,2,1,0.677522,32PT7WK7DM9GT7A55VU25K764SF3D0,"When u hitting it from the back u gotta call her ""bitch"" they love that but don't u dare call em ""slut""",0.0,1.0,0.0,0.0
3,3,1,0.374349,3ZFRE2BDQ98VC6MFH2QN3SFDZ5PXZS,Studying for this bio test like a hoe &#128554;,0.0,0.0,0.0,0.0
4,4,1,0.78907,38F60IALAGBRT4758YCY8QRL42IT0D,You niggas talking to these females trying to get some pussy but dont realize you're only boosting their ego and getting curved.,0.0,1.0,0.0,0.0


# Evaluation

In [14]:
import sys
# this adds the src folder in the sys path, where the metric_utils.py file is
# not needed if this notebook is in the same folder
sys.path.append( '..' )
from metric_utils import *

## Convert probability to prediction

In [35]:
result[prediction_column] = result[probability_column] >=0.5
dp_result[prediction_column] = dp_result[probability_column] >=0.5

## Identity groups

In [36]:
group_map = {
    'gender': {
        'unprivileged':['female'],
        'privileged':['male']
    },
    'race': {
        'unprivileged':['black'],
        'privileged': ['white']
    }
}

identities = []
for group_key in group_map.keys():
    subgroup_map = group_map[group_key]
    for subgroup_key in subgroup_map.keys():
        identities.extend(subgroup_map[subgroup_key])

print(identities)

['female', 'male', 'black', 'white']


### Binarize identity and target columns

In [37]:
result = binarize(result, [target_column] + identities)
dp_result = binarize(dp_result, [target_column] + identities)

In [46]:
for indentity in identities:
    print(indentity)
    print(result[result[indentity]][target_column].value_counts())

female
True    355
Name: labels, dtype: int64
male
True    46
Name: labels, dtype: int64
black
True     602
False      1
Name: labels, dtype: int64
white
True    7
Name: labels, dtype: int64


## Bias

In [18]:
bias_results = {
   # make sure your calculate bias method returns bias metrics in the same order
   'fairness_metrics': ['demographic parity', 'Equality of Opportunity (w.r.t y = 1)',
'Equality of Opportunity (w.r.t y = 0)', 'Equality of Odds', 'unprotected-accuracy',
'protected-accuracy', 'accuracy']
}

for group_key in group_map.keys():
   subgroup_map = group_map[group_key]
   privileged_group = subgroup_map['privileged']
   unprivileged_group = subgroup_map['unprivileged']

   bias_results[group_key] = calculate_bias(result, privileged_group, unprivileged_group)
   bias_results[group_key+'_DP'] = calculate_bias(dp_result, privileged_group, unprivileged_group)

bias_results = pd.DataFrame(bias_results) 
bias_results



ValueError: not enough values to unpack (expected 4, got 1)

In [97]:
bias_results.to_csv(os.path.join(model_folder, 'bias.csv'), index=False)

## Overall metrics

In [19]:
overall_results = {
    'metrics': ['auc', 'accuracy', 'f1_score', 'precision', 'recall', 'true positive rate', 'false positive rate']
}

for group_key in group_map.keys():
    subgroup_map = group_map[group_key]
    privileged_group = subgroup_map['privileged']
    unprivileged_group = subgroup_map['unprivileged']

    privileged_group_name = ','.join(privileged_group)
    unprivileged_group_name = ','.join(unprivileged_group)

    overall_results[privileged_group_name] = calculate_metrics(result, privileged_group)
    overall_results[privileged_group_name + '_DP'] = calculate_metrics(dp_result, privileged_group)

    overall_results[unprivileged_group_name] = calculate_metrics(result, unprivileged_group)
    overall_results[unprivileged_group_name + '_DP'] = calculate_metrics(dp_result, unprivileged_group)

overall_results['Total'] = calculate_metrics(result, [])
overall_results['Total_DP'] = calculate_metrics(dp_result, [])

overall_results = pd.DataFrame(overall_results) 
overall_results

ValueError: Only one class present in y_true. ROC AUC score is not defined in that case.

In [99]:
overall_results.to_csv(os.path.join(model_folder, 'overall_resuls.csv'), index=False)