In [1]:
import sys
sys.path.append("..")

In [2]:
from src.datasets import BiasInBios
from src.datasets import HateXplainRace
from src.datasets import HateXplainGender
from src.datasets import TwitterAAE

In [3]:
from src.utils import metrics

In [4]:
import random

In [5]:
def parse_output(baseline, performance):
    """Flatten one ``metrics`` result into a single results row.

    Args:
        baseline: Row label (the cells in this notebook pass ``'majority'``
            or ``'random'``); stored unchanged as the first element.
        performance: Mapping that provides ``"total_score"``, ``"score"``
            (group name -> score) and ``"max_gaps"`` (class name -> indexable
            record with at least four elements).

    Returns:
        A list laid out as::

            [baseline, 'N/A', 'N/A', 'N/A', total_score,
             {group: score}, ...,
             smallest value found at index 3 of the gap records,
             {class_name: [record elements]}, ...]

        The per-class entries are ordered by ascending index-3 value. When
        ``"max_gaps"`` is empty the summary slot holds ``'N/A'`` (instead of
        raising ``IndexError``) so the row keeps the same layout.
    """
    # The three 'N/A' entries are fixed placeholders that every row carries.
    row = [baseline, 'N/A', 'N/A', 'N/A', performance["total_score"]]

    # One single-entry dict per group, in the mapping's own iteration order.
    row.extend({group: score} for group, score in performance["score"].items())

    # Stable sort on element 3 of each record; ties keep their input order.
    ordered_gaps = sorted(performance["max_gaps"].items(), key=lambda item: item[1][3])

    # After sorting, the first record carries the smallest index-3 value.
    row.append(ordered_gaps[0][1][3] if ordered_gaps else 'N/A')

    # Records are copied into plain lists so tuples and lists render alike.
    row.extend({class_name: list(record)} for class_name, record in ordered_gaps)

    return row

In [6]:
# Build one dataset object per benchmark. Each constructor receives a data
# path that is relative to this notebook's location.
bias = BiasInBios("../data/biasbios")
haterace = HateXplainRace("../data/HateXplain")
# The race and gender variants are given the same HateXplain path.
hategender = HateXplainGender("../data/HateXplain")
aae = TwitterAAE("../data/moji/twitteraae_sentiment_race")

In [7]:
# Each create_prompts() call is unpacked into three values: a train frame, a
# test frame and a third "*_demo" value that the cells below do not use.
# The train frames feed the label counts; the test frames are what gets scored.
bias_train_df, bias_test_df, bias_demo = bias.create_prompts()
haterace_train_df, haterace_test_df, haterace_demo = haterace.create_prompts()
hategender_train_df, hategender_test_df, hategender_demo = hategender.create_prompts()
aae_train_df, aae_test_df, aae_demo = aae.create_prompts()

# Majority

In [8]:
# Bias in Bios: training label counts, used to pick the majority class.
bias_train_df["labels"].value_counts()

professor       76771
physician       25067
attorney        21194
photographer    15810
journalist      12967
psychologist    11891
teacher         10527
dentist          9411
Name: labels, dtype: int64

In [9]:
# Constant prediction: the most frequent training label, once per test row.
majority_baseline_bias = ['professor'] * len(bias_test_df)

In [10]:
# Score the constant predictions against the Bias in Bios test split.
parse_output(
    'majority',
    metrics(
        majority_baseline_bias,
        bias_test_df['labels'].tolist(),
        'bias',
        bias_test_df['demographics'].tolist(),
        bias.demographics,
    ),
)

['majority',
 'N/A',
 'N/A',
 'N/A',
 0.027777777777777776,
 {'m': 0.027777777777777776},
 {'f': 0.027777777777777776},
 1.0,
 {'attorney': ['m', 'm', 0.0, 1.0]},
 {'dentist': ['m', 'm', 0.0, 1.0]},
 {'teacher': ['m', 'm', 0.0, 1.0]},
 {'photographer': ['m', 'm', 0.0, 1.0]},
 {'physician': ['m', 'm', 0.0, 1.0]},
 {'journalist': ['m', 'm', 0.0, 1.0]},
 {'psychologist': ['m', 'm', 0.0, 1.0]},
 {'professor': ['m', 'm', 0.0, 1.0]}]

In [11]:
# Twitter AAE: training label counts, used to pick the majority class.
aae_train_df["labels"].value_counts()

happy    80000
sad      80000
Name: labels, dtype: int64

In [12]:
# The two training classes are tied (80000 each), so 'sad' is an arbitrary
# pick for the constant prediction.
majority_baseline_aae = ['sad'] * len(aae_test_df)

In [13]:
# Score the constant predictions against the Twitter AAE test split.
parse_output(
    'majority',
    metrics(
        majority_baseline_aae,
        aae_test_df['labels'].tolist(),
        'aae',
        aae_test_df['demographics'].tolist(),
        aae.demographics,
    ),
)

['majority',
 'N/A',
 'N/A',
 'N/A',
 0.3333333333333333,
 {'aa': 0.3333333333333333},
 {'wh': 0.3333333333333333},
 1.0,
 {'sad': ['aa', 'aa', 0.0, 1.0]},
 {'happy': ['aa', 'aa', 0.0, 1.0]}]

In [15]:
# HateXplain (race): training label counts, used to pick the majority class.
haterace_train_df["labels"].value_counts()

yes    4371
no     1005
Name: labels, dtype: int64

In [16]:
# Constant prediction: the most frequent training label, once per test row.
majority_baseline_haterace = ['yes'] * len(haterace_test_df)

In [17]:
# Score the constant predictions against the HateXplain race test split.
parse_output(
    'majority',
    metrics(
        majority_baseline_haterace,
        haterace_test_df['labels'].tolist(),
        'hatexplain-race',
        haterace_test_df['demographics'].tolist(),
        haterace.demographics,
    ),
)

['majority',
 'N/A',
 'N/A',
 'N/A',
 0.44362745098039214,
 {'African': 0.4588744588744589},
 {'Arab': 0.47161572052401746},
 {'Asian': 0.40540540540540543},
 {'Hispanic': 0.46969696969696967},
 {'Caucasian': 0.34567901234567905},
 1.0,
 {'yes': ['African', 'African', 0.0, 1.0]},
 {'no': ['African', 'African', 0.0, 1.0]}]

In [19]:
# HateXplain (gender): training label counts, used to pick the majority class.
hategender_train_df["labels"].value_counts()

yes    1937
no      669
Name: labels, dtype: int64

In [20]:
# Constant prediction: the most frequent training label, once per test row.
majority_baseline_hategender = ['yes'] * len(hategender_test_df)

In [21]:
# Score the constant predictions against the HateXplain gender test split.
# The task name is 'hatexplain-gender', the same name the random-baseline
# cell passes for this dataset; it was 'hatexplain-race' here, a copy-paste
# slip from the race cell.
parse_output(
    'majority',
    metrics(
        majority_baseline_hategender,
        hategender_test_df['labels'].tolist(),
        'hatexplain-gender',
        hategender_test_df['demographics'].tolist(),
        hategender.demographics,
    ),
)

['majority',
 'N/A',
 'N/A',
 'N/A',
 0.42985611510791366,
 {'Men': 0.379746835443038},
 {'Women': 0.4381551362683438},
 1.0,
 {'yes': ['Men', 'Men', 0.0, 1.0]},
 {'no': ['Men', 'Men', 0.0, 1.0]}]

# Random

In [22]:
# Bias in Bios: uniform random label per test row.
# A dedicated, seeded generator keeps this baseline identical across kernel
# restarts and cell re-runs; the module-level random.choices is unseeded and
# draws a different sample on every fresh run.
random_baseline_bias = random.Random(0).choices(bias.labels, k=len(bias_test_df))

In [23]:
# Score the random predictions against the Bias in Bios test split.
parse_output(
    'random',
    metrics(
        random_baseline_bias,
        bias_test_df['labels'].tolist(),
        'bias',
        bias_test_df['demographics'].tolist(),
        bias.demographics,
    ),
)

['random',
 'N/A',
 'N/A',
 'N/A',
 0.1187832435932761,
 {'m': 0.11610075900028297},
 {'f': 0.12161981367397924},
 0.98,
 {'photographer': ['f', 'm', 0.020000000000000004, 0.98]},
 {'professor': ['m', 'f', 0.01999999999999999, 0.98]},
 {'psychologist': ['f', 'm', 0.018000000000000002, 0.982]},
 {'journalist': ['f', 'm', 0.011999999999999997, 0.988]},
 {'dentist': ['f', 'm', 0.010000000000000009, 0.99]},
 {'attorney': ['f', 'm', 0.008000000000000007, 0.992]},
 {'physician': ['m', 'f', 0.006000000000000005, 0.994]},
 {'teacher': ['f', 'm', 0.0, 1.0]}]

In [25]:
# Twitter AAE: uniform random label per test row.
# A dedicated, seeded generator keeps this baseline identical across kernel
# restarts and cell re-runs; the module-level random.choices is unseeded and
# draws a different sample on every fresh run.
random_baseline_aae = random.Random(0).choices(aae.labels, k=len(aae_test_df))

In [26]:
# Score the random predictions against the Twitter AAE test split.
parse_output(
    'random',
    metrics(
        random_baseline_aae,
        aae_test_df['labels'].tolist(),
        'aae',
        aae_test_df['demographics'].tolist(),
        aae.demographics,
    ),
)

['random',
 'N/A',
 'N/A',
 'N/A',
 0.49562366813124864,
 {'aa': 0.4982484633859191},
 {'wh': 0.4929988592474333},
 0.9944999999999999,
 {'happy': ['aa', 'wh', 0.005500000000000005, 0.9944999999999999]},
 {'sad': ['aa', 'wh', 0.0050000000000000044, 0.995]}]

In [28]:
# HateXplain (race): uniform random label per test row.
# A dedicated, seeded generator keeps this baseline identical across kernel
# restarts and cell re-runs; the module-level random.choices is unseeded and
# draws a different sample on every fresh run.
random_baseline_haterace = random.Random(0).choices(haterace.labels, k=len(haterace_test_df))

In [29]:
# Score the random predictions against the HateXplain race test split.
parse_output(
    'random',
    metrics(
        random_baseline_haterace,
        haterace_test_df['labels'].tolist(),
        'hatexplain-race',
        haterace_test_df['demographics'].tolist(),
        haterace.demographics,
    ),
)

['random',
 'N/A',
 'N/A',
 'N/A',
 0.4305019712014842,
 {'African': 0.40324257893741733},
 {'Arab': 0.4053582752060875},
 {'Asian': 0.5580357142857143},
 {'Hispanic': 0.4493006993006993},
 {'Caucasian': 0.45835948005378757},
 0.4,
 {'no': ['Hispanic', 'Caucasian', 0.6, 0.4]},
 {'yes': ['Asian', 'Hispanic', 0.21397849462365587, 0.7860215053763442]}]

In [31]:
# HateXplain (gender): uniform random label per test row.
# A dedicated, seeded generator keeps this baseline identical across kernel
# restarts and cell re-runs; the module-level random.choices is unseeded and
# draws a different sample on every fresh run.
random_baseline_hategender = random.Random(0).choices(hategender.labels, k=len(hategender_test_df))

In [32]:
# Score the random predictions against the HateXplain gender test split.
parse_output(
    'random',
    metrics(
        random_baseline_hategender,
        hategender_test_df['labels'].tolist(),
        'hatexplain-gender',
        hategender_test_df['demographics'].tolist(),
        hategender.demographics,
    ),
)

['random',
 'N/A',
 'N/A',
 'N/A',
 0.4502115059221658,
 {'Men': 0.5416666666666666},
 {'Women': 0.43135496981650834},
 0.9022328548644338,
 {'yes': ['Men', 'Women', 0.09776714513556617, 0.9022328548644338]},
 {'no': ['Men', 'Women', 0.05173951828724349, 0.9482604817127565]}]