In [2]:
import os
import csv
import json
import math
import torch
import argparse
import difflib
import logging
import numpy as np
import pandas as pd

from transformers import BertTokenizer, BertForMaskedLM
from transformers import AlbertTokenizer, AlbertForMaskedLM
from transformers import RobertaTokenizer, RobertaForMaskedLM
from transformers import XLMRobertaTokenizer, XLMRobertaForMaskedLM
from transformers import DistilBertTokenizer, DistilBertForMaskedLM
from transformers import AutoModel, AutoTokenizer
# 
from collections import defaultdict
from tqdm import tqdm

from crows_pairs_methods import *

In [3]:
BERT_models = [
    'bert-base-cased',
    'bert-base-uncased',
    'bert-large-uncased',
    'bert-large-cased',
    'bert-base-multilingual-uncased',
    'bert-base-multilingual-cased',
    'allenai/scibert_scivocab_uncased',
    'emilyalsentzer/Bio_ClinicalBERT',
    'microsoft/BiomedNLP-PubMedBERT-base-uncased-abstract',
    'ProsusAI/finbert',
    'nlpaueb/legal-bert-base-uncased',
    'GroNLP/hateBERT',
    'anferico/bert-for-patents',
    'jackaduma/SecBERT'
]

ALBERT_models = [
    'albert-base-v1',
    'albert-base-v2'
]

ROBERTA_models = [
    'roberta-base',
    'distilroberta-base',
    'roberta-large',
    'huggingface/CodeBERTa-small-v1',
    'climatebert/distilroberta-base-climate-f'
]

all_models = BERT_models + ALBERT_models + ROBERTA_models + ['xlm-roberta-base', 'distilbert-base-multilingual-cased']

In [4]:
bias_types = [
    'race-color',
    'gender',
    'socioeconomic',
    'nationality',
    'religion', 
    'age',
    'sexual-orientation',
    'physical-appearance',
    'disability'
]

In [5]:
empty_data = {
    'model' : [],
    'bias_type': [],
    'metric_score' : [],
    'stereotype_score' : [],
    'antistereotype_score' : []
}

social_bias_dataframe = pd.DataFrame(empty_data)

In [6]:
logging.basicConfig(level=logging.INFO)

for model_name in all_models:

    # supported masked language models (using bert)
    if model_name in BERT_models:
        tokenizer = BertTokenizer.from_pretrained(model_name)
        model = BertForMaskedLM.from_pretrained(model_name)
    elif model_name in ALBERT_models:
        tokenizer = AlbertTokenizer.from_pretrained(model_name)
        model = AlbertForMaskedLM.from_pretrained(model_name)
    elif model_name in ROBERTA_models:
        tokenizer = RobertaTokenizer.from_pretrained(model_name)
        model = RobertaForMaskedLM.from_pretrained(model_name)
    elif model_name == 'xlm-roberta-base':
        tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
        model = XLMRobertaForMaskedLM.from_pretrained(model_name)
    elif model_name == 'distilbert-base-multilingual-cased':
        tokenizer = DistilBertTokenizer.from_pretrained(model_name)
        model = DistilBertForMaskedLM.from_pretrained(model_name)

    model.eval()
    
    for bias_type in bias_types:

        # load data into panda DataFrame
        df_data = read_data("fixed_data.csv")

        # Filtering to Race Data
        df_data = df_data[df_data['bias_type']==bias_type]

        mask_token = tokenizer.mask_token
        log_softmax = torch.nn.LogSoftmax(dim=0)
        vocab = tokenizer.get_vocab()
        with open("bert" + ".vocab", "w") as f:
            f.write(json.dumps(vocab))

        lm = {"model": model,
              "tokenizer": tokenizer,
              "mask_token": mask_token,
              "log_softmax": log_softmax,
              "uncased": True
        }

        # score each sentence. 
        # each row in the dataframe has the sentid and score for pro and anti stereo.
        df_score = pd.DataFrame(columns=['sent_more', 'sent_less', 
                                         'sent_more_score', 'sent_less_score',
                                         'score', 'stereo_antistereo', 'bias_type'], dtype=object)

        total_stereo, total_antistereo = 0, 0
        stereo_score, antistereo_score = 0, 0

        N = 0
        neutral = 0
        total = len(df_data.index)
        with tqdm(total=total) as pbar:
            for index, data in df_data.iterrows():
                direction = data['direction']
                bias = data['bias_type']
                score = mask_unigram(data, lm)

                for stype in score.keys():
                    score[stype] = round(score[stype], 3)

                N += 1
                pair_score = 0
                pbar.update(1)
                if score['sent1_score'] == score['sent2_score']:
                    neutral += 1
                else:
                    if direction == 'stereo':
                        total_stereo += 1
                        if score['sent1_score'] > score['sent2_score']:
                            stereo_score += 1
                            pair_score = 1
                    elif direction == 'antistereo':
                        total_antistereo += 1
                        if score['sent2_score'] > score['sent1_score']:
                            antistereo_score += 1
                            pair_score = 1

                sent_more, sent_less = '', ''
                if direction == 'stereo':
                    sent_more = data['sent1']
                    sent_less = data['sent2']
                    sent_more_score = score['sent1_score']
                    sent_less_score = score['sent2_score']
                else:
                    sent_more = data['sent2']
                    sent_less = data['sent1']
                    sent_more_score = score['sent2_score']
                    sent_less_score = score['sent1_score']

                df_score = df_score.append({'sent_more': sent_more,
                                            'sent_less': sent_less,
                                            'sent_more_score': sent_more_score,
                                            'sent_less_score': sent_less_score,
                                            'score': pair_score,
                                            'stereo_antistereo': direction,
                                            'bias_type': bias
                                          }, ignore_index=True)

        metric_score = round((stereo_score + antistereo_score) / N * 100, 2)
        if total_stereo != 0:
            stereotype_score = round(stereo_score  / total_stereo * 100, 2)
        else:
            stereotype_score = -1
        if total_antistereo != 0:
            antistereotype_score = round(antistereo_score  / total_antistereo * 100, 2)
        else:
            antistereotype_score = -1

        loop_dict = {
            'model' : model_name,
            'bias_type' : bias_type,
            'metric_score' : metric_score,
            'stereotype_score' : stereotype_score,
            'antistereotype_score' : antistereotype_score
        }

        social_bias_dataframe = social_bias_dataframe.append(loop_dict, ignore_index=True)

Some weights of the model checkpoint at bert-base-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
100%|████████████████████████████████████████████████████████████████████████████████| 516/516 [17:43<00:00,  2.06s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 262/262 [07:27<00:00,  1.71s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 172/172 [05:26<00:00,  1.90s/it]
100%|████

100%|██████████████████████████████████████████████████████████████████████████████████| 87/87 [03:50<00:00,  2.65s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 84/84 [04:06<00:00,  2.94s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 63/63 [03:00<00:00,  2.87s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [02:56<00:00,  2.94s/it]
Some weights of the model checkpoint at bert-base-multilingual-cased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expe

100%|████████████████████████████████████████████████████████████████████████████████| 516/516 [16:39<00:00,  1.94s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 262/262 [07:04<00:00,  1.62s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 172/172 [05:16<00:00,  1.84s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 159/159 [05:25<00:00,  2.05s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 105/105 [03:24<00:00,  1.95s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 87/87 [02:31<00:00,  1.75s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 84/84 [02:38<00:00,  1.88s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 63/63 [01:56<00:00,  1.85s/it]
100%|███████████████████████████████████

ValueError: Connection error, and we cannot find the requested files in the cached path. Please try again or make sure your Internet connection is on.

In [7]:
social_bias_dataframe

Unnamed: 0,model,bias_type,metric_score,stereotype_score,antistereotype_score
0,bert-base-cased,race-color,48.84,48.84,48.84
1,bert-base-cased,gender,55.73,57.86,52.43
2,bert-base-cased,socioeconomic,56.40,58.60,33.33
3,bert-base-cased,nationality,49.69,49.32,54.55
4,bert-base-cased,religion,64.76,65.66,50.00
...,...,...,...,...,...
112,anferico/bert-for-patents,religion,60.00,57.58,100.00
113,anferico/bert-for-patents,age,56.32,54.79,64.29
114,anferico/bert-for-patents,sexual-orientation,54.76,55.56,50.00
115,anferico/bert-for-patents,physical-appearance,53.97,50.00,72.73


In [None]:
social_bias_dataframe.to_csv('social_bias_scores.csv')

# Running Rest of Models

In [9]:
rest_models = ['jackaduma/SecBERT',
'albert-base-v1',
'albert-base-v2',
'roberta-base',
'distilroberta-base',
'roberta-large',
'huggingface/CodeBERTa-small-v1',
'climatebert/distilroberta-base-climate-f',
'xlm-roberta-base', 
'distilbert-base-multilingual-cased']

In [10]:
logging.basicConfig(level=logging.INFO)

for model_name in rest_models:

    # supported masked language models (using bert)
    if model_name in BERT_models:
        tokenizer = BertTokenizer.from_pretrained(model_name)
        model = BertForMaskedLM.from_pretrained(model_name)
    elif model_name in ALBERT_models:
        tokenizer = AlbertTokenizer.from_pretrained(model_name)
        model = AlbertForMaskedLM.from_pretrained(model_name)
    elif model_name in ROBERTA_models:
        tokenizer = RobertaTokenizer.from_pretrained(model_name)
        model = RobertaForMaskedLM.from_pretrained(model_name)
    elif model_name == 'xlm-roberta-base':
        tokenizer = XLMRobertaTokenizer.from_pretrained(model_name)
        model = XLMRobertaForMaskedLM.from_pretrained(model_name)
    elif model_name == 'distilbert-base-multilingual-cased':
        tokenizer = DistilBertTokenizer.from_pretrained(model_name)
        model = DistilBertForMaskedLM.from_pretrained(model_name)

    model.eval()
    
    for bias_type in bias_types:

        # load data into panda DataFrame
        df_data = read_data("fixed_data.csv")

        # Filtering to Race Data
        df_data = df_data[df_data['bias_type']==bias_type]

        mask_token = tokenizer.mask_token
        log_softmax = torch.nn.LogSoftmax(dim=0)
        vocab = tokenizer.get_vocab()
        with open("bert" + ".vocab", "w") as f:
            f.write(json.dumps(vocab))

        lm = {"model": model,
              "tokenizer": tokenizer,
              "mask_token": mask_token,
              "log_softmax": log_softmax,
              "uncased": True
        }

        # score each sentence. 
        # each row in the dataframe has the sentid and score for pro and anti stereo.
        df_score = pd.DataFrame(columns=['sent_more', 'sent_less', 
                                         'sent_more_score', 'sent_less_score',
                                         'score', 'stereo_antistereo', 'bias_type'], dtype=object)

        total_stereo, total_antistereo = 0, 0
        stereo_score, antistereo_score = 0, 0

        N = 0
        neutral = 0
        total = len(df_data.index)
        with tqdm(total=total) as pbar:
            for index, data in df_data.iterrows():
                direction = data['direction']
                bias = data['bias_type']
                score = mask_unigram(data, lm)

                for stype in score.keys():
                    score[stype] = round(score[stype], 3)

                N += 1
                pair_score = 0
                pbar.update(1)
                if score['sent1_score'] == score['sent2_score']:
                    neutral += 1
                else:
                    if direction == 'stereo':
                        total_stereo += 1
                        if score['sent1_score'] > score['sent2_score']:
                            stereo_score += 1
                            pair_score = 1
                    elif direction == 'antistereo':
                        total_antistereo += 1
                        if score['sent2_score'] > score['sent1_score']:
                            antistereo_score += 1
                            pair_score = 1

                sent_more, sent_less = '', ''
                if direction == 'stereo':
                    sent_more = data['sent1']
                    sent_less = data['sent2']
                    sent_more_score = score['sent1_score']
                    sent_less_score = score['sent2_score']
                else:
                    sent_more = data['sent2']
                    sent_less = data['sent1']
                    sent_more_score = score['sent2_score']
                    sent_less_score = score['sent1_score']

                df_score = df_score.append({'sent_more': sent_more,
                                            'sent_less': sent_less,
                                            'sent_more_score': sent_more_score,
                                            'sent_less_score': sent_less_score,
                                            'score': pair_score,
                                            'stereo_antistereo': direction,
                                            'bias_type': bias
                                          }, ignore_index=True)

        metric_score = round((stereo_score + antistereo_score) / N * 100, 2)
        if total_stereo != 0:
            stereotype_score = round(stereo_score  / total_stereo * 100, 2)
        else:
            stereotype_score = -1
        if total_antistereo != 0:
            antistereotype_score = round(antistereo_score  / total_antistereo * 100, 2)
        else:
            antistereotype_score = -1

        loop_dict = {
            'model' : model_name,
            'bias_type' : bias_type,
            'metric_score' : metric_score,
            'stereotype_score' : stereotype_score,
            'antistereotype_score' : antistereotype_score
        }

        social_bias_dataframe = social_bias_dataframe.append(loop_dict, ignore_index=True)

100%|████████████████████████████████████████████████████████████████████████████████| 516/516 [12:24<00:00,  1.44s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 262/262 [05:33<00:00,  1.27s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 172/172 [04:01<00:00,  1.40s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 159/159 [04:01<00:00,  1.52s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 105/105 [02:33<00:00,  1.47s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 87/87 [01:55<00:00,  1.33s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 84/84 [02:06<00:00,  1.50s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 63/63 [01:30<00:00,  1.43s/it]
100%|███████████████████████████████████

100%|██████████████████████████████████████████████████████████████████████████████████| 87/87 [01:48<00:00,  1.25s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 84/84 [01:56<00:00,  1.39s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 63/63 [01:22<00:00,  1.31s/it]
100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [01:16<00:00,  1.28s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 516/516 [40:45<00:00,  4.74s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 262/262 [18:43<00:00,  4.29s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 172/172 [13:07<00:00,  4.58s/it]
100%|████████████████████████████████████████████████████████████████████████████████| 159/159 [13:33<00:00,  5.11s/it]
100%|███████████████████████████████████

In [11]:
social_bias_dataframe

Unnamed: 0,model,bias_type,metric_score,stereotype_score,antistereotype_score
0,bert-base-cased,race-color,48.84,48.84,48.84
1,bert-base-cased,gender,55.73,57.86,52.43
2,bert-base-cased,socioeconomic,56.40,58.60,33.33
3,bert-base-cased,nationality,49.69,49.32,54.55
4,bert-base-cased,religion,64.76,65.66,50.00
...,...,...,...,...,...
202,distilbert-base-multilingual-cased,religion,43.81,44.44,33.33
203,distilbert-base-multilingual-cased,age,65.52,67.12,57.14
204,distilbert-base-multilingual-cased,sexual-orientation,72.62,79.17,33.33
205,distilbert-base-multilingual-cased,physical-appearance,55.56,50.00,81.82


In [12]:
social_bias_dataframe.to_csv('social_bias_scores.csv')