In [1]:
import os
import csv
import json
import math
import torch
import argparse
import difflib
import logging
import numpy as np
import pandas as pd

from transformers import BertTokenizer, BertForMaskedLM
from transformers import AlbertTokenizer, AlbertForMaskedLM
from transformers import RobertaTokenizer, RobertaForMaskedLM
from collections import defaultdict
from tqdm import tqdm

In [2]:
from crows_pairs_methods import *

Interesting link to use transformers instead of pytorch's pretrained berttokenizer: https://huggingface.co/transformers/v2.4.0/migration.html

# Evaluating bert-large-uncased on Disability-Biased Data

In [3]:
logging.basicConfig(level=logging.INFO)

# load data into panda DataFrame
df_data = read_data("crows_pairs_anonymized.csv")

# Filtering to Disability Data
df_data = df_data[df_data['bias_type']=='disability']

# supported masked language models (using bert)
tokenizer = BertTokenizer.from_pretrained('bert-large-uncased')
model = BertForMaskedLM.from_pretrained('bert-large-uncased')

model.eval()

mask_token = tokenizer.mask_token
log_softmax = torch.nn.LogSoftmax(dim=0)
vocab = tokenizer.get_vocab()
with open("bert" + ".vocab", "w") as f:
    f.write(json.dumps(vocab))

lm = {"model": model,
      "tokenizer": tokenizer,
      "mask_token": mask_token,
      "log_softmax": log_softmax,
      "uncased": True
}

# score each sentence. 
# each row in the dataframe has the sentid and score for pro and anti stereo.
df_score = pd.DataFrame(columns=['sent_more', 'sent_less', 
                                 'sent_more_score', 'sent_less_score',
                                 'score', 'stereo_antistereo', 'bias_type'], dtype=object)

INFO:filelock:Lock 1474023515528 acquired on C:\Users\laram/.cache\huggingface\transformers\e12f02d630da91a0982ce6db1ad595231d155a2b725ab106971898276d842ecc.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock


HBox(children=(IntProgress(value=0, description='Downloading', max=231508, style=ProgressStyle(description_wid…




INFO:filelock:Lock 1474023515528 released on C:\Users\laram/.cache\huggingface\transformers\e12f02d630da91a0982ce6db1ad595231d155a2b725ab106971898276d842ecc.d789d64ebfe299b0e416afc4a169632f903f693095b4629a7ea271d5a0cf2c99.lock
INFO:filelock:Lock 1474023465608 acquired on C:\Users\laram/.cache\huggingface\transformers\300ecd79785b4602752c0085f8a89c3f0232ef367eda291c79a5600f3778b677.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79.lock


HBox(children=(IntProgress(value=0, description='Downloading', max=28, style=ProgressStyle(description_width='…




INFO:filelock:Lock 1474023465608 released on C:\Users\laram/.cache\huggingface\transformers\300ecd79785b4602752c0085f8a89c3f0232ef367eda291c79a5600f3778b677.20430bd8e10ef77a7d2977accefe796051e01bc2fc4aa146bc862997a1a15e79.lock
INFO:filelock:Lock 1474023515528 acquired on C:\Users\laram/.cache\huggingface\transformers\475d46024228961ca8770cead39e1079f135fd2441d14cf216727ffac8d41d78.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4.lock


HBox(children=(IntProgress(value=0, description='Downloading', max=466062, style=ProgressStyle(description_wid…




INFO:filelock:Lock 1474023515528 released on C:\Users\laram/.cache\huggingface\transformers\475d46024228961ca8770cead39e1079f135fd2441d14cf216727ffac8d41d78.7f2721073f19841be16f41b0a70b600ca6b880c8f3df6f3535cbc704371bdfa4.lock
INFO:filelock:Lock 1474023466248 acquired on C:\Users\laram/.cache\huggingface\transformers\1cf090f220f9674b67b3434decfe4d40a6532d7849653eac435ff94d31a4904c.1d03e5e4fa2db2532c517b2cd98290d8444b237619bd3d2039850a6d5e86473d.lock


HBox(children=(IntProgress(value=0, description='Downloading', max=571, style=ProgressStyle(description_width=…




INFO:filelock:Lock 1474023466248 released on C:\Users\laram/.cache\huggingface\transformers\1cf090f220f9674b67b3434decfe4d40a6532d7849653eac435ff94d31a4904c.1d03e5e4fa2db2532c517b2cd98290d8444b237619bd3d2039850a6d5e86473d.lock
INFO:filelock:Lock 1474004195208 acquired on C:\Users\laram/.cache\huggingface\transformers\1d959166dd7e047e57ea1b2d9b7b9669938a7e90c5e37a03961ad9f15eaea17f.fea64cd906e3766b04c92397f9ad3ff45271749cbe49829a079dd84e34c1697d.lock


HBox(children=(IntProgress(value=0, description='Downloading', max=1344997306, style=ProgressStyle(description…




INFO:filelock:Lock 1474004195208 released on C:\Users\laram/.cache\huggingface\transformers\1d959166dd7e047e57ea1b2d9b7b9669938a7e90c5e37a03961ad9f15eaea17f.fea64cd906e3766b04c92397f9ad3ff45271749cbe49829a079dd84e34c1697d.lock
Some weights of the model checkpoint at bert-large-uncased were not used when initializing BertForMaskedLM: ['cls.seq_relationship.weight', 'cls.seq_relationship.bias']
- This IS expected if you are initializing BertForMaskedLM from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertForMaskedLM from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [4]:
total_stereo, total_antistereo = 0, 0
stereo_score, antistereo_score = 0, 0

N = 0
neutral = 0
total = len(df_data.index)
with tqdm(total=total) as pbar:
    for index, data in df_data.iterrows():
        direction = data['direction']
        bias = data['bias_type']
        score = mask_unigram(data, lm)

        for stype in score.keys():
            score[stype] = round(score[stype], 3)

        N += 1
        pair_score = 0
        pbar.update(1)
        if score['sent1_score'] == score['sent2_score']:
            neutral += 1
        else:
            if direction == 'stereo':
                total_stereo += 1
                if score['sent1_score'] > score['sent2_score']:
                    stereo_score += 1
                    pair_score = 1
            elif direction == 'antistereo':
                total_antistereo += 1
                if score['sent2_score'] > score['sent1_score']:
                    antistereo_score += 1
                    pair_score = 1

        sent_more, sent_less = '', ''
        if direction == 'stereo':
            sent_more = data['sent1']
            sent_less = data['sent2']
            sent_more_score = score['sent1_score']
            sent_less_score = score['sent2_score']
        else:
            sent_more = data['sent2']
            sent_less = data['sent1']
            sent_more_score = score['sent2_score']
            sent_less_score = score['sent1_score']

        df_score = df_score.append({'sent_more': sent_more,
                                    'sent_less': sent_less,
                                    'sent_more_score': sent_more_score,
                                    'sent_less_score': sent_less_score,
                                    'score': pair_score,
                                    'stereo_antistereo': direction,
                                    'bias_type': bias
                                  }, ignore_index=True)


100%|██████████████████████████████████████████████████████████████████████████████████| 60/60 [09:25<00:00,  9.43s/it]


In [8]:
print('=' * 100)
print('Total examples:', N)
print('Metric score:', round((stereo_score + antistereo_score) / N * 100, 2))
print('Stereotype score:', round(stereo_score  / total_stereo * 100, 2))
if antistereo_score != 0:
    print('Anti-stereotype score:', round(antistereo_score  / total_antistereo * 100, 2))
print("Num. neutral:", neutral, round(neutral / N * 100, 2))
print('=' * 100)
print()

Total examples: 60
Metric score: 76.67
Stereotype score: 75.44
Anti-stereotype score: 100.0
Num. neutral: 0 0.0



In [9]:
data = {'model': ['bert-base', 'bert-large'],
       'metric_score': [61.67, 76.67],
        'stereotype_score': [63.16, 75.44],
        'antistereotype_score': [33.33, 100],
       'computation_time': [3.0667, 9.4167]}

In [10]:
dataframe = pd.DataFrame(data)

In [11]:
dataframe

Unnamed: 0,model,metric_score,stereotype_score,antistereotype_score,computation_time
0,bert-base,61.67,63.16,33.33,3.0667
1,bert-large,76.67,75.44,100.0,9.4167


# Evaluating - on Disability-Biased Data

In [13]:
from transformers import AutoTokenizer

Helpful link of existing models: https://huggingface.co/models

When using 'AutoTokenizer', the eval() function no longer exists.

In [19]:
! pip install sentencepiece

Collecting sentencepiece
  Downloading https://files.pythonhosted.org/packages/01/21/b78bb71b7fbab906eb1d10f67d1ba69761892016cf13c4f0c5dde123bb07/sentencepiece-0.1.96-cp37-cp37m-win_amd64.whl (1.1MB)
Installing collected packages: sentencepiece
Successfully installed sentencepiece-0.1.96


In [20]:
logging.basicConfig(level=logging.INFO)

# load data into panda DataFrame
df_data = read_data("crows_pairs_anonymized.csv")

# Filtering to Disability Data
df_data = df_data[df_data['bias_type']=='disability']

# supported masked language models (using bert)
tokenizer = AlbertTokenizer.from_pretrained('albert-base-v2')
model = AlbertForMaskedLM.from_pretrained('albert-base-v2')

model.eval()

mask_token = tokenizer.mask_token
log_softmax = torch.nn.LogSoftmax(dim=0)
vocab = tokenizer.get_vocab()
with open("bert" + ".vocab", "w") as f:
    f.write(json.dumps(vocab))

lm = {"model": model,
      "tokenizer": tokenizer,
      "mask_token": mask_token,
      "log_softmax": log_softmax,
      "uncased": True
}

# score each sentence. 
# each row in the dataframe has the sentid and score for pro and anti stereo.
df_score = pd.DataFrame(columns=['sent_more', 'sent_less', 
                                 'sent_more_score', 'sent_less_score',
                                 'score', 'stereo_antistereo', 'bias_type'], dtype=object)

INFO:filelock:Lock 1473217449800 acquired on C:\Users\laram/.cache\huggingface\transformers\bf1986d976e9a8320cbd3a0597e610bf299d639ce31b7ca581cbf54be3aaa6d3.d6d54047dfe6ae844e3bf6e7a7d0aff71cb598d3df019361e076ba7639b1da9b.lock


HBox(children=(IntProgress(value=0, description='Downloading', max=47376696, style=ProgressStyle(description_w…




INFO:filelock:Lock 1473217449800 released on C:\Users\laram/.cache\huggingface\transformers\bf1986d976e9a8320cbd3a0597e610bf299d639ce31b7ca581cbf54be3aaa6d3.d6d54047dfe6ae844e3bf6e7a7d0aff71cb598d3df019361e076ba7639b1da9b.lock


AttributeError: 'NoneType' object has no attribute 'mask_token'

In [None]:
total_stereo, total_antistereo = 0, 0
stereo_score, antistereo_score = 0, 0

N = 0
neutral = 0
total = len(df_data.index)
with tqdm(total=total) as pbar:
    for index, data in df_data.iterrows():
        direction = data['direction']
        bias = data['bias_type']
        score = mask_unigram(data, lm)

        for stype in score.keys():
            score[stype] = round(score[stype], 3)

        N += 1
        pair_score = 0
        pbar.update(1)
        if score['sent1_score'] == score['sent2_score']:
            neutral += 1
        else:
            if direction == 'stereo':
                total_stereo += 1
                if score['sent1_score'] > score['sent2_score']:
                    stereo_score += 1
                    pair_score = 1
            elif direction == 'antistereo':
                total_antistereo += 1
                if score['sent2_score'] > score['sent1_score']:
                    antistereo_score += 1
                    pair_score = 1

        sent_more, sent_less = '', ''
        if direction == 'stereo':
            sent_more = data['sent1']
            sent_less = data['sent2']
            sent_more_score = score['sent1_score']
            sent_less_score = score['sent2_score']
        else:
            sent_more = data['sent2']
            sent_less = data['sent1']
            sent_more_score = score['sent2_score']
            sent_less_score = score['sent1_score']

        df_score = df_score.append({'sent_more': sent_more,
                                    'sent_less': sent_less,
                                    'sent_more_score': sent_more_score,
                                    'sent_less_score': sent_less_score,
                                    'score': pair_score,
                                    'stereo_antistereo': direction,
                                    'bias_type': bias
                                  }, ignore_index=True)
