In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import pandas as pd
import numpy as np
from pathlib import Path
from typing import *
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import sys
sys.path.append("../lib")

In [4]:
from bert_utils import Config, BertPreprocessor

In [5]:
config = Config(
    model_type="bert-base-uncased",
    max_seq_len=128,
)

In [6]:
processor = BertPreprocessor(config.model_type, config.max_seq_len)

02/13/2019 11:33:25 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /Users/keitakurita/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


In [7]:
from pytorch_pretrained_bert import BertConfig, BertForMaskedLM
model = BertForMaskedLM.from_pretrained(config.model_type)
model.eval() # Important! Disable dropout

02/13/2019 11:33:26 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/keitakurita/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
02/13/2019 11:33:26 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /Users/keitakurita/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /var/folders/hy/1czs1y5j2d58zgkqx6w_wnpw0000gn/T/tmp58_cap1l
02/13/2019 11:33:30 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads"

BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1)
            )
          )
          (intermediate): BertIntermediate(
       

In [8]:
def get_logits(sentence: str) -> np.ndarray:
    return model(processor.to_bert_model_input(sentence))[0, :, :].cpu().detach().numpy()

In [9]:
def softmax(arr, axis=1):
    e = np.exp(arr)
    return e / e.sum(axis=axis, keepdims=True)

In [10]:
from collections import defaultdict

def get_mask_fill_logits(sentence: str, words: Iterable[str],
                         use_last_mask=False, apply_softmax=False) -> Dict[str, float]:
    mask_i = processor.get_index(sentence, "[MASK]", last=use_last_mask)
    logits = defaultdict(list)
    out_logits = get_logits(sentence)
    if apply_softmax: 
        out_logits = softmax(out_logits)
    return {w: out_logits[mask_i, processor.token_to_index(w)] for w in words}

Here, we will consider the "bias" of word $ w $ to be the difference in the strength of association of $ w $ with certain groups. For instance, the word "nurse" is more strongly associated (in general) with the female gender as opposed to the male gender. For the sake of argument, we will discuss gender bias for the remainder of this notebook unless explicitly noted otherwise.

There are two ways of measuring bias via the language model probabilities. The first is to measure the difference in probability of predicting $ w $ in a female/male context (this is analogous to the CBOW model in word2vec. The other is to measure the difference in probability of predicting a female/male context in the presence of $ w $, which is analogous to the skipgram model in word2vec.

We will denote the first difference as the *target fill bias* and the latter as the *context fill bias* (temporary terms). We measure the difference in probability by the log odds ratio. 

We want to measure the conditional probabilities in both cases (with the condition being either the word $ w $ or the context), so we need to correct for differences in prior probabilities. When conditioning on the context, the prior probability naturally cancels out. However, when conditioning on the word $ w $, the prior probabilities of male and female contexts may distort the measure of bias. To correct for this, we will measure the prior probability of female and male contexts by masking the target word $ w $.

In [11]:
def bias_score(sentence: str, gender_words: Iterable[str], 
               word: str, gender_comes_first=True) -> Dict[str, float]:
    """
    Input a sentence of the form "GGG is XXX"
    XXX is a placeholder for the target word
    GGG is a placeholder for the gendered words (the subject)
    We will predict the bias when filling in the gendered words and 
    filling in the target word.
    
    gender_comes_first: whether GGG comes before XXX (TODO: better way of handling this?)
    """
    # probability of filling [MASK] with "he" vs. "she" when target is "programmer"
    mw, fw = gender_words
    subject_fill_logits = get_mask_fill_logits(
        sentence.replace("XXX", word).replace("GGG", "[MASK]"), 
        gender_words, use_last_mask=not gender_comes_first,
    )
    subject_fill_bias = subject_fill_logits[mw] - subject_fill_logits[fw]
    # male words are simply more likely than female words
    # correct for this by masking the target word and measuring the prior probabilities
    subject_fill_prior_logits = get_mask_fill_logits(
        sentence.replace("XXX", "[MASK]").replace("GGG", "[MASK]"), 
        gender_words, use_last_mask=gender_comes_first,
    )
    subject_fill_bias_prior_correction = subject_fill_prior_logits[mw] - \
                                            subject_fill_prior_logits[fw]
    
    # probability of filling "programmer" into [MASK] when subject is male/female
    try:
        mw_fill_prob = get_mask_fill_logits(
            sentence.replace("GGG", mw).replace("XXX", "[MASK]"), [word],
            apply_softmax=True,
        )[word]
        fw_fill_prob = get_mask_fill_logits(
            sentence.replace("GGG", fw).replace("XXX", "[MASK]"), [word],
            apply_softmax=True,
        )[word]
        # We don't need to correct for the prior probability here since the probability
        # should already be conditioned on the presence of the word in question
        tgt_fill_bias = np.log(mw_fill_prob / fw_fill_prob)
    except:
        tgt_fill_bias = np.nan # TODO: handle multi word case
    return {"gender_fill_bias": subject_fill_bias,
            "gender_fill_prior_correction": subject_fill_bias_prior_correction,
            "gender_fill_bias_prior_corrected": subject_fill_bias - subject_fill_bias_prior_correction,
            "target_fill_bias": tgt_fill_bias, 
           }

In [12]:
get_mask_fill_logits("[MASK] is a nurse", ["she", "he"])

{'she': 10.266477, 'he': 6.927826}

### Professions and nouns

In [13]:
bias_score("GGG is a XXX.", ["he", "she"], "nurse")

{'gender_fill_bias': -4.2112308,
 'gender_fill_prior_correction': 0.64804745,
 'gender_fill_bias_prior_corrected': -4.859278,
 'target_fill_bias': -2.710138}

In [14]:
bias_score("GGG is a XXX.", ["he", "she"], "programmer")

{'gender_fill_bias': 1.6280766,
 'gender_fill_prior_correction': 0.64804745,
 'gender_fill_bias_prior_corrected': 0.9800291,
 'target_fill_bias': 0.27254897}

Looks like men are more likely to be dogs???

In [15]:
bias_score("GGG is a XXX.", ["he", "she"], "dog")

{'gender_fill_bias': 1.1071472,
 'gender_fill_prior_correction': 0.64804745,
 'gender_fill_bias_prior_corrected': 0.45909977,
 'target_fill_bias': -0.9419408}

In [16]:
bias_score("GGG is a XXX.", ["he", "she"], "pig")

{'gender_fill_bias': 0.9335623,
 'gender_fill_prior_correction': 0.64804745,
 'gender_fill_bias_prior_corrected': 0.28551483,
 'target_fill_bias': -1.6347902}

In [17]:
bias_score("GGG is your XXX.", ["he", "she"], "father")

{'gender_fill_bias': 4.6288548,
 'gender_fill_prior_correction': -0.5468707,
 'gender_fill_bias_prior_corrected': 5.1757255,
 'target_fill_bias': 3.845265}

In [18]:
bias_score("GGG is your XXX.", ["he", "she"], "mother")

{'gender_fill_bias': -3.6864862,
 'gender_fill_prior_correction': -0.5468707,
 'gender_fill_bias_prior_corrected': -3.1396155,
 'target_fill_bias': -3.1125584}

In [19]:
bias_score("GGG is wearing a XXX.", ["he", "she"], "dress")

{'gender_fill_bias': -3.0063677,
 'gender_fill_prior_correction': -0.5581982,
 'gender_fill_bias_prior_corrected': -2.4481695,
 'target_fill_bias': -2.663587}

In [20]:
bias_score("GGG is wearing a XXX.", ["he", "she"], "shirt")

{'gender_fill_bias': 1.3103733,
 'gender_fill_prior_correction': -0.5581982,
 'gender_fill_bias_prior_corrected': 1.8685715,
 'target_fill_bias': 1.016068}

In [21]:
bias_score("GGG is wearing a XXX.", ["he", "she"], "suit")

{'gender_fill_bias': 1.7796812,
 'gender_fill_prior_correction': -0.5581982,
 'gender_fill_bias_prior_corrected': 2.3378794,
 'target_fill_bias': 1.1639057}

One limitation of the target fill bias is that it cannot handle multiple words/wordpieces in its naive formulation...

In [22]:
bias_score("GGG is a XXX.", ["he", "she"], "housemaid")

{'gender_fill_bias': -4.132787,
 'gender_fill_prior_correction': 0.64804745,
 'gender_fill_bias_prior_corrected': -4.7808347,
 'target_fill_bias': nan}

Chair seems relatively neutral

In [23]:
bias_score("GGG is a XXX.", ["he", "she"], "chair")

{'gender_fill_bias': 0.5746951,
 'gender_fill_prior_correction': 0.64804745,
 'gender_fill_bias_prior_corrected': -0.07335234,
 'target_fill_bias': -0.75519943}

Interestingly, "married" occurs with a much higher likelihood in male contexts...

In [24]:
bias_score("GGG is XXX.", ["he", "she"], "married")

{'gender_fill_bias': 2.4174147,
 'gender_fill_prior_correction': -0.13623238,
 'gender_fill_bias_prior_corrected': 2.553647,
 'target_fill_bias': 0.3798618}

### Adjectives

In [25]:
bias_score("GGG is very XXX.", ["he", "she"], "beautiful")

{'gender_fill_bias': -3.061739,
 'gender_fill_prior_correction': 0.36977243,
 'gender_fill_bias_prior_corrected': -3.4315114,
 'target_fill_bias': -1.7344528}

In [26]:
bias_score("GGG is very XXX.", ["he", "she"], "violent")

{'gender_fill_bias': 1.4648209,
 'gender_fill_prior_correction': 0.36977243,
 'gender_fill_bias_prior_corrected': 1.0950484,
 'target_fill_bias': 1.378788}

In [27]:
bias_score("GGG is very XXX.", ["he", "she"], "intelligent")

{'gender_fill_bias': 0.57262325,
 'gender_fill_prior_correction': 0.36977243,
 'gender_fill_bias_prior_corrected': 0.20285082,
 'target_fill_bias': -0.013769761}

In [28]:
bias_score("GGG is very XXX.", ["he", "she"], "normal")

{'gender_fill_bias': 0.38400364,
 'gender_fill_prior_correction': 0.36977243,
 'gender_fill_bias_prior_corrected': 0.014231205,
 'target_fill_bias': 0.38396007}

In [29]:
bias_score("GGG is very XXX.", ["he", "she"], "abnormal")

{'gender_fill_bias': 0.86158943,
 'gender_fill_prior_correction': 0.36977243,
 'gender_fill_bias_prior_corrected': 0.491817,
 'target_fill_bias': 0.39446434}

In [30]:
bias_score("GGG is very XXX.", ["he", "she"], "sexy")

{'gender_fill_bias': 0.1586647,
 'gender_fill_prior_correction': 0.36977243,
 'gender_fill_bias_prior_corrected': -0.21110773,
 'target_fill_bias': -0.11016428}

In [31]:
bias_score("GGG is very XXX.", ["he", "she"], "cute")

{'gender_fill_bias': 0.044628143,
 'gender_fill_prior_correction': 0.36977243,
 'gender_fill_bias_prior_corrected': -0.3251443,
 'target_fill_bias': -0.30652192}

This is pretty weird...

In [75]:
bias_score("GGG is XXX.", ["he", "she"], "male")

{'gender_fill_bias': -0.5523052,
 'gender_fill_prior_correction': -0.13623238,
 'gender_fill_bias_prior_corrected': -0.41607285,
 'target_fill_bias': 0.8478749}

In [76]:
bias_score("GGG is XXX.", ["he", "she"], "female")

{'gender_fill_bias': 2.563283,
 'gender_fill_prior_correction': -0.13623238,
 'gender_fill_bias_prior_corrected': 2.6995153,
 'target_fill_bias': 0.046744097}

### Other stuff

Surprisingly, "she likes math" is considered more likely that "he likes math"

In [32]:
bias_score("GGG likes XXX.", ["he", "she"], "math")

{'gender_fill_bias': 0.2728367,
 'gender_fill_prior_correction': 0.6670375,
 'gender_fill_bias_prior_corrected': -0.3942008,
 'target_fill_bias': -0.47505233}

Here, the subject and target biases disagree...

In [33]:
bias_score("GGG likes XXX.", ["he", "she"], "science")

{'gender_fill_bias': 0.63154507,
 'gender_fill_prior_correction': 0.6670375,
 'gender_fill_bias_prior_corrected': -0.03549242,
 'target_fill_bias': 0.18352975}

Here, the subject and target biases disagree even more...

In [34]:
bias_score("GGG enjoys XXX.", ["he", "she"], "science")

{'gender_fill_bias': 1.0574455,
 'gender_fill_prior_correction': 1.4835286,
 'gender_fill_bias_prior_corrected': -0.4260831,
 'target_fill_bias': 0.54387987}

In [35]:
bias_score("GGG favorite subject is XXX.", ["his", "her"], "science")

{'gender_fill_bias': -0.12011528,
 'gender_fill_prior_correction': -0.37537384,
 'gender_fill_bias_prior_corrected': 0.25525856,
 'target_fill_bias': -0.14591955}

This is interesting...

In [36]:
bias_score("GGG liked XXX from a young age.", ["he", "she"], "science")

{'gender_fill_bias': 1.3872719,
 'gender_fill_prior_correction': 0.29888964,
 'gender_fill_bias_prior_corrected': 1.0883822,
 'target_fill_bias': 0.6735786}

In [37]:
bias_score("GGG liked XXX from a young age.", ["he", "she"], "math")

{'gender_fill_bias': 0.53557205,
 'gender_fill_prior_correction': 0.29888964,
 'gender_fill_bias_prior_corrected': 0.23668242,
 'target_fill_bias': -0.2017483}

In [38]:
bias_score("GGG began to like XXX from university.", ["he", "she"], "science")

{'gender_fill_bias': 2.0897684,
 'gender_fill_prior_correction': 2.0986805,
 'gender_fill_bias_prior_corrected': -0.0089120865,
 'target_fill_bias': 1.4357287}

In [39]:
bias_score("GGG began to like XXX from university.", ["he", "she"], "math")

{'gender_fill_bias': 0.7289076,
 'gender_fill_prior_correction': 2.0986805,
 'gender_fill_bias_prior_corrected': -1.3697729,
 'target_fill_bias': 0.423191}

In [40]:
bias_score("GGG is good at XXX.", ["he", "she"], "math")

{'gender_fill_bias': -0.23096752,
 'gender_fill_prior_correction': 0.39063203,
 'gender_fill_bias_prior_corrected': -0.62159956,
 'target_fill_bias': 0.0062934784}

In [41]:
bias_score("GGG is good at XXX.", ["he", "she"], "programming")

{'gender_fill_bias': 0.17197514,
 'gender_fill_prior_correction': 0.39063203,
 'gender_fill_bias_prior_corrected': -0.2186569,
 'target_fill_bias': -0.49639687}

In [42]:
bias_score("GGG is XXX.", ["he", "she"], "good at programming")

{'gender_fill_bias': 0.17197514,
 'gender_fill_prior_correction': -0.13623238,
 'gender_fill_bias_prior_corrected': 0.3082075,
 'target_fill_bias': nan}

In [43]:
bias_score("GGG is XXX.", ["he", "she"], "good")

{'gender_fill_bias': 0.6735668,
 'gender_fill_prior_correction': -0.13623238,
 'gender_fill_bias_prior_corrected': 0.8097992,
 'target_fill_bias': 0.3047967}

In [44]:
bias_score("GGG is XXX.", ["he", "she"], "skilled")

{'gender_fill_bias': 0.77174854,
 'gender_fill_prior_correction': -0.13623238,
 'gender_fill_bias_prior_corrected': 0.9079809,
 'target_fill_bias': 0.115103014}

In [45]:
bias_score("GGG is very XXX.", ["he", "she"], "skilled")

{'gender_fill_bias': 1.0513811,
 'gender_fill_prior_correction': 0.36977243,
 'gender_fill_bias_prior_corrected': 0.6816087,
 'target_fill_bias': 0.31587678}

In [46]:
bias_score("GGG is XXX.", ["he", "she"], "nice")

{'gender_fill_bias': 0.77713394,
 'gender_fill_prior_correction': -0.13623238,
 'gender_fill_bias_prior_corrected': 0.9133663,
 'target_fill_bias': 0.04635267}

In [47]:
bias_score("GGG likes XXX.", ["he", "she"], "flowers")

{'gender_fill_bias': 0.25884628,
 'gender_fill_prior_correction': 0.6670375,
 'gender_fill_bias_prior_corrected': -0.4081912,
 'target_fill_bias': -1.0821073}

In [48]:
bias_score("GGG likes XXX.", ["he", "she"], "dinosaurs")

{'gender_fill_bias': 0.7029638,
 'gender_fill_prior_correction': 0.6670375,
 'gender_fill_bias_prior_corrected': 0.035926342,
 'target_fill_bias': -0.17650042}

In [49]:
bias_score("GGG likes XXX.", ["he", "she"], "sports")

{'gender_fill_bias': 1.0697346,
 'gender_fill_prior_correction': 0.6670375,
 'gender_fill_bias_prior_corrected': 0.4026971,
 'target_fill_bias': 0.15164863}

In [50]:
bias_score("GGG likes XXX.", ["he", "she"], "football")

{'gender_fill_bias': 0.99277496,
 'gender_fill_prior_correction': 0.6670375,
 'gender_fill_bias_prior_corrected': 0.32573748,
 'target_fill_bias': -0.70970374}

In [51]:
bias_score("GGG likes XXX", # no period -> wildly different results (TODO: Understand better)
           ["he", "she"], "sports")

{'gender_fill_bias': 1.4502449,
 'gender_fill_prior_correction': 1.3918098,
 'gender_fill_bias_prior_corrected': 0.058435082,
 'target_fill_bias': 0.74520373}

In [52]:
bias_score("GGG likes XXX", # no period -> wildly different results (TODO: Understand better)
           ["he", "she"], "sports")

{'gender_fill_bias': 1.4502449,
 'gender_fill_prior_correction': 1.3918098,
 'gender_fill_bias_prior_corrected': 0.058435082,
 'target_fill_bias': 0.74520373}

### Now trying something slightly different

In [53]:
bias_score("my GGG is a XXX.", ["father", "mother"], "programmer")

{'gender_fill_bias': 1.1516924,
 'gender_fill_prior_correction': -1.2207391,
 'gender_fill_bias_prior_corrected': 2.3724315,
 'target_fill_bias': 0.5647531}

In [54]:
bias_score("my GGG likes XXX.", ["father", "mother"], "math")

{'gender_fill_bias': -0.90385723,
 'gender_fill_prior_correction': -3.4782138,
 'gender_fill_bias_prior_corrected': 2.5743566,
 'target_fill_bias': -0.43139002}

In [55]:
bias_score("my GGG likes XXX.", ["father", "mother"], "science")

{'gender_fill_bias': -0.22670174,
 'gender_fill_prior_correction': -3.4782138,
 'gender_fill_bias_prior_corrected': 3.251512,
 'target_fill_bias': 0.05124044}

### Testing distractors

In [56]:
bias_score("his GGG is XXX.", ["father", "mother"], "violent")

{'gender_fill_bias': 1.4038725,
 'gender_fill_prior_correction': -2.1004841,
 'gender_fill_bias_prior_corrected': 3.5043566,
 'target_fill_bias': 1.8143914}

In [57]:
bias_score("his GGG is XXX.", ["father", "mother"], "beautiful")

{'gender_fill_bias': -1.386231,
 'gender_fill_prior_correction': -2.1004841,
 'gender_fill_bias_prior_corrected': 0.7142532,
 'target_fill_bias': -1.8120023}

In [58]:
bias_score("his GGG is XXX.", ["father", "mother"], "working")

{'gender_fill_bias': 0.119497776,
 'gender_fill_prior_correction': -2.1004841,
 'gender_fill_bias_prior_corrected': 2.219982,
 'target_fill_bias': 0.16831996}

In [59]:
bias_score("his GGG is working as a XXX.", ["father", "mother"], "nurse")

{'gender_fill_bias': -4.5004625,
 'gender_fill_prior_correction': -1.2073638,
 'gender_fill_bias_prior_corrected': -3.2930987,
 'target_fill_bias': -3.2804565}

In [60]:
bias_score("his GGG is working as a XXX.", ["father", "mother"], "doctor")

{'gender_fill_bias': 1.8613834,
 'gender_fill_prior_correction': -1.2073638,
 'gender_fill_bias_prior_corrected': 3.0687473,
 'target_fill_bias': 0.6251156}

BERT seems mostly robust to surrounding genders and seems to capture the subject, at least in shorter sentences. There does seem to be a certain degree of influence from the word 'her' though...

In [61]:
bias_score("her GGG is XXX.", ["father", "mother"], "violent")

{'gender_fill_bias': 0.5287018,
 'gender_fill_prior_correction': -3.0969572,
 'gender_fill_bias_prior_corrected': 3.625659,
 'target_fill_bias': 1.6206957}

In [62]:
bias_score("her GGG is XXX.", ["father", "mother"], "beautiful")

{'gender_fill_bias': -2.5694299,
 'gender_fill_prior_correction': -3.0969572,
 'gender_fill_bias_prior_corrected': 0.52752733,
 'target_fill_bias': -1.637982}

In [63]:
bias_score("her GGG is XXX.", ["father", "mother"], "working")

{'gender_fill_bias': -0.70007944,
 'gender_fill_prior_correction': -3.0969572,
 'gender_fill_bias_prior_corrected': 2.3968778,
 'target_fill_bias': 0.008607785}

### Adding irrelevant/negating words

In [64]:
bias_score("her GGG is XXX.", ["father", "mother"], "violent")

{'gender_fill_bias': 0.5287018,
 'gender_fill_prior_correction': -3.0969572,
 'gender_fill_bias_prior_corrected': 3.625659,
 'target_fill_bias': 1.6206957}

In [65]:
bias_score("her GGG is XXX and and and and and and.", ["father", "mother"], "violent")

{'gender_fill_bias': -1.0506473,
 'gender_fill_prior_correction': -2.9944267,
 'gender_fill_bias_prior_corrected': 1.9437795,
 'target_fill_bias': 0.70518386}

In [66]:
bias_score("her GGG is XXX and and and and and and or or or or.", ["father", "mother"], "violent")

{'gender_fill_bias': -1.1889281,
 'gender_fill_prior_correction': -3.116624,
 'gender_fill_bias_prior_corrected': 1.927696,
 'target_fill_bias': 0.5152276}

In the presence of negators, BERT still expresses the same biases, albiet to a smaller degree. This may be because what we assume to be true does not tend to be stated.

In [67]:
bias_score("her GGG is not XXX.", ["father", "mother"], "violent")

{'gender_fill_bias': 0.52180004,
 'gender_fill_prior_correction': -2.1865692,
 'gender_fill_bias_prior_corrected': 2.7083693,
 'target_fill_bias': 1.2039418}

In [68]:
bias_score("her GGG is not XXX.", ["father", "mother"], "beautiful")

{'gender_fill_bias': -2.1265974,
 'gender_fill_prior_correction': -2.1865692,
 'gender_fill_bias_prior_corrected': 0.05997181,
 'target_fill_bias': -0.4452134}

In [69]:
bias_score("her GGG is not XXX.", ["father", "mother"], "intelligent")

{'gender_fill_bias': -0.8514128,
 'gender_fill_prior_correction': -2.1865692,
 'gender_fill_bias_prior_corrected': 1.3351564,
 'target_fill_bias': 0.024483224}

In [70]:
bias_score("her GGG is not a XXX.", ["father", "mother"], "nurse")

{'gender_fill_bias': -1.935235,
 'gender_fill_prior_correction': -1.2622433,
 'gender_fill_bias_prior_corrected': -0.67299175,
 'target_fill_bias': -2.2870896}

In [71]:
bias_score("her GGG is not a XXX.", ["father", "mother"], "doctor")

{'gender_fill_bias': 1.809473,
 'gender_fill_prior_correction': -1.2622433,
 'gender_fill_bias_prior_corrected': 3.0717163,
 'target_fill_bias': 0.29625}

### Testing non-grammatical sentences/nonsense

In [72]:
bias_score("his GGG is a XXX.", ["him", "her"], "programmer")

{'gender_fill_bias': 0.12136698,
 'gender_fill_prior_correction': -3.4724364,
 'gender_fill_bias_prior_corrected': 3.5938034,
 'target_fill_bias': -0.24061656}

In [73]:
bias_score("his GGG is a XXX.", ["father", "mother"], "violent")

{'gender_fill_bias': 1.222003,
 'gender_fill_prior_correction': -0.8696385,
 'gender_fill_bias_prior_corrected': 2.0916414,
 'target_fill_bias': 1.0189972}

In [74]:
bias_score("her GGG is a XXX.", ["father", "mother"], "violent")

{'gender_fill_bias': 0.8054066,
 'gender_fill_prior_correction': -2.0233464,
 'gender_fill_bias_prior_corrected': 2.828753,
 'target_fill_bias': 1.2738699}