|<h2>Substack post:</h2>|<h1><a href="https://mikexcohen.substack.com/p/king-man-woman-queen-is-fake-news" target="_blank">Gender bias in large language models, part 1 (measuring the bias)</a></h1>|
|-|:-:|
|<h2>Teacher:</h2>|<h1>Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h1>|

<br>

<i>Using the code without reading the post may lead to confusion or errors.</i>

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import torch
import torch.nn.functional as F

In [None]:
### Run this cell only if you're using "dark mode"

# switch inline figures to svg output (higher-res)
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# Theme settings, grouped by the value they share:
#   dark background / light foreground / hidden top+right spines / bold titles+labels
plt.rcParams.update({
    **dict.fromkeys(['figure.facecolor','figure.edgecolor','axes.facecolor'], '#282a2c'),
    **dict.fromkeys(['axes.edgecolor','axes.labelcolor','xtick.color','ytick.color','text.color'], '#DDE2F4'),
    **dict.fromkeys(['axes.spines.right','axes.spines.top'], False),
    **dict.fromkeys(['axes.titleweight','axes.labelweight'], 'bold'),
})

# Demo 1: Import the BERT LLM and tokenize text

In [None]:
from transformers import BertTokenizer, BertForMaskedLM

# Load the pretrained BERT tokenizer and masked-language model.
# The checkpoint name is defined once so the tokenizer and the model always match.
model_name = 'bert-large-uncased'
tokenizer = BertTokenizer.from_pretrained(model_name)
model = BertForMaskedLM.from_pretrained(model_name)

# put the model in evaluation (inference) mode; the returned model is shown as the cell's output
model.eval()

In [None]:
# example sentence that contains the mask placeholder
text = 'I like to eat [MASK] chocolate-covered raisins.'
tokens = tokenizer.encode(text,return_tensors='pt')

# list every token id (as a plain integer) next to the text it decodes to
for token_id in tokens[0].tolist():
  print(f'{token_id:5}: "{tokenizer.decode(token_id)}"')

# Demo 2: Get logits of four text versions

In [None]:
# list of target words
target_words = [ 'he','she','they' ]

# One template for all versions of the sentence, so that they are guaranteed
# to differ only in the word that fills the {} slot.
sentence_template = 'The engineer informed the client that {} would need more time.'

# tokenize the three sentences with an explicit pronoun
tokens_he   = tokenizer.encode(sentence_template.format('he'),return_tensors='pt')
tokens_she  = tokenizer.encode(sentence_template.format('she'),return_tensors='pt')
tokens_they = tokenizer.encode(sentence_template.format('they'),return_tensors='pt')

# tokenize the masked sentence (the slot is filled with the tokenizer's mask token)
tokens_mask = tokenizer.encode(sentence_template.format(tokenizer.mask_token),return_tensors='pt')

In [None]:
# display the encoded "he" sentence (the tensor returned by tokenizer.encode with return_tensors='pt')
tokens_he

In [None]:
# 1) the mask index: position of the mask token in the masked sentence
#    (.item() raises unless exactly one position matches)
maskTarget_idx = torch.where(tokens_mask[0] == tokenizer.mask_token_id)[0].item()

# 2) token indices of target words
#    Encode without the special tokens so the result contains only the word's own
#    token(s), and verify that each target is a single token; otherwise only its
#    first piece would be kept without any warning.
targets_idx = []
for word in target_words:
  word_ids = tokenizer.encode(word,add_special_tokens=False)
  assert len(word_ids)==1, f'Target "{word}" maps to {len(word_ids)} tokens; each target must be a single token'
  targets_idx.append(word_ids[0])

# 3) print out the tokens
for t in tokens_mask[0]:
  print(f'{t:5}: "{tokenizer.decode(t)}"')

# 4) print out the target tokens
print(f'\nThe mask is in token index {maskTarget_idx}\n')
for t in targets_idx:
  print(f'Target "{tokenizer.decode(t)}" is index {t}')


In [None]:
# push all four tokenized sentences through the model with gradient tracking switched off
with torch.no_grad():
  out_he,out_she,out_they,out_mask = [
      model(sentence_tokens)
      for sentence_tokens in (tokens_he,tokens_she,tokens_they,tokens_mask)
  ]

In [None]:
# display the object the model returned for the "he" sentence (its .logits attribute is used in later cells)
out_he

In [None]:
# size of the logits tensor; later cells index it as [0, token position, :]
# (presumably batch x token positions x vocabulary size -- confirm against the printed shape)
out_he.logits.shape

In [None]:
# logits for every vocabulary entry at the mask position, taken from the "he" sentence
logits_he = out_he.logits[0,maskTarget_idx,:].cpu()

# one marker per vocabulary entry
fig,ax = plt.subplots(figsize=(12,4))
ax.plot(logits_he,'ko',markerfacecolor=[.7,.9,.7,.5],markersize=8)
ax.set(xlabel='Tokens',ylabel='Logits',title='Logits in "he" sentence')
plt.show()

In [None]:
# convert the logits into softmax probabilities
sm_logits_he = F.softmax(logits_he,dim=-1)

# The probabilities are multiplied by 100 for plotting, so the y-axis is in percent
# (and is labelled as such).
plt.figure(figsize=(12,4))
plt.plot(100*sm_logits_he,'wo',markerfacecolor=[.7,.9,.7,.5],markersize=8)
plt.gca().set(xlabel='Tokens',ylabel='Probability (%)',title='Softmax probs in "he" sentence')
plt.show()

In [None]:
# vocabulary index holding the largest logit, reported together with its decoded text
maxlogit = logits_he.argmax()
print(f'Max token is {maxlogit} ("{tokenizer.decode(maxlogit)}")')

# Demo 3: Quantify the bias

In [None]:
# grab and visualize the log-softmax of the target words in each unmasked sentence

fig,axs = plt.subplots(2,3,figsize=(12,5))

# one figure column per sentence: (pronoun used in the sentence, model output, bar color)
panels = [
  ('he',   out_he,   [.9,.7,.7]),
  ('she',  out_she,  [.7,.7,.9]),
  ('they', out_they, [.7,.9,.7]),
]

# bar positions follow the number of target words
xpos = range(len(target_words))

for col,(pronoun,out,color) in enumerate(panels):

  # log-softmax over the vocabulary at the position the mask occupies in the masked sentence
  logsm = F.log_softmax(out.logits[0,maskTarget_idx,:],dim=-1).cpu()

  # top row: log-softmax of the target words; bottom row: the same values as percent probabilities
  axs[0,col].bar(xpos,logsm[targets_idx],color=color)
  axs[1,col].bar(xpos,100*torch.exp(logsm[targets_idx]),color=color)

  panel_letter = 'ABC'[col]
  axs[0,col].set(xticks=xpos,xticklabels=target_words,ylabel='Log-softmax',
                 title=f'{panel_letter}) Probs. in ${pronoun}$-sentence')
  axs[1,col].set(xticks=xpos,xticklabels=target_words,xlabel='Target words',ylabel='Softmax prob (%)')


plt.tight_layout()
plt.show()

In [None]:
# log-softmax over the vocabulary at the mask position of the masked sentence
logsm = F.log_softmax(out_mask.logits[0,maskTarget_idx,:],dim=-1).cpu()

# keep only the target words; exponentiate and scale to get percent probabilities
logsm_targets = logsm[targets_idx]
prob_targets = 100*torch.exp(logsm_targets)
xpos = range(3)

fig,axs = plt.subplots(1,2,figsize=(10,3.5))
ax_log,ax_prob = axs

# left panel: log-softmax
ax_log.bar(xpos,logsm_targets,color=[.9,.7,.7])
ax_log.set(xticks=xpos,xticklabels=target_words,xlabel='Target words',
           ylabel='Log-softmax',title='Log-softmax for masked word')

# right panel: softmax probability
ax_prob.bar(xpos,prob_targets,color=[.7,.9,.7])
ax_prob.set(xticks=xpos,xticklabels=target_words,xlabel='Target words',
            ylabel='Softmax prob. (%)',title='Softmax probability for masked word')

# figure title: the masked sentence decoded without its first and last token
fig.suptitle(tokenizer.decode(tokens_mask[0,1:-1]),fontweight='bold')

plt.tight_layout()
plt.show()