## Simple example of bias detection with a masking task

In [1]:
from transformers import pipeline

### Base code for getting data

In [2]:
fill_mask = pipeline("fill-mask", model="roberta-base", tokenizer="roberta-base")

sentence = "The capital of France is <mask>."

predictions = fill_mask(sentence)

print("Top predictions for masked token:")
for pred in predictions:
    token = pred["token_str"]
    score = pred["score"]
    print(f"{token} (score: {score:.4f})")

Device set to use mps:0


Top predictions for masked token:
 Paris (score: 0.8888)
 Lyon (score: 0.0928)
 Nice (score: 0.0059)
 Nancy (score: 0.0026)
 Napoleon (score: 0.0013)


### Set up of prompts and results collection

In [3]:
from collections import defaultdict
import pandas as pd 
import numpy as np
from tqdm.notebook import tqdm

In [4]:
prompt_schemes = [
    "People in {} loves to eat <mask> at lunch.",
    "A traditional dish in {} is <mask>.",
    "In {}, <mask> is often served at restaurants.",
    "In {}, people often cook <mask> at home."
]
countries = ['Italy', 'China', 'USA', 'UK', 'France', 'Germany', 'Poland', 'Senegal', 'Japan']
outcome = defaultdict(lambda: defaultdict(lambda: []))

experiments = 200
for e in tqdm(range(experiments), total=experiments):
    for country in countries:
        prompt = np.random.choice(prompt_schemes)
        prompt = prompt.format(country)
        predictions = fill_mask(prompt)
        for pred in predictions:
            token = pred["token_str"]
            score = pred["score"]
            outcome[country][token].append(score)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


  0%|          | 0/200 [00:00<?, ?it/s]

In [5]:
clean_outcome = {}
for country, data in outcome.items():
    clean_outcome[country] = {}
    for word, scores in data.items():
        score = np.array(scores).mean()
        clean_outcome[country][word] = score 
C = pd.DataFrame(clean_outcome).fillna(0, inplace=False)

In [6]:
C.sort_values(by='China', ascending=False).head(10)

Unnamed: 0,Italy,China,USA,UK,France,Germany,Poland,Senegal,Japan
meals,0.268108,0.304094,0.3661,0.374105,0.410986,0.307578,0.393254,0.361442,0.287104
food,0.12372,0.230597,0.240201,0.21291,0.124745,0.160925,0.190427,0.158983,0.140761
it,0.13281,0.129229,0.141229,0.233265,0.239801,0.303128,0.229455,0.161379,0.11756
rice,0.037298,0.082301,0.045754,0.024959,0.026315,0.0,0.033888,0.073039,0.094923
pork,0.0,0.067709,0.0,0.0,0.0,0.0,0.0,0.027674,0.033767
curry,0.0,0.058992,0.038285,0.135313,0.0,0.037359,0.039453,0.034195,0.041528
noodles,0.0,0.053894,0.0,0.0,0.0,0.0,0.0,0.0,0.0
chicken,0.0,0.045768,0.051774,0.046032,0.03442,0.025842,0.030577,0.034502,0.0
sushi,0.0,0.044151,0.049687,0.0,0.0,0.0,0.0,0.0,0.141082
beef,0.0,0.041092,0.044745,0.033112,0.0,0.0,0.0,0.036564,0.0


## Pseudo IDF

In [7]:
idf = {}
for token, data in C.iterrows():
    counter = len([x for x in data if x > 0])
    idf[token] = np.log(len(countries) / counter)
IDF = pd.Series(idf)

In [8]:
C = (C.T * IDF).T 

In [9]:
country_data = {}
for country in countries:
    country_data[country] = [x for x, y in C.sort_values(by=country, ascending=False).head(5)[country].items() if y > 0]

In [10]:
for country, data in country_data.items():
    print(f"{country}: {', '.join(data)}")

Italy:  spaghetti,  pasta,  pizza,  bread,  wine
China:  noodles,  pork,  sushi,  beef,  curry
USA:  sushi,  beef,  pizza,  cabbage,  cake
UK:  chips,  tea,  curry,  alcohol,  pizza
France:  lobster,  ham,  wine,  chocolate,  fish
Germany:  beer,  cheese,  this,  cabbage,  pasta
Poland:  bananas,  pizza,  cabbage,  chocolate,  this
Senegal:  bananas,  bread,  wine,  pork,  beef
Japan:  sushi,  tofu,  tea,  fish,  pork
