In [1]:
import pandas as pd
import checklist
from allennlp_models.pretrained import load_predictor
from allennlp_models.pretrained import get_pretrained_models
from checklist.editor import Editor
from checklist.perturb import Perturb
from checklist.test_types import MFT, INV, DIR
from checklist.expect import Expect
from checklist.pred_wrapper import PredictorWrapper

In [2]:
## define the amount of samples PER COUNTRY

nsamples = 100

In [3]:
editor = Editor()

In [4]:
srl_predictor = load_predictor('structured-prediction-srl-bert')

lerc is not a registered model.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [5]:
chinese_cities = list(editor.lexicons['country_city']['China'])
us_cities = list(editor.lexicons['country_city']['United_States'])
russian_cities = list(editor.lexicons['country_city']['Russia'])

china_names_male = editor.lexicons.male_from.China
us_names_male = editor.lexicons.male_from.United_States
russian_names_male = editor.lexicons.male_from.Russia

editor.add_lexicon("china_names_male", china_names_male, overwrite=True)
editor.add_lexicon("us_names_male", us_names_male, overwrite=True)
editor.add_lexicon("russian_names_male", russian_names_male, overwrite=True)

editor.add_lexicon("chinese_cities", chinese_cities, overwrite=True)
editor.add_lexicon("us_cities", us_cities, overwrite=True)
editor.add_lexicon("russian_cities", russian_cities, overwrite=True)

chinese_china = editor.template("{china_names_male} walks in {chinese_cities}.", meta=True, nsamples=10, remove_duplicates=True, seed=42)
usa_usa = editor.template("{us_names_male} walks in {us_cities}.", meta=True, nsamples=10, remove_duplicates=True, seed=42)
russian_russia = editor.template("{russian_names_male} walks in {russian_cities}.", meta=True, nsamples=10, remove_duplicates=True, seed=42)


In [6]:
def make_data_fairness(amount_per_format):
    list_japan = []
    list_usa = []
    list_france = []
    
    editor = Editor()
    
    japanese_cities = list(editor.lexicons['country_city']['Japan'])
    us_cities = list(editor.lexicons['country_city']['United_States'])
    french_cities = list(editor.lexicons['country_city']['France'])

    japanese_names_male = editor.lexicons.male_from.Japan
    us_names_male = editor.lexicons.male_from.United_States
    french_names_male = editor.lexicons.male_from.France

    editor.add_lexicon("japanese_names_male",japanese_names_male, overwrite=True)
    editor.add_lexicon("us_names_male", us_names_male, overwrite=True)
    editor.add_lexicon("french_names_male", french_names_male, overwrite=True)

    editor.add_lexicon("japanese_cities", japanese_cities, overwrite=True)
    editor.add_lexicon("us_cities", us_cities, overwrite=True)
    editor.add_lexicon("french_cities", french_cities, overwrite=True)

    japanese_japan = editor.template("{japanese_names_male} walks in {japanese_cities}.", 
                                    meta=True, 
                                    nsamples=amount_per_format, 
                                    remove_duplicates=True, 
                                    seed=18)
    list_japan.append(japanese_japan['data'])
    list_japan = [x for xs in list_japan for x in xs]
    
    
    usa_usa = editor.template("{us_names_male} walks in {us_cities}.", 
                              meta=True, 
                              nsamples=amount_per_format, 
                              remove_duplicates=True, 
                              seed=18)
    list_usa.append(usa_usa['data'])
    list_usa = [x for xs in list_usa for x in xs]
    
    french_france = editor.template("{french_names_male} walks in {french_cities}.", 
                                     meta=True, 
                                     nsamples=amount_per_format, 
                                     remove_duplicates=True, 
                                     seed=18)
    list_france.append(french_france['data'])
    list_france = [x for xs in list_france for x in xs]
    return list_japan, list_usa, list_france

In [7]:
def get_pp_tags(sentence):
    output = srl_predictor.predict(sentence)
    if len(output['verbs']) >= 1:
        pp_tags = output['verbs'][0]['tags']#['verb']#[0]['tags']
    else:
        pp_tags = sentence
    return pp_tags

def get_pp_descr(sentence):
    output = srl_predictor.predict(sentence)
    if len(output['verbs']) >= 1:
        pp_descr = output['verbs'][0]['description']
    else:
        pp_descr = sentence
    return pp_descr

def find_final_min_one_arg(list_of_arg):
    if list_of_arg:
        element = list_of_arg[-2]
    else:
        element = 'missed_verb'
    return element

In [8]:
list_japan, list_usa, list_france =  make_data_fairness(nsamples)

In [9]:
srl_predictor = load_predictor('structured-prediction-srl-bert')

df_japan = pd.DataFrame(list_japan)
df_japan['pp_tag'] = df_japan[0].apply(get_pp_tags)
df_japan['pp_descr'] = df_japan[0].apply(get_pp_descr)
df_japan[1] = df_japan['pp_tag'].apply(find_final_min_one_arg)

df_usa = pd.DataFrame(list_usa)
df_usa['pp_tag'] = df_usa[0].apply(get_pp_tags)
df_usa['pp_descr'] = df_usa[0].apply(get_pp_descr)
df_usa[1] = df_usa['pp_tag'].apply(find_final_min_one_arg)

df_france = pd.DataFrame(list_france)
df_france['pp_tag'] = df_france[0].apply(get_pp_tags)
df_france['pp_descr'] = df_france[0].apply(get_pp_descr)
df_france[1] = df_france['pp_tag'].apply(find_final_min_one_arg)

lerc is not a registered model.
Some weights of the model checkpoint at bert-base-uncased were not used when initializing BertModel: ['cls.predictions.decoder.weight', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.dense.bias', 'cls.predictions.bias', 'cls.seq_relationship.weight', 'cls.predictions.transform.LayerNorm.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


In [10]:
df_japan[0].to_csv("../datasets/fairness/japan.txt", header=None, index=False)
df_usa[0].to_csv("../datasets/fairness/usa.txt", header=None, index=False)
df_france[0].to_csv("../datasets/fairness/france.txt", header=None, index=False)

df_japan.to_csv("../results/fairness/japan_results_bert.csv")
df_usa.to_csv("../results/fairness/usa_results_bert.csv")
df_france.to_csv("../results/fairness/france_results_bert.csv")

In [11]:
error_japan_bert = df_japan[df_japan[1] != 'I-ARGM-LOC'].sort_values(by=[1])
error_usa_bert = df_usa[df_usa[1] != 'I-ARGM-LOC'].sort_values(by=[1])
error_france_bert = df_france[df_france[1] != 'I-ARGM-LOC'].sort_values(by=[1])

In [12]:
error_japan_bert[[0,'pp_descr']]

Unnamed: 0,0,pp_descr
87,まさる walks in Yao.,[ARG0: まさる] [V: walks] [ARGM-MNR: in Yao] .
83,けいじ walks in Kōchi.,[ARG0: けいじ] [V: walks] [ARGM-MNR: in Kōchi] .
5,あつし walks in Kōbe.,[ARG0: あつし] [V: walks] [ARGM-MNR: in Kōbe] .
64,すすむ walks in Higashiōsaka.,[ARG0: すすむ] [V: walks] [ARGM-MNR: in Higashiōs...
62,まさる walks in Kitakyūshū.,[ARG0: まさる] [V: walks] [ARGM-MNR: in Kitakyūsh...
8,たけし walks in Kōbe.,[ARG0: たけし] [V: walks] [ARGM-MNR: in Kōbe] .
56,あきら walks in Machida.,[ARG0: あきら] [V: walks] [ARGM-MNR: in Machida] .
15,つよし walks in Suita.,[ARG0: つよし] [V: walks] [ARGM-MNR: in Suita] .
35,みのる walks in Kōriyama.,[ARG0: みのる] [V: walks] [ARGM-MNR: in Kōriyama] .
28,ひろし walks in Kōriyama.,[ARG0: ひろし] [V: walks] [ARGM-MNR: in Kōriyama] .


In [13]:
error_usa_bert[[0,'pp_descr']]

Unnamed: 0,0,pp_descr
20,Matt walks in Garland.,[ARG0: Matt] [V: walks] [ARGM-MNR: in Garland] .
25,Daniel walks in Wichita.,[ARG0: Daniel] [V: walks] [ARGM-MNR: in Wichit...
42,Rick walks in Garland.,[ARG0: Rick] [V: walks] [ARGM-MNR: in Garland] .
50,Jack walks in Madison.,[ARG0: Jack] [V: walks] [ARGM-MNR: in Madison] .
55,Sam walks in Wichita.,[ARG0: Sam] [V: walks] [ARGM-MNR: in Wichita] .
97,Fred walks in Mesa.,Fred walks in Mesa.


In [14]:
error_japan_bert[[0,'pp_descr']]

Unnamed: 0,0,pp_descr
87,まさる walks in Yao.,[ARG0: まさる] [V: walks] [ARGM-MNR: in Yao] .
83,けいじ walks in Kōchi.,[ARG0: けいじ] [V: walks] [ARGM-MNR: in Kōchi] .
5,あつし walks in Kōbe.,[ARG0: あつし] [V: walks] [ARGM-MNR: in Kōbe] .
64,すすむ walks in Higashiōsaka.,[ARG0: すすむ] [V: walks] [ARGM-MNR: in Higashiōs...
62,まさる walks in Kitakyūshū.,[ARG0: まさる] [V: walks] [ARGM-MNR: in Kitakyūsh...
8,たけし walks in Kōbe.,[ARG0: たけし] [V: walks] [ARGM-MNR: in Kōbe] .
56,あきら walks in Machida.,[ARG0: あきら] [V: walks] [ARGM-MNR: in Machida] .
15,つよし walks in Suita.,[ARG0: つよし] [V: walks] [ARGM-MNR: in Suita] .
35,みのる walks in Kōriyama.,[ARG0: みのる] [V: walks] [ARGM-MNR: in Kōriyama] .
28,ひろし walks in Kōriyama.,[ARG0: ひろし] [V: walks] [ARGM-MNR: in Kōriyama] .


In [15]:
error_rate_france_bert = len(df_france[df_france[1]!='I-ARGM-LOC'])/len(df_france)*100
error_rate_japan_bert = len(df_japan[df_japan[1]!='I-ARGM-LOC'])/len(df_japan)*100
error_rate_usa_bert = len(df_usa[df_usa[1]!='I-ARGM-LOC'])/len(df_usa)*100

In [16]:
srl_predictor = load_predictor('structured-prediction-srl')

df_japan = pd.DataFrame(list_japan)
df_japan['pp_tag'] = df_japan[0].apply(get_pp_tags)
df_japan['pp_descr'] = df_japan[0].apply(get_pp_descr)
df_japan[1] = df_japan['pp_tag'].apply(find_final_min_one_arg)

df_usa = pd.DataFrame(list_usa)
df_usa['pp_tag'] = df_usa[0].apply(get_pp_tags)
df_usa['pp_descr'] = df_usa[0].apply(get_pp_descr)
df_usa[1] = df_usa['pp_tag'].apply(find_final_min_one_arg)

df_france = pd.DataFrame(list_france)
df_france['pp_tag'] = df_france[0].apply(get_pp_tags)
df_france['pp_descr'] = df_france[0].apply(get_pp_descr)
df_france[1] = df_france['pp_tag'].apply(find_final_min_one_arg)

lerc is not a registered model.


In [17]:
df_japan.to_csv("../results/fairness/japan_results_bilstm.csv")
df_usa.to_csv("../results/fairness/usa_results_bilstm.csv")
df_france.to_csv("../results/fairness/france_results_bilstm.csv")

In [18]:
error_japan_bilstm = df_japan[df_japan[1] != 'I-ARGM-LOC'].sort_values(by=[1])
error_usa_bilstm = df_usa[df_usa[1] != 'I-ARGM-LOC'].sort_values(by=[1])
error_france_bilstm = df_france[df_france[1] != 'I-ARGM-LOC'].sort_values(by=[1])

In [19]:
display(error_japan_bilstm[[0,'pp_descr']])

display(error_usa_bilstm[[0,'pp_descr']])

display(error_france_bilstm[[0,'pp_descr']])

Unnamed: 0,0,pp_descr
0,しんいち walks in Amagasaki.,[V: しんいち] [ARG1: walks in Amagasaki] .
4,だいすけ walks in Yokkaichi.,[V: だいすけ] [ARG1: walks in Yokkaichi] .
71,あきら walks in Yokohama.,[ARG0: あきら] [V: walks] [ARG1: in Yokohama] .
43,だいすけ walks in Chiba.,[V: だいすけ] [ARG1: walks in Chiba] .
37,こういち walks in Tsukuba.,[V: こういち] [ARG1: walks in Tsukuba] .
29,まさお walks in Kurume.,[V: まさお] [ARG1: walks in Kurume] .
25,えいじ walks in Fuchū.,[V: えいじ] [ARG1: walks in Fuchū] .
90,ゆたか walks in Saitama.,[ARG0: ゆたか] [V: walks] [ARGM-DIR: in Saitama] .
99,かずや walks in Saitama.,[ARG0: かずや] [V: walks] [ARGM-DIR: in Saitama] .
16,まさし walks in Kashiwa.,まさし walks in Kashiwa.


Unnamed: 0,0,pp_descr
2,Daniel walks in Anchorage.,[ARG0: Daniel] [V: walks] in [ARG1: Anchorage] .
61,Bob walks in Albuquerque.,[ARG0: Bob] [V: walks] [ARG1: in] [ARG1: Albuq...
0,Samuel walks in Virginia Beach.,[ARG0: Samuel] [V: walks] [ARG1: in Virginia B...
65,Matthew walks in Baton Rouge.,[ARG0: Matthew] [V: walks] [ARG1: in Baton Rou...
72,Aaron walks in Tulsa.,[ARG0: Aaron] [V: walks] [ARG1: in Tulsa] .
74,Gary walks in Fresno.,[ARG0: Gary] [V: walks] [ARG1: in Fresno] .
75,Billy walks in Chicago.,[ARG0: Billy] [V: walks] [ARG1: in Chicago] .
76,Jeff walks in New York City.,[ARG0: Jeff] [V: walks] [ARG1: in New York Cit...
78,Tony walks in Colorado Springs.,[ARG0: Tony] [V: walks] [ARG1: in Colorado Spr...
79,Albert walks in Lincoln.,[ARG0: Albert] [V: walks] [ARG1: in Lincoln] .


Unnamed: 0,0,pp_descr
51,Thierry walks in Cannes.,[ARG0: Thierry] [V: walks] [ARG1: in Cannes] .
46,Jules walks in Cannes.,[ARG0: Jules] [V: walks] [ARG1: in Cannes] .
98,Jules walks in Avignon.,[ARG0: Jules] [V: walks] [ARG1: in Avignon] .
52,Thierry walks in Poitiers.,[ARG0: Thierry] [V: walks] [ARG1: in Poitiers] .
58,Alfred walks in Roubaix.,[ARG0: Alfred] [V: walks] [ARG1: in Roubaix] .
26,David walks in Fort-de-France.,[ARG0: David] [V: walks] [ARG1: in Fort - de -...
65,Charles walks in Angers.,[ARG0: Charles] [V: walks] [ARG1: in Angers] .
24,Albert walks in Aubervilliers.,[ARG0: Albert] [V: walks] [ARG1: in Aubervilli...
79,Jean-Pierre walks in La Rochelle.,[ARG0: Jean - Pierre] [V: walks] [ARG1: in La ...
2,Jean walks in Saint-Pierre.,[ARG0: Jean] [V: walks] [ARG1: in Saint - Pier...


In [20]:
error_rate_france_bilstm = len(df_france[df_france[1]!='I-ARGM-LOC'])/len(df_france)*100
error_rate_japan_bilstm = len(df_japan[df_japan[1]!='I-ARGM-LOC'])/len(df_japan)*100
error_rate_usa_bilstm = len(df_usa[df_usa[1]!='I-ARGM-LOC'])/len(df_usa)*100

In [21]:
print("BERT ERROR RATE")
print("france: " , error_rate_france_bert)
print("japan: " , error_rate_japan_bert)
print("usa: " , error_rate_usa_bert)
print()
print("bilstm ERROR RATE")
print("france: " , error_rate_france_bilstm)
print("japan: " , error_rate_japan_bilstm)
print("usa: " , error_rate_usa_bilstm)

BERT ERROR RATE
france:  24.0
japan:  42.0
usa:  6.0

bilstm ERROR RATE
france:  56.00000000000001
japan:  34.0
usa:  38.0
