# INFORMATION RETRIEVAL PROJECT
# 4. ANALYSIS OF GENDER STEREOTYPES BY YEARS - WEAT, ECT

---
## Gender stereotypes in parliamentary speeches

In word embedding models, each word is assigned a high-dimensional vector such that the geometry of the vectors captures semantic relations between the words – e.g. vectors that are closer together have been shown to correspond to more similar words. Recent works in machine learning demonstrate that word embeddings also capture common stereotypes, as these stereotypes are likely to be present, even if subtly, in the large corpora of training texts. These stereotypes are automatically learned by the embedding algorithm and can be problematic in many contexts if the embedding is then used for sensitive applications such as search rankings, product recommendations, or translations. An important direction of research is the development of algorithms to debias word embeddings.

This project aims to use word embeddings to study historical trends – specifically, trends in gender and ethnic stereotypes in Italian parliamentary speeches from 1948 to 2020.

In [1]:
import numpy as np
import pandas as pd
import gensim
from gensim.models import KeyedVectors
from gensim.models import Word2Vec
import os
from tqdm.auto import tqdm
from collections import defaultdict, OrderedDict

from INFORET_project import load_embed_model
# import matplotlib.pylab as plt
pd.set_option("display.max_rows", 100, "display.max_columns", 100)

In [2]:
from INFORET_project import YEARS

In [3]:
YEARS

['1948_1968', '1968_1985', '1985_2000', '2000_2020']

In [4]:
model = load_embed_model(YEARS[0])

In [7]:
model = load_embed_model(YEARS[1])

In [8]:
model = load_embed_model(YEARS[2])

In [None]:
model = load_embed_model(YEARS[3])

---

# 3) WEAT and ECT

## WEAT

In [4]:
from INFORET_project import WEAT
from INFORET_project.data import gendered_neutral_words
from INFORET_project import PAIRS_WORDS_GROUP, WORDS_GROUP

In [9]:
WEAT(model.wv, 
     first_target={'name':'career', 'words': gendered_neutral_words['career']},
     second_target={'name':'family', 'words': gendered_neutral_words['family']},
     first_attribute={'name':'donna', 'words': gendered_neutral_words['female']},
     second_attribute={'name':'uomo', 'words': gendered_neutral_words['male']}
)

# WEAT result (score, effect size, Nt, Na and p-value)
# s (score): value of the test statistic
# d (effect size): intensity of the effect, i.e. how much the 2 samples are separated
# p (p-value): the null hypothesis (H0) is that there is no difference between the two sets of target
#words in terms of their relative similarity to the two sets of attribute words.
# Nt: dimension of the targets (e.g. 6x2: 6 words for each of the 2 targets)
# Na: dimension of the attributes (e.g. 3x2: 3 words for each of the 2 attributes)

# H0 is rejected when the p-value is low (not the case for the output below, where p is about 0.999)

{'Target words': 'career vs. family',
 'Attrib. words': 'donna vs. uomo',
 's': -0.752927340567112,
 'd': -1.6041145,
 'p': 0.9989177489177489,
 'Nt': '10x2',
 'Na': '3x2'}

In [10]:
WEAT(model.wv, 
     first_target={'name':'male_stereotypes', 'words': gendered_neutral_words['male_stereotypes']},
     second_target={'name':'female_stereotypes', 'words': gendered_neutral_words['female_stereotypes']},
     first_attribute={'name':'donna', 'words': gendered_neutral_words['female']},
     second_attribute={'name':'uomo', 'words': gendered_neutral_words['male']}
)

{'Target words': 'male_stereotypes vs. female_stereotypes',
 'Attrib. words': 'donna vs. uomo',
 's': -0.24709320068359375,
 'd': -0.95323783,
 'p': 0.9557109557109557,
 'Nt': '7x2',
 'Na': '3x2'}

---

In [15]:
def show_weat_by_year(year):
    """Display one WEAT result per pair of word groups for the given period.

    The embedding model for `year` is loaded with `load_embed_model`. Then, for
    every (first, second) pair in `PAIRS_WORDS_GROUP`, `WEAT` is run with the two
    groups as targets and the 'female' / 'male' word lists as attributes, and
    the result is passed to `display`. Nothing is returned.
    """
    embeddings = load_embed_model(year).wv

    def stimulus(label, key):
        # A fresh dict is built on every call, so no dict object is shared
        # between successive WEAT runs.
        return {'name': label, 'words': gendered_neutral_words[key]}

    for first_group, second_group in PAIRS_WORDS_GROUP:
        outcome = WEAT(
            embeddings,
            first_target=stimulus(f'{first_group}', first_group),
            second_target=stimulus(f'{second_group}', second_group),
            first_attribute=stimulus('donna', 'female'),
            second_attribute=stimulus('uomo', 'male'),
        )
        display(outcome)

In [16]:
show_weat_by_year(YEARS[0])

{'Target words': 'family vs. career',
 'Attrib. words': 'donna vs. uomo',
 's': 0.752927340567112,
 'd': 1.6041145,
 'p': 0.0,
 'Nt': '6x2',
 'Na': '3x2'}

{'Target words': 'rage vs. kindness',
 'Attrib. words': 'donna vs. uomo',
 's': 0.03358858823776245,
 'd': 0.17383628,
 'p': 0.38852813852813856,
 'Nt': '6x2',
 'Na': '3x2'}

{'Target words': 'intelligence vs. dumbness',
 'Attrib. words': 'donna vs. uomo',
 's': -0.30264808237552643,
 'd': -1.2351941,
 'p': 0.9682539682539683,
 'Nt': '5x2',
 'Na': '3x2'}

{'Target words': 'active vs. passive',
 'Attrib. words': 'donna vs. uomo',
 's': -0.19418787956237793,
 'd': -1.7242286,
 'p': 0.95,
 'Nt': '3x2',
 'Na': '3x2'}

{'Target words': 'female_stereotypes vs. male_stereotypes',
 'Attrib. words': 'donna vs. uomo',
 's': 0.24709320068359375,
 'd': 0.9532377,
 'p': 0.043997668997669,
 'Nt': '13x2',
 'Na': '3x2'}

In [16]:
# NB: a large p-value means that we cannot reject H0, i.e. the 2 target groups show no statistically
#significant difference when compared to the genders. Since the control group contains gendered words,
#a large p-value means that the group under test has a strong gender component, so it is biased.
# Thus, the groups are ranked from the highest to the lowest p-value.

def show_weat_by_year_control(year, verbose=True):
    """Rank every word group by the p-value of its WEAT against the control group.

    For each group name in `WORDS_GROUP`, `WEAT` is run on the model loaded for
    `year`, with 'gendered_words' as first target, the group itself as second
    target and the 'female' / 'male' word lists as attributes.

    Args:
        year: period identifier passed to `load_embed_model`.
        verbose: when true, every WEAT result is shown with `display`.

    Returns:
        A list of `(group_name, p_value)` tuples sorted by decreasing p-value.
    """
    embeddings = load_embed_model(year).wv
    p_values = []

    for group_name in WORDS_GROUP:
        outcome = WEAT(
            embeddings,
            first_target={
                'name': 'gendered_words',
                'words': gendered_neutral_words['gendered_words'],
            },
            second_target={
                'name': f'{group_name}',
                'words': gendered_neutral_words[group_name],
            },
            first_attribute={
                'name': 'donna',
                'words': gendered_neutral_words['female'],
            },
            second_attribute={
                'name': 'uomo',
                'words': gendered_neutral_words['male'],
            },
        )

        p_values.append((group_name, outcome['p']))

        if verbose:
            display(outcome)

    # Stable sort, highest p-value first.
    p_values.sort(key=lambda entry: entry[1], reverse=True)
    return p_values

In [19]:
ranked_weat = show_weat_by_year_control(YEARS[1])

{'Target words': 'gendered_words vs. adj_appearence',
 'Attrib. words': 'donna vs. uomo',
 's': 0.4247361421585083,
 'd': 0.6344866,
 'p': 0.08651410508995648,
 'Nt': '10x2',
 'Na': '3x2'}

{'Target words': 'gendered_words vs. family',
 'Attrib. words': 'donna vs. uomo',
 's': -0.1948731690645218,
 'd': -0.44564193,
 'p': 0.7608225108225108,
 'Nt': '10x2',
 'Na': '3x2'}

{'Target words': 'gendered_words vs. career',
 'Attrib. words': 'donna vs. uomo',
 's': 1.1725627165287733,
 'd': 1.3028558,
 'p': 0.0009796704843144472,
 'Nt': '10x2',
 'Na': '3x2'}

{'Target words': 'gendered_words vs. rage',
 'Attrib. words': 'donna vs. uomo',
 's': 0.322650209069252,
 'd': 0.7504526,
 'p': 0.14502164502164502,
 'Nt': '10x2',
 'Na': '3x2'}

{'Target words': 'gendered_words vs. kindness',
 'Attrib. words': 'donna vs. uomo',
 's': 0.5006775185465813,
 'd': 1.0376654,
 'p': 0.04004329004329004,
 'Nt': '10x2',
 'Na': '3x2'}

{'Target words': 'gendered_words vs. intelligence',
 'Attrib. words': 'donna vs. uomo',
 's': 0.5428943261504173,
 'd': 1.2084794,
 'p': 0.01984126984126984,
 'Nt': '10x2',
 'Na': '3x2'}

{'Target words': 'gendered_words vs. dumbness',
 'Attrib. words': 'donna vs. uomo',
 's': 0.16958500444889069,
 'd': 0.44721156,
 'p': 0.25396825396825395,
 'Nt': '10x2',
 'Na': '3x2'}

{'Target words': 'gendered_words vs. active',
 'Attrib. words': 'donna vs. uomo',
 's': 0.27020979672670364,
 'd': 1.0259635,
 'p': 0.1,
 'Nt': '10x2',
 'Na': '3x2'}

{'Target words': 'gendered_words vs. passive',
 'Attrib. words': 'donna vs. uomo',
 's': 0.22756554186344147,
 'd': 0.59813553,
 'p': 0.1984126984126984,
 'Nt': '10x2',
 'Na': '3x2'}

{'Target words': 'gendered_words vs. gendered_words',
 'Attrib. words': 'donna vs. uomo',
 's': 0.0,
 'd': 0.0,
 'p': 0.4972287774145359,
 'Nt': '10x2',
 'Na': '3x2'}

{'Target words': 'gendered_words vs. female_stereotypes',
 'Attrib. words': 'donna vs. uomo',
 's': 0.629240594804287,
 'd': 0.86994576,
 'p': 0.02830760570698651,
 'Nt': '10x2',
 'Na': '3x2'}

{'Target words': 'gendered_words vs. male_stereotypes',
 'Attrib. words': 'donna vs. uomo',
 's': 0.6358938738703728,
 'd': 1.0684642,
 'p': 0.030011655011655012,
 'Nt': '10x2',
 'Na': '3x2'}

In [20]:
ranked_weat

[('family', 0.7608225108225108),
 ('gendered_words', 0.4972287774145359),
 ('dumbness', 0.25396825396825395),
 ('passive', 0.1984126984126984),
 ('rage', 0.14502164502164502),
 ('active', 0.1),
 ('adj_appearence', 0.08651410508995648),
 ('kindness', 0.04004329004329004),
 ('male_stereotypes', 0.030011655011655012),
 ('female_stereotypes', 0.02830760570698651),
 ('intelligence', 0.01984126984126984),
 ('career', 0.0009796704843144472)]

---

In [6]:
# Ranking of the word groups by WEAT p-value, one entry per period.
# Each value is the list of (group, p-value) tuples returned by
# show_weat_by_year_control, so the default factory is `list`
# (the same container type used for ECT_group_bias).
WEAT_group_bias = defaultdict(list)

for year in tqdm(YEARS, desc='Passing years'):
    # verbose=False: only collect the rankings, do not display each WEAT result
    WEAT_group_bias[year] = show_weat_by_year_control(year, verbose=False)

HBox(children=(HTML(value='Passing years'), FloatProgress(value=0.0, max=4.0), HTML(value='')))




In [7]:
#%store data
%store -r results

In [8]:
# Add one column per period to `results`, holding the group names in the
# order of the WEAT ranking (the p-values themselves are dropped).
for year in YEARS:
    results[f'WEAT_{year}'] = [name for name, _ in WEAT_group_bias[year]]

In [9]:
%store results
results

Stored 'results' (DataFrame)


Unnamed: 0,EAB_1948_1968,EAB_avg_gender_1948_1968,ECT_1948_1968,WEAT_1948_1968,EAB_1968_1985,EAB_avg_gender_1968_1985,ECT_1968_1985,WEAT_1968_1985,EAB_1985_2000,EAB_avg_gender_1985_2000,ECT_1985_2000,WEAT_1985_2000,EAB_2000_2020,EAB_avg_gender_2000_2020,ECT_2000_2020,WEAT_2000_2020
0,gendered_words,gendered_words,,family,gendered_words,gendered_words,,family,gendered_words,gendered_words,,family,gendered_words,gendered_words,,family
1,active,intelligence,,gendered_words,active,family,,gendered_words,family,family,,gendered_words,passive,family,,gendered_words
2,adj_appearence,adj_appearence,,dumbness,passive,adj_appearence,,dumbness,passive,career,,passive,family,rage,,passive
3,male_stereotypes,family,,passive,kindness,dumbness,,passive,adj_appearence,passive,,active,rage,passive,,dumbness
4,kindness,career,,rage,intelligence,intelligence,,rage,rage,kindness,,dumbness,intelligence,career,,rage
5,passive,male_stereotypes,,adj_appearence,adj_appearence,female_stereotypes,,active,kindness,adj_appearence,,rage,adj_appearence,dumbness,,kindness
6,family,passive,,kindness,family,kindness,,adj_appearence,active,female_stereotypes,,kindness,kindness,female_stereotypes,,female_stereotypes
7,intelligence,female_stereotypes,,female_stereotypes,male_stereotypes,male_stereotypes,,kindness,female_stereotypes,intelligence,,male_stereotypes,male_stereotypes,male_stereotypes,,adj_appearence
8,female_stereotypes,kindness,,career,female_stereotypes,passive,,male_stereotypes,career,active,,female_stereotypes,active,kindness,,male_stereotypes
9,rage,rage,,male_stereotypes,rage,career,,female_stereotypes,male_stereotypes,dumbness,,intelligence,female_stereotypes,intelligence,,career


## ECT

In [10]:
#from INFORET_project.utils import fast_cosine_sim, calculate_avg_vector, to_list
from INFORET_project import ECT
from INFORET_project import WORDS_GROUP
from INFORET_project.data import gendered_neutral_words

In [6]:
ect = ECT(model.wv, gendered_neutral_words['female'], gendered_neutral_words['male'])

spearman_corr = ect.get_bias(neutral_words = gendered_neutral_words['family'],
                         verbose=True)

ECT for words: ['famiglia', 'figlio', 'matrimonio', 'genitore', 'bambino', 'accudire']

Spearman correlation has value 0.7714 with p-value 0.0724
High correlation --> Low bias



In [7]:
biased_words = ect.get_cosine_sim_words()

Cosine similarity of 'bambino' to 'female' is 0.5360, to 'male' is 0.4293
Cosine similarity of 'genitore' to 'female' is 0.3743, to 'male' is 0.4375
Cosine similarity of 'accudire' to 'female' is 0.3432, to 'male' is 0.3072
Cosine similarity of 'figlio' to 'female' is 0.4409, to 'male' is 0.4577
Cosine similarity of 'matrimonio' to 'female' is 0.3251, to 'male' is 0.3103
Cosine similarity of 'famiglia' to 'female' is 0.3612, to 'male' is 0.3747


In [8]:
biased_words

[('bambino', 0.536, 0.4293),
 ('genitore', 0.3743, 0.4375),
 ('accudire', 0.3432, 0.3072),
 ('figlio', 0.4409, 0.4577),
 ('matrimonio', 0.3251, 0.3103),
 ('famiglia', 0.3612, 0.3747)]

In [9]:
ect = ECT(model.wv, gendered_neutral_words['female'], gendered_neutral_words['male'])

spearman_corr = ect.get_bias(neutral_words = gendered_neutral_words['passive'],
                         verbose=True)

ECT for words: ['timido', 'passivo', 'insicuro', 'debole', 'silenzioso']

Spearman correlation has value 0.9000 with p-value 0.0374
High correlation --> Low bias



In [10]:
biased_words = ect.get_cosine_sim_words()

Cosine similarity of 'debole' to 'female' is 0.1828, to 'male' is 0.2310
Cosine similarity of 'passivo' to 'female' is 0.1363, to 'male' is 0.1141
Cosine similarity of 'timido' to 'female' is 0.1614, to 'male' is 0.1810
Cosine similarity of 'silenzioso' to 'female' is 0.2889, to 'male' is 0.2705
Cosine similarity of 'insicuro' to 'female' is 0.2304, to 'male' is 0.2268


---

In [8]:
# comparison of different methods
# goal of the functions: understand when a word is tied to gender
# rank the words and check whether the methods are robust.
# study the intersections between the words
# manually build lists of polarized words and of neutral words, and check
# whether the methods place them in the right position.
# apply debiasing and look at the differences?
# baseline: split the documents into 2 groups (M, F) and compute a specificity measure for the
#words (TF-IDF, PMI) to see whether they are characteristic.
# do the methods do better?

# correlation between stereotypes and the growing number of women in parliament

In [21]:
# Run the ECT for every period in YEARS and every word group in WORDS_GROUP.
# ECT_group_bias: period -> list of (group, Spearman correlation) tuples,
#   sorted by increasing correlation.
# ECT_top_bias: period -> group -> value returned by ect.get_cosine_sim_words()
#   (in the cells above this is a list of (word, sim_female, sim_male) tuples).
ECT_group_bias = defaultdict(list)
# NOTE(review): the innermost OrderedDict factory is never triggered in this
# cell, because every [year][group] entry is assigned directly below.
ECT_top_bias = defaultdict(lambda: defaultdict(OrderedDict))

for year in tqdm(YEARS,
                   desc='Passing models'
                  ):
    # one embedding model per period
    model = load_embed_model(year)
    
    # (group, correlation) pairs collected for the current period
    g=[]
    for group in tqdm(WORDS_GROUP,
                        desc='Passing groups of words'
                     ):
        
        # a new ECT object is created for every group; get_cosine_sim_words()
        # is called right after get_bias() on the same object
        ect = ECT(model.wv, gendered_neutral_words['female'], gendered_neutral_words['male'])
        spearman_corr = ect.get_bias(neutral_words=gendered_neutral_words[group],
                         verbose=False)
        biased_words = ect.get_cosine_sim_words(verbose=False)
    
        g.append((group, spearman_corr))
        ECT_top_bias[year][group] = biased_words
        
    # ascending correlation: following the verbose ECT message
    # "High correlation --> Low bias", the most biased groups come first
    ECT_group_bias[year] = sorted(g, key=lambda x: x[1]) 

HBox(children=(HTML(value='Passing models'), FloatProgress(value=0.0, max=4.0), HTML(value='')))

HBox(children=(HTML(value='Passing groups of words'), FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(HTML(value='Passing groups of words'), FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(HTML(value='Passing groups of words'), FloatProgress(value=0.0, max=12.0), HTML(value='')))




HBox(children=(HTML(value='Passing groups of words'), FloatProgress(value=0.0, max=12.0), HTML(value='')))





In [12]:
ECT_top_bias = OrderedDict(ECT_top_bias)
%store ECT_top_bias

Stored 'ECT_top_bias' (OrderedDict)


In [21]:
for year in YEARS:
    display(ECT_group_bias[year])

[('adj_appearence', 0.29696969696969694),
 ('gendered_words', 0.5272727272727272),
 ('dumbness', 0.6),
 ('rage', 0.6571428571428573),
 ('female_stereotypes', 0.7197802197802198),
 ('family', 0.7714285714285715),
 ('career', 0.8424242424242423),
 ('passive', 0.8999999999999998),
 ('kindness', 0.942857142857143),
 ('intelligence', 0.9999999999999999),
 ('active', 1.0),
 ('male_stereotypes', 1.0)]

[('family', 0.08571428571428573),
 ('active', 0.5),
 ('female_stereotypes', 0.6758241758241759),
 ('intelligence', 0.7),
 ('passive', 0.7),
 ('adj_appearence', 0.709090909090909),
 ('career', 0.7696969696969697),
 ('kindness', 0.7714285714285715),
 ('dumbness', 0.7999999999999999),
 ('gendered_words', 0.8181818181818182),
 ('rage', 0.8857142857142858),
 ('male_stereotypes', 0.8928571428571429)]

[('gendered_words', 0.05454545454545454),
 ('kindness', 0.3714285714285715),
 ('career', 0.41818181818181815),
 ('female_stereotypes', 0.6538461538461539),
 ('family', 0.7714285714285715),
 ('rage', 0.7714285714285715),
 ('intelligence', 0.7999999999999999),
 ('adj_appearence', 0.8571428571428572),
 ('dumbness', 0.8999999999999998),
 ('passive', 0.8999999999999998),
 ('male_stereotypes', 0.9642857142857145),
 ('active', 1.0)]

[('gendered_words', -0.06666666666666665),
 ('kindness', 0.3714285714285715),
 ('passive', 0.39999999999999997),
 ('career', 0.41818181818181815),
 ('intelligence', 0.6),
 ('family', 0.7714285714285715),
 ('active', 0.7999999999999999),
 ('male_stereotypes', 0.8095238095238096),
 ('female_stereotypes', 0.8131868131868131),
 ('rage', 0.8857142857142858),
 ('adj_appearence', 0.9166666666666666),
 ('dumbness', 0.9999999999999999)]

In [13]:
#%store data
%store -r results

In [14]:
# Add one column per period to `results`, holding the group names in the
# order of the ECT ranking (the correlation values themselves are dropped).
for year in YEARS:
    results[f'ECT_{year}'] = [name for name, _ in ECT_group_bias[year]]

In [15]:
%store results
results

Stored 'results' (DataFrame)


Unnamed: 0,EAB_1948_1968,EAB_avg_gender_1948_1968,ECT_1948_1968,WEAT_1948_1968,EAB_1968_1985,EAB_avg_gender_1968_1985,ECT_1968_1985,WEAT_1968_1985,EAB_1985_2000,EAB_avg_gender_1985_2000,ECT_1985_2000,WEAT_1985_2000,EAB_2000_2020,EAB_avg_gender_2000_2020,ECT_2000_2020,WEAT_2000_2020
0,gendered_words,gendered_words,adj_appearence,family,gendered_words,gendered_words,family,family,gendered_words,gendered_words,gendered_words,family,gendered_words,gendered_words,gendered_words,family
1,active,intelligence,gendered_words,gendered_words,active,family,active,gendered_words,family,family,kindness,gendered_words,passive,family,kindness,gendered_words
2,adj_appearence,adj_appearence,dumbness,dumbness,passive,adj_appearence,female_stereotypes,dumbness,passive,career,career,passive,family,rage,passive,passive
3,male_stereotypes,family,rage,passive,kindness,dumbness,intelligence,passive,adj_appearence,passive,female_stereotypes,active,rage,passive,career,dumbness
4,kindness,career,female_stereotypes,rage,intelligence,intelligence,passive,rage,rage,kindness,family,dumbness,intelligence,career,intelligence,rage
5,passive,male_stereotypes,family,adj_appearence,adj_appearence,female_stereotypes,adj_appearence,active,kindness,adj_appearence,rage,rage,adj_appearence,dumbness,family,kindness
6,family,passive,career,kindness,family,kindness,career,adj_appearence,active,female_stereotypes,intelligence,kindness,kindness,female_stereotypes,active,female_stereotypes
7,intelligence,female_stereotypes,passive,female_stereotypes,male_stereotypes,male_stereotypes,kindness,kindness,female_stereotypes,intelligence,adj_appearence,male_stereotypes,male_stereotypes,male_stereotypes,male_stereotypes,adj_appearence
8,female_stereotypes,kindness,kindness,career,female_stereotypes,passive,dumbness,male_stereotypes,career,active,dumbness,female_stereotypes,active,kindness,female_stereotypes,male_stereotypes
9,rage,rage,intelligence,male_stereotypes,rage,career,gendered_words,female_stereotypes,male_stereotypes,dumbness,passive,intelligence,female_stereotypes,intelligence,rage,career


---