In [None]:
!pip install git+https://github.com/millawell/bias-ml-dh.git#subdirectory=material/notebooks/bias_ml_dh_utils
!pip install --upgrade tqdm

In [None]:
#WEAT Word Embedding Association Tests
import torch as tr
import numpy as np
from bisect import bisect_left
import spacy
from tqdm.notebook import tqdm
import itertools
# Load spaCy's small English pipeline.
nlp = spacy.load("en_core_web_sm")
# Use the tokenizer that belongs to the loaded pipeline.
# `nlp.Defaults.create_tokenizer(nlp)` only exists in spaCy 2.x and raises an
# AttributeError on spaCy 3+, whereas `nlp.tokenizer` is available in both.
tokenizer = nlp.tokenizer
import bias_ml_dh_utils as utils
# Directory that contains the `glove.6B/` folder with the GloVe text files.
path_glove = './'

In [None]:
# Dimensionality of the GloVe vectors; it also selects which
# glove.6B.<dim>d.txt file is read below.
embedding_dim = 50
# The embedding file is expected at <path_glove>/glove.6B/glove.6B.<dim>d.txt.
embedding_path = '{}/glove.6B/glove.6B.{}d.txt'.format(path_glove, embedding_dim)

# Vocabulary derived from the embedding file. Its exact structure is defined by
# utils.load_vocab (presumably a word -> row-index mapping -- TODO confirm).
vocab = utils.load_vocab(embedding_path)

# Embedding matrix for that vocabulary. word_attribute_association() reads this
# module-level name directly instead of receiving it as an argument.
embedding_matrix = utils.create_embedding_matrix(embedding_path, vocab, embedding_dim)

In [None]:
def word_attribute_association(w, A, B, vocab):
    """Return the WEAT association s(w, A, B) of target word(s) with two attribute sets.

    s(w, A, B) = mean_a cos(w, a) - mean_b cos(w, b)

    Parameters
    ----------
    w : list of str
        Target word(s). The cosine similarities are summed over every
        (target, attribute) pair and divided only by the size of the
        attribute set, so the textbook definition is obtained for a single
        target word.
    A, B : list of str
        The two attribute word sets.
    vocab
        Vocabulary object handed through to ``utils.lookup_embeddings``.

    Returns
    -------
    The difference of the two averaged cosine similarities.

    Notes
    -----
    The embeddings are taken from the module-level ``embedding_matrix``.
    ``utils.lookup_embeddings`` is assumed to return a 2-D array with one row
    per word (the ``axis=1`` norm below requires that) -- TODO confirm.
    """
    def unit_rows(rows):
        # Scale each row to unit L2 norm so that a plain dot product between
        # two rows is their cosine similarity.
        return rows/np.linalg.norm(rows, axis=1)[:,np.newaxis]

    attr_a_vectors = utils.lookup_embeddings(A, vocab, embedding_matrix)
    attr_b_vectors = utils.lookup_embeddings(B, vocab, embedding_matrix)
    target_units = unit_rows(utils.lookup_embeddings(w,vocab, embedding_matrix))

    # Sum of all pairwise cosines between the targets and each attribute set.
    cos_sum_a = np.dot(target_units, unit_rows(attr_a_vectors).T).sum()
    cos_sum_b = np.dot(target_units, unit_rows(attr_b_vectors).T).sum()

    return cos_sum_a/len(A) -  cos_sum_b/len(B)

def test_statistic(A,B,X,Y, vocab):
    
    wA = 0
    
    for ix in X:
        wA += word_attribute_association([ix], A, B, vocab)
        
    wB = 0
    
    for iy in Y:
        wB -= word_attribute_association([iy], A, B, vocab)
        
    return wA+wB

def calculate_pvalue(A,B,X,Y,vocab,alpha=0.05):
    """Run the one-sided WEAT permutation test and return its p-value.

    Out-of-vocabulary words are dropped from all four sets first. The target
    words of X and Y are then pooled, and every way of choosing
    ``len(pool) // 2`` words as the "X side" (the rest being the "Y side") is
    enumerated. The p-value is the fraction of these partitions whose test
    statistic is strictly larger than the statistic of the original (X, Y)
    split. A verdict comparing the p-value with ``alpha`` is printed.

    Parameters
    ----------
    A, B : list of str
        Attribute word sets.
    X, Y : list of str
        Target word sets; X holds the words expected to be closer to A.
    vocab
        Vocabulary; must support ``word in vocab`` and is passed on to
        ``word_attribute_association``.
    alpha : float, optional
        Significance level used for the printed verdict (default 0.05).

    Returns
    -------
    float
        The one-sided permutation p-value.

    Raises
    ------
    ValueError
        If one of A, B, X, Y contains no in-vocabulary word. Without this
        check the test would silently degenerate (for instance an empty
        target pool yields p = 0 and would be reported as significant).

    Notes
    -----
    The number of partitions grows combinatorially with the number of target
    words, so keep the target sets small.
    """
    import math  # local import: only needed for the progress-bar total

    def keep_known(words):
        # dict.fromkeys de-duplicates while keeping the caller's order, so the
        # printed lists and the summation order no longer depend on the
        # per-process hash ordering of a set.
        return [word for word in dict.fromkeys(words) if word in vocab]

    #check out-of-vocab words
    A = keep_known(A)
    B = keep_known(B)
    X = keep_known(X)
    Y = keep_known(Y)

    for label, words in (("A", A), ("B", B), ("X", X), ("Y", Y)):
        if not words:
            raise ValueError(
                "None of the words in set {} is in the vocabulary; "
                "the test cannot be computed.".format(label))

    # Pool of all target words (a word listed in both X and Y appears once).
    pool = list(dict.fromkeys(X + Y))

    # s(w, A, B) does not depend on the partition, so compute it once per word
    # instead of once per word per partition.
    association = {word: word_attribute_association([word], A, B, vocab)
                   for word in pool}

    def statistic(x_side, y_side):
        # Same quantity as test_statistic(), evaluated on the cached scores.
        return (sum(association[word] for word in x_side)
                - sum(association[word] for word in y_side))

    test_stat_orig = statistic(X, Y)

    subset_size = len(pool)//2
    # Binomial coefficient C(len(pool), subset_size); always >= 1.
    total = math.factorial(len(pool)) // (
        math.factorial(subset_size) * math.factorial(len(pool) - subset_size))

    larger = 0
    # Combinations of distinct words are already unique, so they are streamed
    # instead of being collected into a set first.
    for subset in tqdm(itertools.combinations(pool, subset_size), total=total):
        chosen = set(subset)
        rest = [word for word in pool if word not in chosen]
        if statistic(subset, rest) > test_stat_orig:
            larger += 1

    p_value = larger/float(total)
    if p_value<alpha:
        print("The difference between the attributes {} and {} \nfor the given target words is significant.".format(A,B))
    else:
        print("The difference between the attributes {} and {} \nfor the given target words is not significant.".format(A,B))

    return p_value

In [None]:
############################################################################
# The test is one-sided and therefore not symmetric in X and Y:
# put the target words you expect to be more strongly associated with the
# attribute set A into X, and the target words you expect to be more strongly
# associated with the attribute set B into Y.
############################################################################
# Attribute word sets A and B
A = ['female', 'woman']
B = ['male', 'man']

# Target word sets X and Y (the commented-out lines are alternative sets to try)
X = ['home', 'parents', 'children', 'family', 'cousins', 'marriage', 'wedding', 'relatives']
# X = ['nurse','teacher','librarian']
Y = ['executive', 'management', 'professional', 'corporation', 'salary', 'office', 'business', 'career']
# Y = ['programmer','engineer','scientist']

# Run the permutation test: prints a significance verdict (default alpha=0.05)
# and returns the p-value, which is printed as well.
p = calculate_pvalue(A,B,X,Y, vocab)
print(p)