In [89]:
import urllib.request
import numpy as np
import json
import string
from sklearn import metrics as sk_m
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
import sklearn as sk

In [2]:
# URL to retrive pre-trained 300 dimensional gloVe embedding
embedding_300_url = "http://www.cs.virginia.edu/~tw8cb/word_embeddings/vectors.txt"

def read_embedding(url):
    """Function to read out an embedding
    Input: url: url to embedding
    
    Returns: vocab: list of words in the embedding
             w2id: dictionary mapping words to ids
             embedding: array storing the word vectors,
                           row corresponds to word id"""
    # Open url
    data = urllib.request.urlopen(url)
    vocab = []
    embedding = []
    
    # Each line contains one word and its embedding
    for line in data:
        line = line.decode()
        # Split by spaces
        split = line.split()
        # First element(== the word) is added to vocabulary
        vocab.append(split[0])
        # All other elements(embedding vectors) are added to vectors
        embedding.append([float(elem) for elem in split[1:]])
    
    # Create a dictionary with word-id pairs based on the order
    w2id = {w: i for i, w in enumerate(vocab)}
    # Vectors are converted into an array
    embedding = np.array(embedding).astype(float)
    
    return vocab, w2id, embedding
    
vocab_original, w2id_original, embedding_original = read_embedding(embedding_300_url)

In [3]:
def hasDigit(word):
    """Checks if a string contains any digits"""
    return any(char.isdigit() for char in word)

def hasSpecialChar(word):
    """Checks if a string contains special characters(except "_")"""
    special_characters = "!@#$%^&*()-+?=,<>/."
    return any(char in special_characters for char in word)

In [4]:
def restrict_vocab(vocab, w2id, embedding):
    """Limits the vocab by removing words containing digits or special characters
    Input: vocab: list of words in the embedding
           w2id: dictionary mapping words to ids
           embedding: array storing the word vectors
           
    Returns: limit_vocab: list of words in vocab that do not include digits or special characters
             limit_w2id: dictionary mapping words in limit_vocab to new ids
             limit_embedding: array storing the word vectors of the words in limit_vocab only"""
    limit_vocab = []
    limit_embedding = []
    
    for word in vocab:
        # If word includes either a digit or a special character move on to next word
        if hasDigit(word) or hasSpecialChar(word):
            continue
        # Else add word to limit_vocab and its embedding to limit_embedding    
        limit_vocab.append(word)
        limit_embedding.append(embedding[w2id[word]])
        
    # Convert embedding into an array    
    limit_embedding = np.array(limit_embedding).astype(float)
    # Create new dictionary containing only the words in limit_vocab and their new ids
    limit_w2id = {word: i for i, word in enumerate(limit_vocab)}
    
    return limit_vocab, limit_w2id, limit_embedding

In [5]:
vocab, w2id, embedding = restrict_vocab(vocab_original, w2id_original, embedding_original)
print("Original vocab size: ", len(vocab_original))
print("Restricted vocab size: ", len(vocab))

Original vocab size:  322636
Restricted vocab size:  314952


In [6]:
def exclude_vocab(vocab, exclude):
    """Function to exclude specific words from vocabulary
    Input: vocab: list of words in the embedding
           exclude: list of words to exclude from the vocabulary
           
    Returns: limited_vocab: vocab without the words in exclude"""
    # Create copies of vocab, word2id and word_vector
    limited_vocab = vocab.copy()
    # For all words that are in exclude and vocab
    for word in exclude:
        if word in limited_vocab:
            # Remove word from vocab
            limited_vocab.remove(word)
            
    return limited_vocab

In [7]:
# URL to female specific words as listed by the authors
female_words_url = "https://raw.githubusercontent.com/uvavision/Double-Hard-Debias/master/data/female_word_file.txt"
female_words_data = urllib.request.urlopen(female_words_url)

# List of female words
female_words = []
for line in female_words_data:
    line = line.decode()
    line = line.split()
    female_words.append(line[0])

In [8]:
# URL to male specific words as listed by the authors
male_words_url = "https://raw.githubusercontent.com/uvavision/Double-Hard-Debias/master/data/male_word_file.txt"
male_words_data = urllib.request.urlopen(male_words_url)

# List of male words
male_words = []
for line in male_words_data:
    line = line.decode()
    line = line.split()
    male_words.append(line[0])

In [9]:
# Create List with female - male pairs from female-male specific words
female_male_pairs = []
for i, female in enumerate(female_words):
    female_male_pairs.append([female, male_words[i]])

In [10]:
# URLs to the files storing gender specific words as listed by the authors
gender_specific_url = "https://raw.githubusercontent.com/uvavision/Double-Hard-Debias/master/data/gender_specific_full.json"

# Empty list to accumulate gender specific words plus additional list after lowercasing
gender_specific_original = []
gender_specific = []


# Read out URL and add further gender specific words
with urllib.request.urlopen(gender_specific_url) as f:
    gender_specific_original.extend(json.load(f))

# Add lower case words to second list
for word in gender_specific_original:
    gender_specific.append(word.lower())

In [11]:
# URL to the file storing definitional pairs as listed by the authors
definitial_pairs_url = "https://raw.githubusercontent.com/uvavision/Double-Hard-Debias/master/data/definitional_pairs.json"

# Empty list to store definitional pairs plus additional list after lowercasing
definitional_pairs_original = []
definitional_pairs = []


# Read out url and add pairs in list
with urllib.request.urlopen(definitial_pairs_url) as f:
    definitional_pairs_original.extend(json.load(f))
    
# Add lower case pairs to second list
for [w1, w2] in definitional_pairs_original:
    definitional_pairs.append([w1.lower(), w2.lower()])


# Create list of single words instead of pairs  
definitional_words = []
for pair in definitional_pairs:
    for word in pair:
        definitional_words.append(word)
        

In [12]:
# URL to the file storing the equalize pairs as listed by the authors
equalize_pairs_url = "https://raw.githubusercontent.com/uvavision/Double-Hard-Debias/master/data/equalize_pairs.json"

# Empty list to store equalize pairs plus additional list after lowercasing
equalize_pairs_original = []
equalize_pairs = []

# Read out URL and add pairs to list
with urllib.request.urlopen(equalize_pairs_url) as f:
    equalize_pairs_original.extend(json.load(f))
    
# Add lower case pairs to second list
for [w1, w2] in equalize_pairs_original:
    equalize_pairs.append([w1.lower(), w2.lower()])
    
# Create list of single words instead of pairs
equalize_words = []
for pair in equalize_pairs:
    for word in pair:
        equalize_words.append(word)

In [13]:
# List of all gender specific words included in 
# female words, male words, gender specific words, equalize words and definitional words
exclude_words = list(set(female_words + male_words + gender_specific + definitional_words + equalize_words))

In [14]:
# Remove gender specific words from the embedding to obtain vocabulary of neutral words
vocab_neutral = exclude_vocab(vocab, exclude_words)
print("Vocab size: ", len(vocab))
print("Neutral vocab size: ", len(vocab_neutral))

Vocab size:  314952
Neutral vocab size:  314293


In [160]:
def embed(word, w2id=w2id, embedding=embedding):
    return embedding[w2id[word]]

### Gender Subspace

In [257]:
def idtfy_gender_subspace(word_sets, w2id, defining_sets, embedding, k=1):
    """
    identifies the bias (gender) subspace following Bolukbasi et al. 2016
    
    takes
    word_sets: vocabulary
    w2id: a dictionary to translate words contained in the vocabulary into their corresponding IDs
    defining_sets: N defining sets (pairs if I=2) consisting of I words that differ mainly on the bias (gender) direction
    embedding: the embedding of the vocabulary
    k: an integer parameters that defines how many rows of SVD(C) constitute the bias (gender) subspace B, bias (gender) direction if k=1
    
    returns
    bias_subspace: linear bias (gender) subspace (direction) that is assumed to capture most of the gender bias (denoted as B in Bolukbasi et al. 2016)
    """
    
    N = len(defining_sets) # we have 10 pairs in our defining_sets
    I = len(defining_sets[0]) # = 2 in case of pairs (as for gender bias)
    embedding_size = len(embedding[0]) # our embedding size is 300
    
    C = np.zeros((N, I, embedding_size))
        
    for n, d_set in enumerate(defining_sets):
        mean_vector = np.zeros((I, embedding_size))
        for i, word in enumerate(d_set):
            mean_vector[i] = embed(word) / I
            C[n][i] = (embed(word) - mean_vector[i]) * np.transpose((embed(word) - mean_vector[i])) / I
            
    #C = []
    #for female_word, male_word in defining_sets:
    #    mean = (embed(female_word) + embed(male_word)) / 2
    #    C.append(embed(female_word) - mean)
    #    C.append(embed(male_word) - mean)
    
    #C = np.array(C)
    
    print("C",C.shape)
    #print(C)
    #_, SVD_C, _ = np.linalg.svd(C)
    SVD_C = np.linalg.svd(C, compute_uv = False)
    print("no uv", SVD_C.shape)
    B = SVD_C[:k]
    print("B",B)
    
    
    print("cov")
    # as proven by Vargas & Cotterell 2020 the matrix C from Bolukbasi et al. 2016 can be written as an empirical covariance matrix for I = 2
    matrix = []
    for a, b in defining_sets:
        center = (embed(a) + embed(b))/2
        matrix.append(embed(a)-center)
        matrix.append(embed(b)-center)
    matrix = np.array(matrix)
    print("matrix", matrix.shape)
    #print(matrix)
    
    # have to apply PCA when interpreting C as covariance matrix
    pca = PCA(n_components = 10)
    pca.fit(matrix)
    print("pca",pca.components_[0].shape)
    # take the first pca
    pca = pca.components_[0]
    
    SVD_matrix = np.linalg.svd(C, compute_uv=False)
    svd = SVD_matrix[:k]
    print("svd",svd)

    
    print("new_B")
    array = np.ndarray((10,2,300))
    #i=0
    array_two = np.zeros((10,300,300))
    for j, d_pair in enumerate(definitional_pairs):
        for i, word in enumerate(d_pair):
            # fill array with embeddings
            array[j][i] = embedding[w2id[word]]
            #i = i+1
        # print(array[j][0].shape)
        # calculate covariance between embeddings of same definitional pair?
        array_two[j]=np.cov(np.transpose(array[j]))
        
    print(array_two.shape)
    #new_C = np.cov(array)
    #print(np.shape(new_C))
    _, new_SVD_C, _ = np.linalg.svd(array_two)
    print(np.shape(new_SVD_C))
    new_B = new_SVD_C[:k]
    print(new_B.shape)
    
    return B, new_B, pca

In [259]:
B, new_B, pca = idtfy_gender_subspace(vocab, w2id, definitional_pairs, embedding)

C (10, 2, 300)
no uv (10, 2)
B [[1.41470925 0.24232465]]
cov
matrix (20, 300)
pca (300,)
svd [[1.41470925 0.24232465]]
new_B
(10, 300, 300)
(10, 300)
(1, 300)


### most biased 500 words

In [269]:
# most biased male and female words
def most_biased(embedding, B, k=500):
    # small x, else memory issues
    x = 50000
    all_biased = np.ndarray((x,1))
    for i, word in enumerate(embedding):
        if i < x:
            all_biased[i] = (sk_m.pairwise.cosine_similarity(word.reshape(1, 300), B.reshape(1, 300)))[0]
            # print(sk_m.pairwise.cosine_similarity(word.reshape(1,300), B)[0])
    #print(all_biased)
    most_biased_f = []
    most_biased_m = []
    for word in range(k):
        # female words
        fb_index = np.argmax(all_biased)
        most_biased_f.append(fb_index)
        all_biased[fb_index] = 0
        # male words
        mb_index = np.argmin(all_biased)
        most_biased_m.append(mb_index)
        all_biased[mb_index] = 0
    #print(most_biased_f, most_biased_m)
    return most_biased_f, most_biased_m

In [270]:
index_f, index_m = most_biased(embedding, pca)

In [266]:
female_most_biased = [vocab[i] for i in index_f]
print("female", female_most_biased)
male_most_biased = [vocab[i] for i in index_m]
print("male", male_most_biased)

female ['actress', 'pregnant', 'louise', 'therese', 'abbess', 'sister', 'chairwoman', 'alumna', 'princess', 'ballerina', 'maid', 'headmistress', 'pregnancy', 'josephine', 'olga', 'spinster', 'businesswoman', 'socialite', 'woman', 'heroine', 'congresswoman', 'matron', 'emmeline', 'seductive', 'uterus', 'feminist', 'actresses', 'feisty', 'princesses', 'mary', 'herself', 'suffragette', 'suffragist', 'louisa', 'ellen', 'countess', 'waitress', 'emily', 'goddess', 'girl', 'aunt', 'menstruation', 'sisters', 'governess', 'agnes', 'laura', 'duchess', 'filipina', 'archduchess', 'kuznetsova', 'daughters', 'svetlana', 'menstrual', 'noblewoman', 'katherine', 'mother', 'grandmother', 'elsa', 'nun', 'handbag', 'rebecca', 'prostitute', 'marchioness', 'sophie', 'valentina', 'irene', 'girlfriend', 'niece', 'glamorous', 'devi', 'francisca', 'manuela', 'millicent', 'nina', 'housewife', 'henriette', 'sultry', 'hilda', 'doreen', 'virgen', 'doña', 'inna', 'helene', 'thérèse', 'edith', 'baroness', 'schoolgirl

### Double Hard Debias

In [281]:
def double_hard_debias(words, index_m, index_f, w2id):
    """Double Hard Debias:
    
    words: word embeddings of some corpus
    males: set of most biased male words 
    females: set of most biased female words
    w2id:
    """
    
    males = [words[i] for i in index_m]
    females = [words[i] for i in index_f]    
    
    #need: Word embeddings, top 500 Male biased words set Wm, top 500 Female biased words set Wf
    #1. for all word embeddings: decentralize all words
    mue = (len(words)**(-1)) * np.sum(words, axis=0)
    # print(mue)
    words_decen = np.zeros((words.shape))
    for index, embedding in enumerate(words):
        # print(index,":",embedding)
        words_decen[index] = embedding - mue
    
    #print("decentralized:",words_decen)
    #print("origin:",words)
        
    #2. for all decentralized embeddings: compute PCA
    #princ_comp = np.asarray(pca_tft(words_decen))
    #print("Principal Components:",princ_comp)
    pca = PCA().fit(words_decen)
    princ_comp = pca.components_

    #print("Sklearn PC:", pca.components_)

    evaluations = []

    #3. for all principal components:
    for pc in princ_comp:
        male_proj = np.zeros((len(males),300))
        male_debias = np.zeros((len(males),300))
        female_proj = np.zeros((len(females),300))
        female_debias = np.zeros((len(females),300))
   
        for index, male in enumerate(males):
        #male embedding = decentralized embedding - projected original (?) embedding into direction of PC
            #print((male-mue).shape, ((np.transpose(pc)*male)*pc).shape)
            male_proj[index] = (male - mue) - ((np.transpose(pc)*male)*pc)
            #with all new male embeddings: HardDebias
            male_debias[index] = hard_debias(male_proj[index])
        
        for index, female in enumerate(females):
        #female embedding = decentralized embedding - projected original (?) embedding into direction of PC
            female_proj[index] = (female - mue) - ((np.transpose(pc)*male)*pc)
            #with all new female embeddings: HardDebias
            female_debias[index] = hard_debias(female_proj[index])
    
        #for all HardDebiased embeddings: KMeansClustering (2)
        #for clustered embeddings: compute gender alignment accuracy
        #4. store evaluations for each principal components
        evaluations.append(align_acc(male_debias, female_debias))
    
    #5. evaluate which PC lead to most random cluster (evaluation smallest (close to 0.5), used second PC)
    best_eval = evaluations.index(np.min(evaluations))
    best_pc = princ_comp[best_eval]
    #print("Best PC:",best_pc,"with evaluation:",evaluations[best_eval])

    first_debias = np.zeros((words.shape))
    #6. for all decentralized embeddings: remove that PC-direction
    for index,word in enumerate(words_decen):
        first_debias[index] = word - ((np.transpose(best_pc)*words[index])*best_pc)
    
    #7. for all new embeddings: HardDebias
    double_debias = np.zeros((words.shape))
    for index,word in enumerate(first_debias):
        double_debias[index] = hard_debias(word)

    return double_debias


In [None]:
result = double_hard_debias(embedding, index_m, index_f, w2id)

In [282]:
#Gender alignment accuracy/ Neighborhood Metric:
def align_acc(males, females):
    """bias measurement using KMeans Clustering
    
    takes female and male word's embeddings
    ground truth labels:
    0 = male,
    1 = female"""
    
    #need: k (=1000) most biased female and male word's embedding (cosine similarity embedding & gender direction),
    #1. assign ground truth gender labels: 0 = male, 1 = female
    #2. run KMeans on embeddings
    kmeans = KMeans(n_clusters=2).fit(np.concatenate((males, females)))
    split = males.shape[0]
    correct = 0
    #print(kmeans.labels_)
    #3. compute alignment score: cluster assignment vs ground truth gender label
    for i in range(np.concatenate((males, females)).shape[0]):
        if i < split and kmeans.labels_[i] == 0:
            correct+= 1
        elif i >= split and kmeans.labels_[i] == 1:
            correct += 1
    
    #4. alignment score = max(a, 1-a)
    alignment = 1/(2*2) * correct
    alignment = np.maximum(alignment, 1-alignment)
    
    return alignment 

Additional inputs: words to neutralize $N\subseteq W$, family of equality sets $\mathcal{E} = \{E_1, E_2, ..., E_m\}$ where each $E_i \subseteq W$. For each word $w \in N$, let $\vec{w}$ be re-embedded to $\vec{w}:=(\vec{w}-\vec{w}_B/||\vec{w}-\vec{w}_B||$. For each set $E\in \mathcal{E}$, let $\mu:=\sum_{w\in E}w/|E|$ and $v:=\mu-\mu_B$. For each $w \in E$, $\vec{w}:=v+\sqrt{1-||v||^2}\frac{\vec{w}_B-\mu_B}{||\vec{w}_B-\mu_B||}$. Finally, output the subspace $B$ and the new embedding $\{\vec{w}\in\mathbb{R}^d\}_{w\in W}$.

In [283]:
def hard_debias (word_emb, equalize_pairs=equalize_pairs, B=pca):
    """performs hard debias on a word embedding to neutralize it,
    
    takes 
    word_emb: word embedding of the word to be neutralized,
    equalize_pairs: equality pairs, each neutral word should be equidistant to all words in each equality set
    B: the bias subspace
    
    returns
    B: the bias subspace
    new_word_emb: the new embedding for word_emb
    """
    
    new_word_emb = word_emb - B * (word_emb.dot(B) / B.dot(word_emb))
    
    return new_word_emb#, B im paper steht, dass auch B returned werden soll, aber das macht hier keinen Sinn

In [284]:
result = double_hard_debias(embedding, index_m, index_f, w2id)

In [285]:
print(result)

[[ 0.46401551 -0.36138454 -0.14305111 ... -0.02062871  0.49958673
  -0.1748378 ]
 [ 0.31128478  0.10707839  0.11905837 ...  0.04600415  0.43654551
  -0.49432505]
 [ 0.1432653   0.00264024  0.2555798  ...  0.0127982   0.17556582
  -0.20136911]
 ...
 [-0.15505765 -0.06034876 -0.40329671 ...  0.01550556 -0.02788516
  -0.22335757]
 [ 0.20760419  0.2392062  -0.29398582 ...  0.28803001  0.00363695
   0.17358627]
 [-0.64096775  0.31325449 -0.33158808 ...  0.28636279 -0.10186761
  -0.01754304]]


In [286]:
print(result.shape)

(314952, 300)
