1. Visualize word embeddings (NLP specialization; t-SNE, Week 2 of Sequence Models)
2. De-bias word embeddings (Week 2 of Sequence Models)
3. Neural Machine Translation (Week 3 of Sequence Models) - problem: supervised!

Bleu Score for validating hypothesis?

TODO:
- second data set and / or bias axis
- BERT
- Repo-Link

# Operations on Word Vectors

In [31]:
import numpy as np

In [32]:
# Download and extract pre-trained GloVe embeddings (822 MB): https://nlp.stanford.edu/data/glove.6B.zip
# Save the 50-dimensional embeddings to the "data" folder (glove.6B.50d.txt)

def read_glove_vecs(glove_file):
    with open(glove_file, 'r') as f:
        words = set()
        word_to_vec_map = {}
        
        for line in f:
            line = line.strip().split()
            curr_word = line[0]
            words.add(curr_word)
            word_to_vec_map[curr_word] = np.array(line[1:], dtype=np.float64)
            
    return words, word_to_vec_map

# Load the 50-dimensional GloVe word vectors
words, word_to_vec_map = read_glove_vecs("data/glove.6B.50d.txt")

---
$$\text{CosineSimilarity(u, v)} = \frac {u \cdot v} {||u||_2 ||v||_2} = cos(\theta) \tag{1}$$
---

In [61]:
from scipy.spatial.distance import cosine

In [5]:
def cosine_similarity(u, v):
    """
    Returns the cosine similarity between two vectors u and v.
        
    Arguments:
        u -- a word vector of shape (n,)          
        v -- a word vector of shape (n,)

    Returns:
        cosine_similarity -- the cosine similarity between u and v defined by the formula above.
    """
    
    # Special case. Consider the case u = [0, 0], v=[0, 0]
    if np.all(u == v):
        return 1
    
    # Compute the dot product between u and v 
    dot = np.dot(u, v)
    
    # Compute the L2 norm of u 
    norm_u = np.sqrt(np.sum(u**2))
    
    # Compute the L2 norm of v 
    norm_v = np.sqrt(np.sum(v**2))
    
    # Avoid division by 0
    if np.isclose(norm_u * norm_v, 0, atol=1e-32):
        return 0
    
    # Compute the cosine similarity defined by formula (1) 
    cosine_similarity = dot / (norm_u * norm_v)
    
    return cosine_similarity

In [6]:
# Load word vectors
man = word_to_vec_map["man"]
doctor = word_to_vec_map["doctor"]
woman = word_to_vec_map["woman"]
nurse = word_to_vec_map["nurse"]

# Demonstrate gender bias
man_to_doctor_as_woman_to_nurse = cosine_similarity(man, doctor) - cosine_similarity(woman, nurse)
man_to_nurse_as_woman_to_doctor = cosine_similarity(man, nurse) - cosine_similarity(woman, doctor)

# Print results
print(round(man_to_doctor_as_woman_to_nurse, 2))
print(round(man_to_nurse_as_woman_to_doctor, 2))
print(f"man-doctor:\t{round(cosine_similarity(man, doctor), 2)}")
print(f"woman-nurse:\t{round(cosine_similarity(woman, nurse), 2)}")
print(f"man-woman:\t{round(cosine_similarity(man, woman), 2)}")
print(f"doctor-nurse:\t{round(cosine_similarity(doctor, nurse), 2)}")
print(f"man-nurse:\t{round(cosine_similarity(man, nurse), 2)}")
print(f"woman-doctor:\t{round(cosine_similarity(woman, doctor), 2)}")

-0.0
-0.15
man-doctor:	0.71
woman-nurse:	0.72
man-woman:	0.89
doctor-nurse:	0.8
man-nurse:	0.57
woman-doctor:	0.73


In [7]:
def complete_analogy(word_a, word_b, word_c, word_to_vec_map):
    """
    Performs the word analogy task: a is to b as c is to ____. 
    
    Arguments:
    word_a -- a word, string
    word_b -- a word, string
    word_c -- a word, string
    word_to_vec_map -- dictionary that maps words to their corresponding vectors. 
    
    Returns:
    best_word --  the word such that v_b - v_a is close to v_best_word - v_c, as measured by cosine similarity
    """
    
    # Convert words to lowercase
    word_a, word_b, word_c = word_a.lower(), word_b.lower(), word_c.lower()
    
    # Get the word embeddings e_a, e_b and e_c
    e_a, e_b, e_c = word_to_vec_map[word_a], word_to_vec_map[word_b], word_to_vec_map[word_c]
    
    words = word_to_vec_map.keys()
    max_cosine_sim = -100              # Initialize max_cosine_sim to a large negative number
    best_word = None                   # Initialize best_word with None, it will help keep track of the word to output
    
    # Loop over the whole word vector set
    for w in words:   
        # To avoid best_word being one of the input words, skip the input word_c
        # Skip word_c from query
        if w == word_c:
            continue
        
        # Compute cosine similarity between the vector (e_b - e_a) and the vector ((w's vector representation) - e_c)  
        cosine_sim = cosine_similarity(e_b - e_a, word_to_vec_map[w] - e_c)
        
        # If the cosine_sim is more than the max_cosine_sim seen so far,
            # Then: set the new max_cosine_sim to the current cosine_sim and the best_word to the current word
        if cosine_sim > max_cosine_sim:
            max_cosine_sim = cosine_sim
            best_word = w
        
    return best_word

In [8]:
# Define triads to try analogies
triads = [('italy', 'italian', 'spain'), ('india', 'delhi', 'japan'), ('man', 'woman', 'boy'), ('small', 'smaller', 'large')]

# Print results
for triad in triads:
    print ('{} -> {} :: {} -> {}'.format( *triad, complete_analogy(*triad, word_to_vec_map)))

italy -> italian :: spain -> spanish
india -> delhi :: japan -> tokyo
man -> woman :: boy -> girl
small -> smaller :: large -> smaller


# Debiasing Word Vectors

The debiasing algorithm is from Bolukbasi et al., 2016, [Man is to Computer Programmer as Woman is to
Homemaker? Debiasing Word Embeddings]

#### 1. Identify bias direction (e.g. gender)
- e(he) - e(she)
- e(male) - e(female)
- ...
- Average = bias direction of gender

#### 2. Neutralize gender-neutral words
- gender-intrinsic (e.g. girl/boy, he/she) vs. gender-neutral (e.g. doctor, babysitter)
- linear classifier to identify which words should be neutralized

#### 3. Equalize pairs
- e.g. grandmother and grandfather should have the same distance from gender-neutral words
- hand-pick pairs to be equalized

In [15]:
# Calculate distance between gender pair woman-man
woman_man = word_to_vec_map['woman'] - word_to_vec_map['man']

# Calculate distance between gender pair mother-father
mother_father = word_to_vec_map['mother'] - word_to_vec_map['father']

# Calculate distance between gender pair girl-boy
girl_boy = word_to_vec_map['girl'] - word_to_vec_map['boy']

# Average over the gender pairs to get a simple representation of gender
gender = np.average([woman_man, mother_father, girl_boy], axis=0)

# Print gender vector
print(gender)

[ 0.07656667  0.34967667 -0.40057667 -0.03130333  0.0088      0.72586333
  0.10256     0.14906333  0.4780662  -0.22850987  0.05957667 -0.68663
  0.62210033  0.10395     0.17747667  0.09556867 -0.49258333 -0.17066233
  0.46930033  0.02196333  0.28145667  0.50513333  0.17144733  0.40154767
  0.24039333  0.1646     -0.17984667  0.24042667  0.05689333 -0.31423
 -0.10933333  0.26355967  0.06100667 -0.01156405 -0.12236333 -0.188245
 -0.13215057 -0.068186    0.05624667 -0.29555567 -0.09669533 -0.29559667
  0.62465867 -0.40130167  0.03330667 -0.24831667  0.26381667 -0.28738333
  0.03020433  0.054106  ]


In [18]:
# Define girls and boys names for comparing the gender similarity
name_list = ['john', 'marie', 'sophie', 'ronaldo', 'priya', 'rahul', 'danielle', 'reza', 'katy', 'yasmin']

# Print results
for w in name_list:
    print (w, round(cosine_similarity(word_to_vec_map[w], gender), 2))

john -0.31
marie 0.34
sophie 0.41
ronaldo -0.29
priya 0.2
rahul -0.19
danielle 0.29
reza -0.17
katy 0.31
yasmin 0.2


In [23]:
# Define random words for comparing the gender similarity
word_list = ['lipstick', 'guns', 'science', 'arts', 'literature', 'warrior','doctor', 'tree', 'receptionist', 
             'technology',  'fashion', 'teacher', 'engineer', 'pilot', 'computer', 'singer']

# Print results (and look at the gender stereotypes!)
for w in word_list:
    print (w, round(cosine_similarity(word_to_vec_map[w], gender), 2))

lipstick 0.41
guns -0.09
science -0.06
arts 0.01
literature 0.02
warrior -0.17
doctor 0.08
tree 0.04
receptionist 0.3
technology -0.16
fashion 0.14
teacher 0.11
engineer -0.23
pilot -0.04
computer -0.17
singer 0.2


---
Bias Neutralization Algorithm (https://arxiv.org/abs/1607.06520)

$$e^{bias\_component} = \frac{e \cdot g}{||g||_2^2} * g\tag{2}$$
$$e^{debiased} = e - e^{bias\_component}\tag{3}$$
---

In [25]:
def neutralize(word, g, word_to_vec_map):
    """
    Removes the bias of "word" by projecting it on the space orthogonal to the bias axis. 
    This function ensures that gender neutral words are zero in the gender subspace.
    
    Arguments:
        word -- string indicating the word to debias
        g -- numpy-array of shape (50,), corresponding to the bias axis (such as gender)
        word_to_vec_map -- dictionary mapping words to their corresponding vectors.
    
    Returns:
        e_debiased -- neutralized word vector representation of the input "word"
    """
    
    # Select word vector representation of "word". Use word_to_vec_map. 
    e = word_to_vec_map[word]
    
    # Compute e_biascomponent using the formula given above. 
    e_biascomponent = np.dot(np.dot(e, g) / np.sum(g ** 2), g)
 
    # Neutralize e by subtracting e_biascomponent from it.
    # e_debiased should be equal to its orthogonal projection.
    e_debiased = e - e_biascomponent
    
    return e_debiased

In [31]:
# Define a word and generate a gender-debiased version of it
e = "receptionist"
e_debiased = neutralize(e, gender, word_to_vec_map)

# Print results
print("cosine similarity between " + e + " and g, before neutralizing: ", round(cosine_similarity(word_to_vec_map["receptionist"], gender), 2))
print("cosine similarity between " + e + " and g, after neutralizing: ", round(cosine_similarity(e_debiased, gender), 2))

cosine similarity between receptionist and g, before neutralizing:  0.3
cosine similarity between receptionist and g, after neutralizing:  0.0


---
Equalization Algorithm for Gender-specific Words (https://arxiv.org/abs/1607.06520)

$$ \mu = \frac{e_{w1} + e_{w2}}{2} \tag{4} $$ 

$$ \mu_{B} = \frac {\mu \cdot \text{bias axis}}{||\text{bias axis}||_2^2} *\text{bias axis} \tag{5} $$

$$ \mu_{\perp} = \mu - \mu_{B} \tag{6} $$

$$ e_{w1B} = \frac {e_{w1} \cdot \text{bias axis}}{||\text{bias axis}||_2^2} *\text{bias axis} \tag{7} $$

$$ e_{w2B} = \frac {e_{w2} \cdot \text{bias axis}}{||\text{bias axis}||_2^2} *\text{bias axis} \tag{8} $$

$$e_{w1B}^{corrected} = \sqrt{ |{1 - ||\mu_{\perp} ||^2_2} |} * \frac{e_{\text{w1B}} - \mu_B} {||(e_{w1} - \mu_{\perp}) - \mu_B||_2} \tag{9} $$

$$e_{w2B}^{corrected} = \sqrt{ |{1 - ||\mu_{\perp} ||^2_2} |} * \frac{e_{\text{w2B}} - \mu_B} {||(e_{w2} - \mu_{\perp}) - \mu_B||_2} \tag{10} $$

$$e_1 = e_{w1B}^{corrected} + \mu_{\perp} \tag{11} $$

$$e_2 = e_{w2B}^{corrected} + \mu_{\perp} \tag{12} $$
---

In [32]:
def equalize(pair, bias_axis, word_to_vec_map):
    """
    Debiases gender-specific words by following the equalize method described above.
    
    Arguments:
    pair -- pair of strings of gender specific words to debias, e.g. ("actress", "actor") 
    bias_axis -- numpy-array of shape (50,), vector corresponding to the bias axis, e.g. gender
    word_to_vec_map -- dictionary mapping words to their corresponding vectors
    
    Returns
    e_1 -- word vector corresponding to the first word
    e_2 -- word vector corresponding to the second word
    """
    
    # Step 1: Select word vector representation of "word". Use word_to_vec_map
    w1, w2 = pair
    e_w1, e_w2 = word_to_vec_map[w1], word_to_vec_map[w2]
    
    # Step 2: Compute the mean of e_w1 and e_w2
    mu = (e_w1 + e_w2) / 2

    # Step 3: Compute the projections of mu over the bias axis and the orthogonal axis
    mu_B = np.dot(np.dot(mu, bias_axis) / np.sum(bias_axis ** 2), bias_axis)
    mu_orth = mu - mu_B

    # Step 4: Use equations (7) and (8) to compute e_w1B and e_w2B 
    e_w1B = np.dot(np.dot(e_w1, bias_axis) / np.sum(bias_axis ** 2), bias_axis)
    e_w2B = np.dot(np.dot(e_w2, bias_axis) / np.sum(bias_axis ** 2), bias_axis)
        
    # Step 5: Adjust the bias part of e_w1B and e_w2B using the formulas (9) and (10) given above 
    corrected_e_w1B = np.dot(np.sqrt(np.abs(1 - np.sum(mu_orth ** 2))), (e_w1B - mu_B) / np.linalg.norm(e_w1 - mu_orth - mu_B))
    corrected_e_w2B = np.dot(np.sqrt(np.abs(1 - np.sum(mu_orth ** 2))), (e_w2B - mu_B) / np.linalg.norm(e_w2 - mu_orth - mu_B))

    # Step 6: Debias by equalizing e1 and e2 to the sum of their corrected projections 
    e1 = corrected_e_w1B + mu_orth
    e2 = corrected_e_w2B + mu_orth
    
    return e1, e2

In [35]:
print("cosine similarities before equalizing:")
print("cosine_similarity(word_to_vec_map[\"man\"], gender) = ", round(cosine_similarity(word_to_vec_map["man"], gender), 2))
print("cosine_similarity(word_to_vec_map[\"woman\"], gender) = ", round(cosine_similarity(word_to_vec_map["woman"], gender), 2))

print()
e1, e2 = equalize(("man", "woman"), gender, word_to_vec_map)

print("cosine similarities after equalizing:")
print("cosine_similarity(e1, gender) = ", round(cosine_similarity(e1, gender), 2))
print("cosine_similarity(e2, gender) = ", round(cosine_similarity(e2, gender), 2))

cosine similarities before equalizing:
cosine_similarity(word_to_vec_map["man"], gender) =  -0.02
cosine_similarity(word_to_vec_map["woman"], gender) =  0.4

cosine similarities after equalizing:
cosine_similarity(e1, gender) =  -0.66
cosine_similarity(e2, gender) =  0.66
