# Debiasing Word Embeddings: An Exploration

In [None]:
from itertools import accumulate, chain, combinations, islice
from pathlib import Path
from random import Random

import numpy as np
from sklearn.decomposition import PCA

from experiments import WordEmbedding
from experiments import read_word_list, read_gender_pairs
from experiments import recenter, normalize, project, reject
from experiments import define_pca_gender_direction
from experiments import load_fasttext_embedding, load_word2vec_embedding
from experiments import debias_bolukbasi_original
from experiments import create_randomized_swapped_corpus

Define the corpus and data to use

In [None]:
# Location of the training corpus and of the definitional gender-pair file.
corpus_path = Path('corpora/wikipedia-1')
gender_pairs_file = Path('data/gender-pairs/definitional')

# Word lists handed to the debiasing and corpus-swapping steps; the tuple is
# evaluated left to right, so the files are read in the order written here.
gender_pairs, gendered_words, equalize_pairs = (
    read_gender_pairs(gender_pairs_file),
    read_word_list(Path('data/gendered-words/gender_specific_seed')),
    read_gender_pairs(Path('data/gender-pairs/equalize')),
)

Load the different embeddings.

In [None]:
# Baseline embedding for the unmodified corpus.
baseline_model = load_word2vec_embedding(
    corpus_path, out_path=Path('models/paper-validation-baseline.w2v'),
)

# Baseline embedding passed through the Bolukbasi debiasing routine.
bolukbasi_model = debias_bolukbasi_original(
    baseline_model, gender_pairs, gendered_words, equalize_pairs,
    out_path=Path('models/paper-validation-bolukbasi.w2v'),
)

# Embedding for a corpus rewritten using both the definitional and the
# equalize pairs.
# NOTE(review): the baseline goes through the word2vec loader while this one
# goes through the fastText loader -- confirm the mismatch is intentional, as
# it may confound comparisons between the models.
swapped_corpus = create_randomized_swapped_corpus(
    corpus_path, list(chain(gender_pairs, equalize_pairs)),
)
swapped_model = load_fasttext_embedding(
    swapped_corpus, method='cbow', out_path=Path('models/paper-validation-swapped.w2v'),
)

Load the different word lists.

In [None]:
# Groups of identity terms (read in the order listed).
races, nationalities = (
    read_word_list(Path(location))
    for location in ('data/swap-groups/races', 'data/swap-groups/nationalities')
)
# Words whose neighbourhoods are compared between embeddings further down.
occupations, adjectives = (
    read_word_list(Path(location))
    for location in ('data/biased-words/occupations', 'data/biased-words/adjectives')
)

Define some useful functions.

In [None]:
def cumulative_sum(seq):
    """Generate the running total of a sequence of numbers.

    Parameters:
        seq (Sequence[float]): The numbers to accumulate.

    Yields:
        float: The sum of every element up to and including the current one.
    """
    running_total = 0
    for value in seq:
        running_total += value
        yield running_total

def find_elbows(seq):
    """Find the "elbows" for PCA/clustering.

    Every window of three consecutive cumulative sums (a, b, c) is scored with
    (b - a) / (c - a), i.e. the share of the two-step gain that is contributed
    by the first of the two steps. A high score means the gain drops sharply
    after the middle element of the window.

    Parameters:
        seq (Sequence[float]): The sequence of variance explained.

    Returns:
        List[Tuple[float, int]]: List of (score, index) tuples, in decreasing
            order. The index is the position of the first element of the
            window, so the score compares seq[index + 1] with seq[index + 2].
            Sequences shorter than three elements produce an empty list.
    """
    cum_sum = list(accumulate(seq))
    scores = []
    for i, (a, b, c) in enumerate(zip(cum_sum, cum_sum[1:], cum_sum[2:])):
        span = c - a
        # A flat window has no gain to split; score it 0.0 instead of
        # dividing by zero.
        proportion = (b - a) / span if span else 0.0
        scores.append((proportion, i))
    return sorted(scores, reverse=True)

def measure_embedding_similarity(embeddings, num_words=100):
    """Measure the distance between word embeddings.

    NOTE: the cross-embedding comparison is not implemented yet, so this
    function currently always returns an empty list. Only the per-embedding
    word-pair distances are computed.

    Parameters:
        embeddings (List[WordEmbedding]): The embeddings to compare. Each must
            provide a `words` iterable and a `distance(word1, word2)` method.
        num_words (int): The number of words to measure distance on, creating
            num_words * (num_words - 1) / 2 unordered pairs. Capped at the
            vocabulary size of the first embedding. Defaults to 100.

    Returns:
        List[List[float]]: Intended to be a symmetric matrix of similarities;
            currently always an empty list.
    """
    if not embeddings:
        return []
    # The probe vocabulary is drawn once, from the first embedding, with a
    # fixed seed so that repeated runs sample the same words.
    candidates = list(embeddings[0].words)
    rng = Random(8675309)
    vocabulary = rng.sample(candidates, min(num_words, len(candidates)))
    word_pairs = list(combinations(vocabulary, 2))
    # Not consumed yet: kept as the input for the comparison described in the
    # TODO below.
    # NOTE(review): presumably every sampled word must exist in every
    # embedding for `distance` to succeed -- confirm against WordEmbedding.
    word_distances = [
        {(word1, word2): embedding.distance(word1, word2) for word1, word2 in word_pairs}
        for embedding in embeddings
    ]
    # TODO: decide how to compare the embeddings. Candidate methods:
    #   - count the overlap in the top-ten most-similar words (higher is better)
    #       https://towardsdatascience.com/comparing-word-embeddings-c2efd2455fe3
    #   - measure average distance between word pairs (lower is better)
    #   https://www.sciencedirect.com/science/article/pii/S1045926X18301241
    #   https://www.aclweb.org/anthology/D15-1036
    distances = []
    return distances

## What words are near the "main" races?

In [None]:
# For every race term, in alphabetical order, list what the baseline model
# reports as nearby words together with their distances.
for race in sorted(races):
    print(race)
    neighbor_lines = (
        f'    {nearby} ({distance:.4f})'
        for nearby, distance in baseline_model.words_near_word(race)
    )
    for line in neighbor_lines:
        print(line)

## What is the dimension of the race subspace?

In [None]:
# One row per race term, recentred before the PCA fit.
matrix = [list(baseline_model[race]) for race in races]
centered = recenter(np.array(matrix))
# fit() returns the estimator itself, so construction and fitting can be chained.
pca = PCA(n_components=len(races)).fit(centered)
# Share of the variance explained by each component, one per line.
for variance_percent in pca.explained_variance_ratio_:
    print(f'{variance_percent:.3%}')
race_subspace = normalize(pca.components_)

There are fewer dimensions here than the number of "races", which is to be expected. I'm surprised the subspace is this large though - I would have thought it would just be 2-3 dimensions.

## What is the dimension of the nationalities subspace?

In [None]:
# Summarise the nationality list: its size followed by the sorted entries.
sorted_names = ", ".join(sorted(nationalities))
print(f'{len(nationalities)} nationalities: {sorted_names}')

In [None]:
# Look nationalities up in lower case and keep only those the baseline model
# contains.
lowercased_nationalities = (nationality.lower() for nationality in nationalities)
matrix = [
    list(baseline_model[name])
    for name in lowercased_nationalities
    if name in baseline_model
]
centered = recenter(np.array(matrix))
# The number of components is bounded by both the row and the column count.
pca = PCA(n_components=min(len(matrix), len(matrix[0]))).fit(centered)
# Only the 20 leading components are reported.
for variance_percent in pca.explained_variance_ratio_[:20]:
    print(f'{variance_percent:.3%}')

Using nationalities showcases the problems: there are more nationalities than there are dimensions, and although the variance explained drops off, there is no clear cutoff point.

In [None]:
# Rebuild the nationality matrix (lower-cased lookups, known words only) so
# that this cell can run on its own.
known_nationalities = filter(
    lambda name: name in baseline_model,
    (nationality.lower() for nationality in nationalities),
)
matrix = [list(baseline_model[name]) for name in known_nationalities]
centered = recenter(np.array(matrix))
pca = PCA(n_components=min(len(matrix), len(matrix[0]))).fit(centered)
# Running total of the explained-variance ratios.
cum_var = list(cumulative_sum(pca.explained_variance_ratio_))
print(cum_var)



def find_thresholds(thresholds, sequence):
    """Find where a sequence first exceeds each of a list of thresholds.

    Both inputs are walked once, front to back, so the result is only
    meaningful when `thresholds` is sorted in increasing order and `sequence`
    is non-decreasing (e.g. a cumulative sum).

    Parameters:
        thresholds (Sequence[float]): The thresholds to look for.
        sequence (Iterable[float]): The values to scan.

    Yields:
        int: For each threshold in turn, the index of the first value that is
            strictly greater than it. The same index is yielded more than once
            when a single value exceeds several thresholds. Thresholds that
            are never exceeded yield nothing.
    """
    threshold_index = 0
    for var_index, variance in enumerate(sequence):
        # A single value may clear several thresholds at once; credit all of
        # them to this index before moving on.
        while threshold_index < len(thresholds) and variance > thresholds[threshold_index]:
            yield var_index
            threshold_index += 1
        if threshold_index >= len(thresholds):
            return
            

# Thresholds at 20%, 40%, 60% and 80% of the cumulative variance.
thresholds = [tenths / 10 for tenths in (2, 4, 6, 8)]
threshold_indices = list(find_thresholds(thresholds, cum_var))
print(threshold_indices)

## What is the dimension of the gender subspace?

This seemingly obvious question is about whether you can just throw pairs of gendered words into PCA and recover the single dimension.

In [None]:
# Treat the definitional file as a flat word list and keep the words that the
# baseline model contains.
gender_words = read_word_list(Path('data/gender-pairs/definitional'))
known_gender_words = filter(lambda word: word in baseline_model, gender_words)
matrix = [list(baseline_model[word]) for word in known_gender_words]
centered = recenter(np.array(matrix))
# One component per retained word.
pca = PCA(n_components=len(matrix)).fit(centered)
for variance_percent in pca.explained_variance_ratio_:
    print(f'{variance_percent:.3%}')
gender_subspace = normalize(pca.components_)

This seems to suggest that you *can't* just throw gender into PCA in this way, which I think means this "gender pair" method is somewhat fragile. How will you know if you can throw other linear subspaces (eg. age) into PCA?

Follow-up question: since we do have a "ground truth" using the per-pair PCA method, how do these components compare to that vector?

In [None]:
# Reference direction: the per-pair PCA gender direction on the baseline model.
definitional_path = Path('data/gender-pairs/definitional')
gender_pairs = read_gender_pairs(definitional_path)
bolukbasi_gender_direction = define_pca_gender_direction(baseline_model, gender_pairs)

# PCA over the individual words from the same file.
gender_words = read_word_list(definitional_path)
matrix = [list(baseline_model[word]) for word in gender_words if word in baseline_model]
centered = recenter(np.array(matrix))
pca = PCA(n_components=len(matrix)).fit(centered)
gender_subspace = normalize(pca.components_)

# Report, per component, its explained variance and the norm of the reference
# direction's projection onto it.
components_with_variance = zip(gender_subspace, pca.explained_variance_ratio_)
for i, (component, variance_percent) in enumerate(components_with_variance, start=1):
    groundtruth = np.linalg.norm(project(bolukbasi_gender_direction, component))
    # print() joins its arguments with a single space.
    print(
        f'Component {i}:',
        f'{variance_percent:.3%} variance explained,',
        f'groundtruth projection {groundtruth:.3f}',
    )

I would not have expected this result - since all vectors are normalized, a projection of ~0.5 would mean that the angle is ~acos(0.5/1) = ~60 degrees. The interesting thing is that this doesn't correlate with the amount of variance explained: components 1, 2, 5, 6 have an angle of around 60 degrees, while components 3, 4 are practically orthogonal.

I do think this is a meaningful result for debiasing in general - that figuring out the subspace is complicated in the first place. Even with just binary gender there are multiple methods (centroid of pairs, PCA of pairs, PCA of words), and it becomes more complicated with non-linear subspaces. This bolsters the argument for a non-subspace-based debiasing method - assuming we can find differences in results.

## Are there words that lose meaning if we remove their racial components?

We answer this by looking for words whose component in the racial subspace is larger than the other component.

In [None]:
# Split every word vector into its part inside the race subspace and the
# remainder, then take the per-row norm of each part.
projected_component = project(baseline_model.vectors, race_subspace)
projected_norms = np.linalg.norm(projected_component, axis=1)
rejected_component = baseline_model.vectors - projected_component
rejected_norms = np.linalg.norm(rejected_component, axis=1)
# True where the in-subspace part is the larger of the two.
indicator = np.greater(projected_norms, rejected_norms)

# Shape sanity checks, in the order the arrays were computed.
for intermediate in (
    projected_component,
    projected_norms,
    rejected_component,
    rejected_norms,
    indicator,
):
    print(intermediate.shape)

# List the words flagged by the indicator.
flagged_words = (
    word
    for word, in_subspace in zip(baseline_model.words, indicator)
    if in_subspace
)
for word in flagged_words:
    print(word)

The answer is no. I suspect the curse of dimensionality is in play here: with 100 total dimensions and only 2-3 racial dimensions, it will never be the majority component except for the words we are doing the PCA on.

## Are the words near adjectives and occupations different between the different embeddings?

In [None]:
# Number of words (adjectives first, then occupations) to inspect.
limit = 10
rows = []
models = [('bolukbasi', bolukbasi_model), ('swapped', swapped_model)]
# islice stops after exactly `limit` words; the previous
# `if index > limit: break` check ran after the loop body and therefore let
# limit + 2 words through.
for word in islice(chain(adjectives, occupations), limit):
    for model_name, model in models:
        rows.extend(
            [word, model_name, nearby, distance]
            for nearby, distance in model.words_near_word(word)
        )
# One row per (word, model, neighbour) combination.
for row in rows:
    print(row)