Axis Orthogonality
==================

Analyzes the orthogonality of the semantic axes (gender, class, race) to check whether a word's projection onto one axis is mostly independent of the other basis axis in a 2D projection.

In [1]:
import gensim
import numpy as np

In [2]:
# Load the word2vec vectors from the binary GoogleNews file one directory up.
model = gensim.models.KeyedVectors.load_word2vec_format(
    '../GoogleNews-vectors-negative300.bin',
    binary=True,
)

In [3]:
# Axis name -> direction vector; starts empty and is filled by the loop over AXIS_DEFS.
AXES = {}

# Axis name -> list of word pairs. Each pair's embedding difference
# (first word minus second word) contributes to that axis's direction.
AXIS_DEFS = {
    'gender': [
        ('man', 'woman'),
        ('men', 'women'),
        ('he', 'she'),
        ('him', 'her'),
        ('his', 'her'),
        ('his', 'hers'),
        ('boy', 'girl'),
        ('boys', 'girls'),
        ('male', 'female'),
        ('masculine', 'feminine'),
    ],
    'class': [
        ('rich', 'poor'),
        ('richer', 'poorer'),
        ('richest', 'poorest'),
        ('affluence', 'poverty'),
        ('affluent', 'impoverished'),
        ('expensive', 'inexpensive'),
        ('luxury', 'cheap'),
        ('opulent', 'needy'),
    ],
    'race': [
        ('black', 'white'),
        ('blacks', 'whites'),
        ('Blacks', 'Whites'),
        ('Black', 'White'),
        ('African', 'European'),
        ('African', 'Caucasian'),
    ],
}

In [4]:
# Build one direction vector per axis: the element-wise mean of the embedding
# differences (first word minus second word) over that axis's word pairs.
# `model` is already a KeyedVectors object, so it is indexed directly; the
# `.wv` self-alias on KeyedVectors is deprecated in gensim 3.x and removed in
# gensim 4.0, where `model.wv[...]` raises AttributeError.
for axis, word_pairs in AXIS_DEFS.items():
    all_axis_vecs = [model[first] - model[second] for first, second in word_pairs]
    AXES[axis] = np.average(all_axis_vecs, axis=0)

  This is separate from the ipykernel package so we can avoid doing imports until


In [9]:
# Scale each axis vector to unit length so that the dot product of two of
# them equals the cosine of the angle between those axes.
normed_gender, normed_class, normed_race = (
    AXES[axis_name] / np.linalg.norm(AXES[axis_name])
    for axis_name in ('gender', 'class', 'race')
)

In [11]:
# Angle between the gender and class axes in degrees; 90 means orthogonal.
np.arccos(np.dot(normed_gender, normed_class)) / np.pi * 180

89.67793859454456

In [13]:
# Angle between the gender and race axes in degrees; 90 means orthogonal.
np.arccos(np.dot(normed_gender, normed_race)) / np.pi * 180

92.02944978237488

In [14]:
# Angle between the class and race axes in degrees; 90 means orthogonal.
np.arccos(np.dot(normed_class, normed_race)) / np.pi * 180

97.99801058859052