In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import torch
import pandas as pd
import numpy as np
from pathlib import Path
from typing import *
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
import sys
sys.path.append("../lib")

In [4]:
from bert_utils import Config, BertPreprocessor

In [5]:
config = Config(
    model_type="bert-base-uncased",
    max_seq_len=128,
)

In [6]:
processor = BertPreprocessor(config.model_type, config.max_seq_len)

02/18/2019 19:13:43 - INFO - pytorch_pretrained_bert.tokenization -   loading vocabulary file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased-vocab.txt from cache at /Users/keitakurita/.pytorch_pretrained_bert/26bc1ad6c0ac742e9b52263248f6d0f00068293b33709fae12320c0e35ccfbbb.542ce4285a40d23a559526243235df47c5f75c197f04f37d1a0c124c32c9a084


### Prepare model

In [7]:
from pytorch_pretrained_bert import BertConfig, BertForMaskedLM
model = BertForMaskedLM.from_pretrained(config.model_type)
model.eval()

02/18/2019 19:13:44 - INFO - pytorch_pretrained_bert.modeling -   loading archive file https://s3.amazonaws.com/models.huggingface.co/bert/bert-base-uncased.tar.gz from cache at /Users/keitakurita/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba
02/18/2019 19:13:44 - INFO - pytorch_pretrained_bert.modeling -   extracting archive file /Users/keitakurita/.pytorch_pretrained_bert/9c41111e2de84547a463fd39217199738d1e3deb72d4fec4399e6e241983c6f0.ae3cef932725ca7a30cdcb93fc6e09150a55e2a130ec7af63975a16c153ae2ba to temp dir /var/folders/hy/1czs1y5j2d58zgkqx6w_wnpw0000gn/T/tmpbtkaa022
02/18/2019 19:13:47 - INFO - pytorch_pretrained_bert.modeling -   Model config {
  "attention_probs_dropout_prob": 0.1,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 768,
  "initializer_range": 0.02,
  "intermediate_size": 3072,
  "max_position_embeddings": 512,
  "num_attention_heads"

BertForMaskedLM(
  (bert): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): BertLayerNorm()
      (dropout): Dropout(p=0.1)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0): BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): BertLayerNorm()
              (dropout): Dropout(p=0.1)
            )
          )
          (intermediate): BertIntermediate(
       

In [8]:
sequence_output, pooled_output = model.bert(processor.to_bert_model_input("hello world"),
                                            output_all_encoded_layers=False)

In [9]:
def get_word_vector(sentence: str, word: str, n_calc: int=10):
    idx = processor.get_index(sentence, word)
    outputs = None
    with torch.no_grad():
        for _ in range(n_calc):
            sequence_output, _ = model.bert(processor.to_bert_model_input(sentence),
                                            output_all_encoded_layers=False)
            sequence_output.squeeze_(0)
            if outputs is None: outputs = torch.zeros_like(sequence_output)
            outputs = sequence_output + outputs
    return outputs.detach().cpu().numpy()[idx] / n_calc

In [10]:
def cosine_similarity(x, y):
    return np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y))

In [11]:
vec1 = get_word_vector("he is a programmer.", "programmer")

In [12]:
vec2 = get_word_vector("she is a programmer.", "programmer")

In [13]:
np.linalg.norm(vec1 - vec2)

3.1893094

In [14]:
diff1 = vec1 - vec2

In [15]:
out_softmax = model.cls.predictions.decoder.weight.data.cpu().numpy()

In [16]:
out_softmax.shape

(30522, 768)

In [17]:
ordering = ((out_softmax @ vec1)).argsort()

In [18]:
(out_softmax @ vec1).shape

(30522,)

In [19]:
processor.token_to_index("programmer")

20273

In [20]:
ordering = (-(out_softmax @ vec1)).argsort()

In [21]:
(out_softmax @ vec1)[(out_softmax @ vec1).argsort()]

array([-1.8011806, -1.7252079, -1.6536065, ...,  3.1047938,  4.32384  ,
        4.811602 ], dtype=float32)

In [22]:
ordering = (-(out_softmax @ vec1)).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{29024: 'programmer',
 22224: '[CLS]',
 19482: '##kato',
 7975: 'programmers',
 12416: 'computational',
 24599: 'mathematicians',
 14957: 'hindwings',
 24939: 'constructions',
 8656: 'mathematical',
 16697: 'planner'}

In [23]:
ordering = (-(out_softmax @ vec2)).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{20919: '[CLS]',
 12791: 'programmer',
 21291: '##kato',
 6802: 'mathematical',
 6215: 'programmers',
 10774: 'computational',
 7348: 'constructions',
 5195: 'mathematicians',
 29172: 'mathematician',
 16189: 'nguyen'}

In [24]:
ordering = (-(out_softmax @ (vec1 - vec2))).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{29632: 'lublin',
 13620: 'princess',
 25972: '##lika',
 8296: '##sell',
 20209: 'selma',
 23939: '##rgen',
 12656: '##ouk',
 8478: '##rock',
 1013: '##sant',
 18130: 'joan'}

In [25]:
vec1 = get_word_vector("he is a person", "person")

In [26]:
vec2 = get_word_vector("she is a person", "person")

In [27]:
np.linalg.norm(vec1 - vec2)

2.657354

In [28]:
diff2 = vec1 - vec2

In [29]:
np.dot(diff1, diff2) / (np.linalg.norm(diff1) * np.linalg.norm(diff2))

0.14555535

In [30]:
vec1 = get_word_vector("he is [MASK].", "[MASK]")
vec2 = get_word_vector("she is [MASK].", "[MASK]")
diff3 = vec1 - vec2

In [31]:
ordering = (-(out_softmax @ vec1)).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{30369: '[CLS]',
 17073: '[MASK]',
 20660: ',',
 11980: '.',
 3710: '[SEP]',
 27237: 'and',
 11256: 'the',
 27213: 'of',
 1869: 'in',
 23266: 'to'}

In [32]:
ordering = (-(out_softmax @ vec2)).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{16466: '[CLS]',
 2051: '[MASK]',
 22630: ',',
 22403: '.',
 14568: 'and',
 5205: '[SEP]',
 27994: 'of',
 28900: 'the',
 14947: 'in',
 15689: 'a'}

In [33]:
ordering = (-(out_softmax @ (vec1 - vec2))).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{24031: 'dime',
 14279: 'touring',
 14875: '##lho',
 23402: '##play',
 5069: '##mler',
 27537: '##pheus',
 4158: '##night',
 17765: 'lair',
 9071: 'cargo',
 18386: '##ately'}

## Softmax layer analysis

In [34]:
processor.full_vocab

{0: '[PAD]',
 1: '[unused0]',
 2: '[unused1]',
 3: '[unused2]',
 4: '[unused3]',
 5: '[unused4]',
 6: '[unused5]',
 7: '[unused6]',
 8: '[unused7]',
 9: '[unused8]',
 10: '[unused9]',
 11: '[unused10]',
 12: '[unused11]',
 13: '[unused12]',
 14: '[unused13]',
 15: '[unused14]',
 16: '[unused15]',
 17: '[unused16]',
 18: '[unused17]',
 19: '[unused18]',
 20: '[unused19]',
 21: '[unused20]',
 22: '[unused21]',
 23: '[unused22]',
 24: '[unused23]',
 25: '[unused24]',
 26: '[unused25]',
 27: '[unused26]',
 28: '[unused27]',
 29: '[unused28]',
 30: '[unused29]',
 31: '[unused30]',
 32: '[unused31]',
 33: '[unused32]',
 34: '[unused33]',
 35: '[unused34]',
 36: '[unused35]',
 37: '[unused36]',
 38: '[unused37]',
 39: '[unused38]',
 40: '[unused39]',
 41: '[unused40]',
 42: '[unused41]',
 43: '[unused42]',
 44: '[unused43]',
 45: '[unused44]',
 46: '[unused45]',
 47: '[unused46]',
 48: '[unused47]',
 49: '[unused48]',
 50: '[unused49]',
 51: '[unused50]',
 52: '[unused51]',
 53: '[unused52]',

In [35]:
word_vectors ={
    word: out_softmax[i, :]
    for i, word in processor.full_vocab.items()
}

In [36]:
def cosine_similarity(x, y):
    return np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y))

from heapq import heappush, heappop
def nearest_neighbors(x, n=10):
    if isinstance(x, str):
        x = word_vectors[x]
    heap = []
    for w, v in word_vectors.items():
        sim = cosine_similarity(x, v)
        if len(heap) < n:
            heappush(heap, (sim, w))
        else:
            if heap[0] < (sim, w):
                heappop(heap)
                heappush(heap, (sim, w))
    return sorted(heap, reverse=True)

In [37]:
nearest_neighbors("hello")

[(0.99999994, 'hello'),
 (0.61714774, 'goodbye'),
 (0.59412915, 'goodnight'),
 (0.57801807, 'greeting'),
 (0.5480656, 'farewell'),
 (0.5474026, 'hey'),
 (0.5347895, 'hi'),
 (0.52100366, 'ひ'),
 (0.5076819, 'congratulations'),
 (0.50653183, '人')]

In [38]:
nearest_neighbors("programmer")

[(1.0, 'programmer'),
 (0.8326575, 'programmers'),
 (0.6889887, 'keyboardist'),
 (0.6881369, 'mathematician'),
 (0.6862794, 'mathematicians'),
 (0.68078387, '[unused8]'),
 (0.6806002, '[unused782]'),
 (0.68054837, 'র'),
 (0.6805353, '1756'),
 (0.680412, '[unused59]')]

In [39]:
nearest_neighbors("doctor")

[(1.0000001, 'doctor'),
 (0.7093963, 'doctors'),
 (0.588878, 'physician'),
 (0.5472802, 'physicians'),
 (0.5272302, 'psychiatrist'),
 (0.5178553, 'dentist'),
 (0.51108694, 'surgeon'),
 (0.5001953, 'healer'),
 (0.4947291, 'medical'),
 (0.4873745, 'lawyer')]

In [40]:
nearest_neighbors("queen")

[(1.0, 'queen'),
 (0.64685124, 'king'),
 (0.60091084, 'queens'),
 (0.59095985, 'princess'),
 (0.5419779, 'empress'),
 (0.50120705, 'prince'),
 (0.50038207, 'duchess'),
 (0.49041915, 'countess'),
 (0.4852578, 'monarch'),
 (0.4709278, 'lady')]

In [41]:
nearest_neighbors(word_vectors["man"] - word_vectors["woman"] + word_vectors["king"])

[(0.7900536, 'king'),
 (0.5477464, 'man'),
 (0.41445062, 'kings'),
 (0.41256273, 'prince'),
 (0.35672778, 'queen'),
 (0.35161597, '##man'),
 (0.34965998, '336'),
 (0.3495944, '670'),
 (0.34730172, '268'),
 (0.34716332, '263')]

In [42]:
nearest_neighbors(word_vectors["man"])

[(1.0, 'man'),
 (0.63370425, 'woman'),
 (0.5670583, 'men'),
 (0.536309, '##man'),
 (0.50406426, 'boy'),
 (0.49270344, 'girl'),
 (0.48984453, 'person'),
 (0.46555227, 'guy'),
 (0.46152157, '229'),
 (0.45967817, '228')]

In [43]:
vec1 = get_word_vector("he is [MASK].", "[MASK]")
vec2 = get_word_vector("she is [MASK].", "[MASK]")

In [76]:
diff = (-(out_softmax @ (vec1 - vec2)))
ordering = diff.argsort()
{
    diff[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{0.47656142711639404: 'ethel',
 0.47736799716949463: '##chaft',
 0.49019378423690796: '##eil',
 0.5009146928787231: '##etta',
 0.5041635036468506: '##nction',
 0.5074675381183624: '##ettes',
 0.5083328783512115: 'hostess',
 0.5167080760002136: '##lip',
 0.5244753360748291: 'æ',
 0.525703489780426: 'comets'}

In [45]:
vec1 = get_word_vector("he is [MASK].", "is")
vec2 = get_word_vector("she is [MASK].", "is")
np.dot(vec1, vec2) / (np.linalg.norm(vec1) * np.linalg.norm(vec2))

0.9822475

In [46]:
vec3 = get_word_vector("he is [MASK].", "[MASK]")
vec4 = get_word_vector("she is [MASK].", "[MASK]")
np.dot(vec3, vec4) / (np.linalg.norm(vec3) * np.linalg.norm(vec4))

0.9783271

In [47]:
cosine_similarity((vec1 - vec2), (vec3 - vec4))

0.5959766

In [48]:
vec1 = get_word_vector("she is [MASK].", "[MASK]")
aaaa = out_softmax @ vec1
ordering = (-aaaa).argsort()
{
    aaaa[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{7.317891597747803: '[CLS]',
 4.563726186752319: '[MASK]',
 3.5200722217559814: ',',
 3.3923771381378174: '.',
 3.066467761993408: 'and',
 3.0135951042175293: '[SEP]',
 2.9601149559020996: 'of',
 2.9333361387252808: 'the',
 2.9225765466690063: 'in',
 2.86414110660553: 'a'}

In [49]:
vec1 = get_word_vector("he is [MASK].", "[MASK]")
ordering = (-(out_softmax @ vec1)).argsort()
{
    ordering[i] + 1: processor.index_to_token(i)
    for i in ordering[:10]
}

{30369: '[CLS]',
 17073: '[MASK]',
 20660: ',',
 11980: '.',
 3710: '[SEP]',
 27237: 'and',
 11256: 'the',
 27213: 'of',
 1869: 'in',
 23266: 'to'}

### Comparing words in context

In [50]:
vec1 = get_word_vector("he is a programmer.", "programmer")
vec2 = get_word_vector("she is a programmer.", "programmer")
vec3 = get_word_vector("the programmer wrote code on the board.", "programmer")

In [51]:
cosine_similarity(vec1, vec2)

0.97780985

In [52]:
cosine_similarity(vec1, vec3)

0.70002395

In [53]:
vec1 = get_word_vector("he is a nurse.", "nurse")
vec2 = get_word_vector("she is a nurse.", "nurse")
vec3 = get_word_vector("the nurse wrote code on the board.", "nurse")

In [54]:
cosine_similarity(vec1, vec2)

0.9798227

In [55]:
cosine_similarity(vec1, vec3)

0.7220019

In [56]:
vecs = []
vecs.append(get_word_vector("he is a programmer.", "programmer"))
vecs.append(get_word_vector("he is a programmer.", "he"))
vecs.append(get_word_vector("she is a programmer.", "programmer"))
vecs.append(get_word_vector("she is a programmer.", "she"))

In [57]:
def construct_sim_matrix(vecs):
    sim_matrix = np.zeros((len(vecs), len(vecs)))
    for i, v in enumerate(vecs):
        for j, w in enumerate(vecs):
            sim_matrix[i, j] = cosine_similarity(v, w)
    return sim_matrix

In [58]:
construct_sim_matrix(vecs)

array([[0.99999994, 0.26702088, 0.97780985, 0.25233978],
       [0.26702088, 1.00000012, 0.3049854 , 0.78304911],
       [0.97780985, 0.3049854 , 1.00000012, 0.29841113],
       [0.25233978, 0.78304911, 0.29841113, 1.        ]])

In [59]:
vecs = []
vecs.append(get_word_vector("he is a programmer.", "he"))
vecs.append(get_word_vector("she is a programmer.", "she"))
vecs.append(get_word_vector("his profession is a programmer.", "his"))
vecs.append(get_word_vector("her profession is a programmer.", "her"))
vecs.append(get_word_vector("please talk to him.", "him"))
vecs.append(get_word_vector("please talk to her.", "her"))
vecs.append(get_word_vector("I work as a programmer.", "programmer"))
vecs.append(get_word_vector("I work as a nurse.", "nurse"))
vecs.append(get_word_vector("I work as a doctor.", "doctor"))
vecs.append(get_word_vector("I work as a nurse.", "nurse"))
vecs.append(get_word_vector("I am your father.", "father"))
vecs.append(get_word_vector("I am your mother.", "mother"))

In [60]:
cosine_similarity(vecs[1]- vecs[0], vecs[3] - vecs[2])

0.86490756

In [61]:
cosine_similarity(vecs[1]- vecs[0], vecs[5] - vecs[4])

0.70507485

In [62]:
cosine_similarity(vecs[3]- vecs[2], vecs[5] - vecs[4])

0.68636286

In [63]:
cosine_similarity(vecs[3]- vecs[2], vecs[7] - vecs[6])

0.15071449

In [64]:
cosine_similarity(vecs[3]- vecs[2], vecs[9] - vecs[8])

0.22179423

So, there does seem to be a gender subspace...?

In [65]:
cosine_similarity(vecs[3]- vecs[2], vecs[11] - vecs[10])

0.5433057

### Checking for similarity

Can't find much of a difference...

In [66]:
prog_vec = get_word_vector("[MASK] is a programmer.", "programmer")
she_vec = get_word_vector("she is a programmer.", "she")
he_vec = get_word_vector("he is a programmer.", "he")
construct_sim_matrix([prog_vec, she_vec, he_vec])

array([[1.        , 0.24964862, 0.24885985],
       [0.24964862, 1.        , 0.78304911],
       [0.24885985, 0.78304911, 1.00000012]])

In [67]:
prog_vec = get_word_vector("[MASK] is a programmer.", "programmer")
she_vec = get_word_vector("she is a programmer.", "programmer")
he_vec = get_word_vector("he is a programmer.", "programmer")
construct_sim_matrix([prog_vec, she_vec, he_vec])

array([[1.        , 0.88541675, 0.9050402 ],
       [0.88541675, 1.00000012, 0.97780985],
       [0.9050402 , 0.97780985, 0.99999994]])

### Construct gender subspace

A simple test (TODO: Automate construction)

In [68]:
male_vecs, female_vecs = [], []
def add_word_vecs(s: str, male_w: str, female_w: str):
    male_vecs.append(get_word_vector(s.replace("XXX", male_w), male_w))
    female_vecs.append(get_word_vector(s.replace("XXX", female_w), female_w))

for prof in ["musician", "magician", "nurse", "doctor", "teacher"]:
    add_word_vecs("XXX is a YYY".replace("YYY", prof), "he", "she")
    add_word_vecs("XXX works as a YYY".replace("YYY", prof), "he", "she")

for action in ["talk to", "hit", "ignore", "please", "remove"]:
    add_word_vecs("please YYY XXX".replace("YYY", action), "him", "her")
    add_word_vecs("don't YYY XXX".replace("YYY", action), "him", "her")

for thing in ["food", "music", "work", "running", "cooking"]:
    add_word_vecs("XXX likes YYY".replace("YYY", thing), "he", "she")
    add_word_vecs("XXX enjoys YYY".replace("YYY", thing), "he", "she")

In [69]:
male_vecs = np.r_[male_vecs]
female_vecs = np.r_[female_vecs]

In [70]:
from sklearn.decomposition import TruncatedSVD
svd = TruncatedSVD(n_components=5)

In [71]:
(male_vecs - female_vecs).shape

(30, 768)

In [72]:
X = svd.fit_transform(male_vecs - female_vecs)

In [73]:
svd.explained_variance_ratio_.sum()

0.66273504

In [74]:
svd.components_.shape

(5, 768)

### Try eliminating this subspace and checking outputs softmax

In [75]:
svd.components_

array([[-1.57602888e-03,  6.55766129e-02,  1.21871056e-02, ...,
         1.92999933e-02,  6.08131923e-02, -4.85952981e-02],
       [-4.28800881e-02, -4.10818378e-04,  2.28636040e-04, ...,
        -1.15400646e-02, -1.22710131e-02, -4.74280305e-02],
       [-1.22262845e-02, -3.16431224e-02,  5.98343499e-02, ...,
         3.35653573e-02, -1.42056751e-03,  1.63482148e-02],
       [ 2.51309257e-02,  3.93582471e-02, -1.67421568e-02, ...,
        -3.22544836e-02, -3.98247689e-02, -5.09709455e-02],
       [ 8.02401701e-05, -8.46917089e-03, -2.29576677e-02, ...,
        -7.82717317e-02,  1.30880391e-02, -4.73379856e-03]], dtype=float32)

In [95]:
vec = get_word_vector("[MASK] is a nurse.", "[MASK]")

In [101]:
logits_before = (out_softmax @ vec)

In [102]:
logits_before[processor.token_to_index("she")]

1.6940118

In [103]:
logits_before[processor.token_to_index("he")]

1.2605883

In [126]:
def eliminate_subspace(v, subspace):
    # TODO: Is there a better way?
    V = subspace
    beta = (np.linalg.inv(V @ V.T) @ V) @ v
    res = (v - (V.T @ beta))
    return res

In [127]:
vec_after = eliminate_subspace(vec, svd.components_)

In [128]:
logits_after = (out_softmax @ vec_after)

The difference is indeed reduced

In [129]:
logits_after[processor.token_to_index("she")]

1.2629368

In [130]:
logits_after[processor.token_to_index("he")]

1.1950184