|<h2>Substack post:</h2>|<h1><a href="https://mikexcohen.substack.com/p/llm-breakdown-36-embeddings" target="_blank">"King - man + woman = queen" is fake news</a></h1>|
|-|:-:|
|<h2>Teacher:</h2>|<h1>Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h1>|

<br>

<i>Using the code without reading the post may lead to confusion or errors.</i>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

from sklearn.metrics.pairwise import cosine_similarity

# pytorch libraries
import torch

In [None]:
### Run this cell only if you're using "dark mode"

# render inline figures as SVG (sharper than the default raster output)
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# dark theme, grouped by value: one background color, one foreground color,
# top/right spines switched off, bold titles and axis labels
plt.rcParams.update({
    **{key: '#383838' for key in ('figure.facecolor','figure.edgecolor','axes.facecolor')},
    **{key: '#DDE2F4' for key in ('axes.edgecolor','axes.labelcolor',
                                  'xtick.color','ytick.color','text.color')},
    **{key: False     for key in ('axes.spines.right','axes.spines.top')},
    **{key: 'bold'    for key in ('axes.titleweight','axes.labelweight')},
})

# Demo 1: Tokenization and embeddings in BERT

In [None]:
# load BERT tokenizer and model
from transformers import BertTokenizer, BertForMaskedLM

# single source of truth for the checkpoint name, so the tokenizer's
# vocabulary always corresponds to the model's embeddings matrix
MODEL_NAME = 'bert-large-uncased'

tokenizer = BertTokenizer.from_pretrained(MODEL_NAME)
model = BertForMaskedLM.from_pretrained(MODEL_NAME)

# switch to inference mode; the returned model is shown as the cell output
model.eval()

In [None]:
# extract the embeddings matrix
# (.detach() separates the weights from autograd so they can be viewed as a numpy array)
embeddings = model.bert.embeddings.word_embeddings.weight.detach().numpy()
# show the matrix size as the cell output; rows are indexed by token id and
# columns are the embedding dimensions (that is how later cells index it)
embeddings.shape

In [None]:
# the three words used throughout the first demo
words = ['king', 'man', 'woman']

# map the words to vocabulary indices (no special tokens added)
tokens = tokenizer.encode(words, add_special_tokens=False)

# report which vocabulary index goes with which word
for word, token_id in zip(words, tokens):
  print(f'Index {token_id} is "{word}"')

In [None]:
# overlay the embedding vector of each word on one axis
fig, ax = plt.subplots(figsize=(10,3))

for word, token_id in zip(words, tokens):
  # one thin line per word: embedding value as a function of dimension index
  ax.plot(embeddings[token_id], linewidth=.8, label=word)

ax.set_xlabel('Embeddings dimension')
ax.set_ylabel('Embedding value')
ax.set_xlim([0, embeddings.shape[1]])
ax.legend()
plt.show()

In [None]:
# pairwise scatter plots: each marker is one embedding dimension, placed by its
# value in one word's vector (x-axis) and in another word's vector (y-axis)
_,axs = plt.subplots(1,3,figsize=(12,3.5))

# (index of x-axis word, index of y-axis word, marker face color) per panel
panels = [ (0,1,[.9,.7,.7,.5]),
           (0,2,[.7,.9,.7,.5]),
           (1,2,[.7,.7,.9,.5]) ]

for ax,(xi,yi,facecolor) in zip(axs,panels):
  ax.plot(embeddings[tokens[xi]],embeddings[tokens[yi]],'wh',
          markerfacecolor=facecolor,markeredgewidth=.5)
  # both words are wrapped in a matching pair of quotes in the title
  ax.set(xlabel=words[xi],ylabel=words[yi],title=f'"{words[xi]}" vs. "{words[yi]}"')

plt.tight_layout()
plt.show()

In [None]:
# cosine similarities between every pair of the selected word vectors
# (all_cs[i,j] compares words[i] with words[j])
all_cs = cosine_similarity(embeddings[tokens])

# report each unique pair once (upper triangle of the matrix); the labels are
# read from `words` so the text always matches the matrix entry being printed
for i in range(len(words)):
  for j in range(i+1,len(words)):
    print(f'Similarity of {all_cs[i,j]:.3f} between "{words[i]}" and "{words[j]}"')

# Demo 2: An analogy-completing function

In [None]:
def analogyCalculator(word2start,word2subtract,word2add):
  """
  Print the 10 vocabulary tokens closest to the analogy vector
  `word2start - word2subtract + word2add`.

  Uses the notebook-level `tokenizer` to map each word to a token index and
  the notebook-level `embeddings` matrix to look up the vectors.

  Parameters
  ----------
  word2start : str
      Base word of the analogy.
  word2subtract : str
      Word whose embedding is subtracted from the base word.
  word2add : str
      Word whose embedding is added to the result.

  Returns
  -------
  None
      Results are printed as a table of cosine similarity, shared variance
      (squared correlation, in percent), and decoded token.

  Raises
  ------
  ValueError
      If the three words do not map to exactly three tokens, or if the
      decoded tokens contain '[UNK]'.
  """

  # 1) print the analogy
  print(f'"{word2start}" is to "{word2subtract}" as "_____" is to "{word2add}"\n')

  # 2) tokenize the words
  tokens = tokenizer.encode([word2start,word2subtract,word2add],
                            add_special_tokens=False)

  # 3) check that each word is one token
  #    (anything other than exactly 3 tokens would mis-index the vectors below)
  if len(tokens)!=3:
    raise ValueError(f'Expected exactly 3 tokens (one per word) but got {len(tokens)}.')
  decoded = tokenizer.decode(tokens)
  if '[UNK]' in decoded:
    raise ValueError(f'Unknown token: {decoded}')

  # 4) get the vectors
  v1 = embeddings[tokens[0]] # base word
  v2 = embeddings[tokens[1]] # to subtract
  v3 = embeddings[tokens[2]] # to add

  # 5) analogy vector
  analogyVector = v1 - v2 + v3

  # 6) cossim with all rows of the embeddings matrix
  cossim2all = cosine_similarity(analogyVector.reshape(1,-1),embeddings)[0]

  # 7) zero-out the similarity to the three input tokens themselves
  cossim2all[tokens] = 0

  # 8) print out the top 10 highest scores (argsort is ascending, so take the
  #    last 10 and reverse them)
  top10 = cossim2all.argsort()[-10:][::-1]
  print('Similarity  |  Shared var.  |    word')
  print('------------+---------------+-------------')
  for widx in top10:
    # correlation (square it to get shared variance)
    r = np.corrcoef(analogyVector,embeddings[widx])[0,1]
    print(f'    {cossim2all[widx]:.3f}   |     {100*r**2:4.1f}%     |  "{tokenizer.decode(widx)}"')


In [None]:
# try it: the classic "king - man + woman" analogy
# (the function prints its own results table)
analogyCalculator('king','man','woman')

In [None]:
# a second analogy; the commented-out calls below are alternatives to try
# by uncommenting one at a time
analogyCalculator('tree','leaf','petals')
# analogyCalculator('leaf','tree','flower') # turn it around for better results?
# analogyCalculator('husky','dog','bird')
# analogyCalculator('finger','hand','foot')
# analogyCalculator('tomorrow','future','past')
# analogyCalculator('pants','legs','arms')

# Demo 3: Analogies during language processing (one sentence)

In [None]:
# 1) define and tokenize a sentence
sentence = "The king appointed both a man and a woman to serve as advisors to the queen in his court."
tokens = tokenizer.encode(sentence,add_special_tokens=False,return_tensors='pt')

# 2) find the positions of king, man, woman, and queen within the sentence
#    (.item() needs a single match, so each word must occur exactly once)
target_tokens = tokenizer.encode(['king','man','woman','queen'],add_special_tokens=False)
king_loc  = torch.where(tokens[0,:] == target_tokens[0])[0].item()
man_loc   = torch.where(tokens[0,:] == target_tokens[1])[0].item()
woman_loc = torch.where(tokens[0,:] == target_tokens[2])[0].item()
queen_loc = torch.where(tokens[0,:] == target_tokens[3])[0].item()

# 3) forward pass and get hidden states
#    (inference only, so skip building the autograd graph)
with torch.no_grad():
  out = model(tokens,output_hidden_states=True)

# 4) check output sizes
print(f'There are {len(out.hidden_states)} "hidden states,"')
print(f'Each of which is size {out.hidden_states[3].shape}')

In [None]:
# redefining analogy tokens to exclude in the subsequent analysis
# (`tokens` now holds the tokenized sentence, so the three word indices get
#  their own name here)
words = [ 'king','man','woman' ]
analogytokens = tokenizer.encode(words,add_special_tokens=False)
# display the three token indices as the cell output
analogytokens

In [None]:
# results matrix, one row per hidden state:
#   column 0 = highest cosine similarity between the analogy vector and any
#              row of the embeddings matrix (analogy words excluded)
#   column 1 = cosine similarity between the analogy vector and "queen"
nlayers = len(out.hidden_states)
cs = np.zeros((nlayers,2))
topWords = []

for layeri,layer_states in enumerate(out.hidden_states):

  # token-by-dimension activations of this layer
  hs = layer_states.detach().squeeze()

  # king - man + woman, built from the in-context vectors, as a single row
  analogyVector = hs[king_loc,:] - hs[man_loc,:] + hs[woman_loc,:]
  analogyRow = analogyVector.reshape(1,-1)

  # best match over the whole vocabulary, ignoring the analogy words
  cossim2all = cosine_similarity(analogyRow,embeddings)[0]
  cossim2all[analogytokens] = 0
  top1 = cossim2all.argsort()[-1]
  cs[layeri,0] = cossim2all[top1]
  topWords.append( tokenizer.decode(top1) )

  # similarity to the in-context "queen" vector of the same layer
  queenRow = hs[queen_loc,:].reshape(1,-1)
  cs[layeri,1] = cosine_similarity(analogyRow,queenRow)[0].item()

# show the results!
fig,ax = plt.subplots(figsize=(12,5))
ax.plot(cs[:,0],'ws',markerfacecolor=[.9,.7,.7],markersize=13,label='Top cs with embeddings')
ax.plot(cs[:,1],'wo',markerfacecolor=[.7,.9,.7],markersize=13,label='cs with queen')

ax.legend()
ax.set(xlabel='Hidden layer',ylabel='Cosine similarity',
       xticks=range(0,nlayers,2))
plt.show()

In [None]:
# list the best-matching vocabulary token found at each layer
for layer_num,top_word in enumerate(topWords):
  print(f'Top word in layer {layer_num:2}: "{top_word}"')

# Demo 4: Addressing a nuance and a confound

In [None]:
# sentences generated by Claude
# (as words, each sentence contains "king", "man", "woman", and "queen" once,
#  and "the" at least once)
sentences = [ "The king appointed both a man and a woman to serve as advisors to the queen in his court.",
              "Every man and woman in the kingdom bowed when the king and queen entered the great hall.",
              "The woman told the man that the king had issued a new decree this morning, and that the queen agreed.",
              "When the king fell ill, the old man and the wise woman were summoned to help the queen transition into power.",
              "The brave woman and the young man requested an audience with the queen and king to present their petition." ]

# target tokens (include "the")
# order matters: later code indexes this list as 0=king, 1=man, 2=woman, 3=queen, 4=the
target_tokens = tokenizer.encode(['king','man','woman','queen','the'],add_special_tokens=False)

In [None]:
# initialize cosine similarity matrix: layers x sentences x (queen, the)
# (`out` here is the forward-pass result already in memory from the
#  single-sentence demo; it is only used for the number of hidden states)
cs = np.zeros((len(out.hidden_states),len(sentences),2))

# 1) loop over sentences
for senti in range(len(sentences)):

  # 2) tokenize this sentence
  tokens = tokenizer.encode(sentences[senti],add_special_tokens=False,return_tensors='pt')

  # 3) find the positions of king, man, woman, queen, and 'the'
  #    (.item() needs a single match for the four analogy words; for 'the',
  #     which can repeat, the last occurrence is used)
  king_loc  = torch.where(tokens[0,:] == target_tokens[0])[0].item()
  man_loc   = torch.where(tokens[0,:] == target_tokens[1])[0].item()
  woman_loc = torch.where(tokens[0,:] == target_tokens[2])[0].item()
  queen_loc = torch.where(tokens[0,:] == target_tokens[3])[0].item()
  the_loc   = torch.where(tokens[0,:] == target_tokens[4])[0][-1].item()

  # 4) forward pass and get hidden states
  #    (inference only, so skip building the autograd graph)
  with torch.no_grad():
    out = model(tokens,output_hidden_states=True)

  # 5) loop over all layers
  for layeri in range(len(out.hidden_states)):

    # 6) isolate this hidden layer and build the analogy vector as one row
    hs = out.hidden_states[layeri].detach().squeeze()
    analogyVector = hs[king_loc,:] - hs[man_loc,:] + hs[woman_loc,:]
    analogyRow = analogyVector.reshape(1,-1)

    # 7) target ("queen") and baseline ("the") cossim
    cs[layeri,senti,0] = cosine_similarity(analogyRow,hs[queen_loc,:].reshape(1,-1))[0].item()
    cs[layeri,senti,1] = cosine_similarity(analogyRow,hs[the_loc,:].reshape(1,-1))[0].item()


In [None]:
fig,ax = plt.subplots(figsize=(12,4))

# one colormap position per sentence; the same number also sets the x-jitter
colorord = np.linspace(.1,1,len(sentences))
nlayers = len(out.hidden_states)

# one line per sentence
for senti,c in enumerate(colorord):

  # layer indices, nudged sideways so the sentences do not overlap exactly
  xvals = np.arange(nlayers) + (c-.45)/5

  # cosine similarity to "queen" across layers for this sentence
  sentcolor = mpl.cm.plasma(c)
  ax.plot(xvals,cs[:,senti,0],'h-',color=sentcolor,markerfacecolor=sentcolor,
          alpha=.7,markersize=12,markeredgecolor='w')

# figure niceties
ax.set(xlabel='Hidden layer',ylabel='Cosine similarity',title='"QUEEN" to analogy vector',
       xticks=range(0,nlayers,2))
ax.grid(linestyle='--',color=[.3,.3,.3])

plt.show()

In [None]:
# left panel: similarity of the analogy vector to "the" (baseline);
# right panel: similarity to "queen" minus similarity to "the"
_,axs = plt.subplots(1,2,figsize=(12,3))
colorord = np.linspace(.1,1,len(sentences))

# loop over sentences
for senti in range(len(sentences)):

  # color first, then x-values: the jitter is derived from this sentence's
  # own color value, so `c` must be set before `xvals` is computed
  c = colorord[senti]
  xvals = np.arange(0,len(out.hidden_states)) + (c-.45)/5

  # plot cosine similarity with "the"
  axs[0].plot(xvals,cs[:,senti,1],'h-',color=mpl.cm.plasma(c),markerfacecolor=mpl.cm.plasma(c),
                alpha=.7,markersize=9,markeredgecolor='w')

  # and the queen-the difference
  diff = cs[:,senti,0] - cs[:,senti,1]
  axs[1].plot(xvals,diff,'h-',color=mpl.cm.plasma(c),markerfacecolor=mpl.cm.plasma(c),
                alpha=.7,markersize=9,markeredgecolor='w')

# figure niceties
axs[0].set(xlabel='Hidden layer',ylabel='Cosine similarity',title='"THE" to analogy vector',
           xticks=range(0,len(out.hidden_states),2))
axs[1].set(xlabel='Hidden layer',ylabel='Cosine similarity',title='QUEEN - THE',
           xticks=range(0,len(out.hidden_states),2))

for a in axs: a.grid(linestyle='--',color=[.3,.3,.3])

plt.tight_layout()
plt.show()