|<h2>Course:</h2>|<h1><a href="https://udemy.com/course/dullms_x/?couponCode=202508" target="_blank">A deep understanding of AI language model mechanisms</a></h1>|
|-|:-:|
|<h2>Part 5:</h2>|<h1>Observation (non-causal) mech interp</h1>|
|<h2>Section:</h2>|<h1>Investigating token embeddings</h1>|
|<h2>Lecture:</h2>|<h1><b>CodeChallenge: Word2vec vs. GPT2</b></h1>|

<br>

<h5><b>Teacher:</b> Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h5>
<h5><b>Course URL:</b> <a href="https://udemy.com/course/dullms_x/?couponCode=202508" target="_blank">udemy.com/course/dullms_x/?couponCode=202508</a></h5>
<i>Using the code without the course may lead to confusion or errors.</i>

In [None]:
# gensim provides the downloader used to fetch the word2vec vectors in this notebook.
# Uncomment the next line and run this cell once if gensim is not installed.
# !pip install gensim

In [None]:
import numpy as np
import matplotlib.pyplot as plt

import gensim.downloader as api
from transformers import GPT2Model,GPT2Tokenizer

# use SVG as the output format for figures displayed inline in the notebook
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

In [None]:
# import word2vec
# (the 'word2vec-google-news-300' vectors, loaded through gensim's downloader API)
# NOTE(review): the first call presumably downloads a large file -- confirm before running offline
w2v = api.load('word2vec-google-news-300')

In [None]:
# tokenizer and pretrained model, both loaded from the same 'gpt2' checkpoint
gpt_tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
gpt2 = GPT2Model.from_pretrained('gpt2')

# token-embedding weights, detached from autograd and converted to a numpy array
wte_weights = gpt2.wte.weight
gpt_embedding = wte_weights.detach().numpy()

# Exercise 1: Find 100 matching tokens

In [None]:
# all words in the word2vec vocabulary as a plain list
# (iterating over the key->index mapping yields its keys)
w2v_tokens = list(w2v.key_to_index)

# preview the first few entries
w2v_tokens[:10]

In [None]:
# text form of every GPT-2 token: decode each vocabulary ID on its own,
# so that list position == token ID
vocab_ids = range(gpt_tokenizer.vocab_size)
gpt2_tokens = [gpt_tokenizer.decode([token_id]) for token_id in vocab_ids]

# preview an arbitrary slice
gpt2_tokens[10000:10010]

In [None]:
# Find 6-letter tokens in word2vec and check whether they also exist in GPT-2.
# Keep the first 100 matches (in word2vec vocabulary order) for the RSA.
n_tokens = 100 # number of matching tokens to collect
word_len = 6   # only consider words with exactly this many characters

# Set of GPT-2 token strings: a membership test is O(1), whereas list.index()
# rescans the entire vocabulary for every candidate word.
gpt2_token_set = set(gpt2_tokens)

tokens2compare = []

# loop over word2vec words
for word in w2v_tokens:

  # keep words of the right length that exactly match a GPT-2 token string
  # (explicit membership test, so no exception is raised or silently swallowed)
  if len(word)==word_len and word in gpt2_token_set:
    tokens2compare.append(word)

    # stopping criterion
    if len(tokens2compare)>=n_tokens:
      break

In [None]:
# report where each matched word sits in the two vocabularies
for word in tokens2compare:
  w2v_idx = w2v_tokens.index(word)
  gpt_idx = gpt2_tokens.index(word)
  print(f'"{word}" is index {w2v_idx:4} in w2v and index {gpt_idx:5} in GPT2.')

# Exercise 2: Create embeddings matrices

In [None]:
# embeddings matrices for the matched words (one row per word, same row order in both)
E_w2v = np.array([w2v[w] for w in tokens2compare])

# Look up each word's GPT-2 row via its position in the decoded vocabulary list,
# i.e., the same token the word was matched against. Re-encoding the string and
# keeping only the first ID would silently select a sub-token if the tokenizer
# were to split the word into several pieces.
gpt_token_ids = [gpt2_tokens.index(w) for w in tokens2compare]
E_gpt = gpt_embedding[gpt_token_ids,:]

In [None]:
# print the shape of each embeddings matrix
print(f'Size of w2v matrix: {E_w2v.shape}')
print(f'Size of gpt matrix: {E_gpt.shape}')

# visual sanity check: overlay the two embeddings of the first matched word
first_word = tokens2compare[0]
n_gpt_dims = E_gpt.shape[1]
n_w2v_dims = E_w2v.shape[1]

fig,ax = plt.subplots(figsize=(10,4))
ax.plot(range(n_gpt_dims),E_gpt[0,:],'o-',label='GPT2')
ax.plot(range(n_w2v_dims),E_w2v[0,:],'s-',label='word2vec')
ax.set(xlim=[-5,n_gpt_dims+5],xlabel='Dimension',ylabel='Value',title=f'Embeddings for "{first_word}"')

ax.legend(fontsize=10)
plt.show()

# Exercise 3: Cosine similarity matrices

In [None]:
# row-wise L2 norms, kept as column vectors so the division broadcasts across each row
w2v_row_norms = np.linalg.norm(E_w2v,axis=1,keepdims=True)
gpt_row_norms = np.linalg.norm(E_gpt,axis=1,keepdims=True)

# scale every embedding vector to unit length
E_w2v_norm = E_w2v / w2v_row_norms
E_gpt_norm = E_gpt / gpt_row_norms

# dot products between all pairs of unit vectors = cosine similarity matrices
cs_matrix_w2v = np.matmul(E_w2v_norm,E_w2v_norm.T)
cs_matrix_gpt = np.matmul(E_gpt_norm,E_gpt_norm.T)

In [None]:
# one panel per model, drawn with identical color limits and tick layout
fig,axs = plt.subplots(1,2,figsize=(12,5))

skip = 5 # only every 5th token gets a tick label
n_tok = len(tokens2compare)

# (matrix, title) for each panel, in left-to-right order
panels = [
  (cs_matrix_w2v,'Cossim matrix for word2vec'),
  (cs_matrix_gpt,'Cossim matrix for GPT-2'),
]

for ax,(cs_matrix,panel_title) in zip(axs,panels):

  h = ax.imshow(cs_matrix,vmin=.1,vmax=.6,cmap='plasma')

  # x labels start at token 0 and y labels at token 1, so the two axes list different words
  ax.set(xticks=range(0,n_tok,skip),xticklabels=tokens2compare[::skip],
         yticks=range(1,n_tok,skip),yticklabels=tokens2compare[1::skip],
         title=panel_title)
  ax.tick_params(axis='x',labelrotation=90)
  fig.colorbar(h,ax=ax,pad=.02)

plt.tight_layout()
plt.show()

# Exercise 4: Quantitative comparison via RSA

In [None]:
# extract the upper-triangular elements (k=1 excludes the diagonal of self-similarities)
# both matrices are built from the same words, so one set of indices serves both
triu_idx = np.triu_indices_from(cs_matrix_w2v, k=1)
unique_w2v = cs_matrix_w2v[triu_idx]
unique_gpt = cs_matrix_gpt[triu_idx]

# Pearson correlation
r = np.corrcoef(unique_w2v,unique_gpt)[0,1]

# cosine similarity between the two vectors of pairwise similarities
# (np.sum is vectorized; Python's builtin sum() would iterate over the array one element at a time)
num = np.sum(unique_w2v*unique_gpt)
den = np.sum(unique_w2v**2) * np.sum(unique_gpt**2)
sc = num/np.sqrt(den)

# plot
plt.plot(unique_w2v,unique_gpt,'ks',markerfacecolor=[.7,.9,.7,.7])
plt.gca().set(xlim=[-.2,1],ylim=[-.2,1],xlabel='w2v cosine similarities',ylabel='GPT cosine similarities',
              title=f'r = {r:.3f}, $S_c$ = {sc:.3f}')

# reference lines at zero; negative zorder on both so they are drawn beneath the data markers
plt.axhline(0,linestyle='--',color=[.8,.8,.8],zorder=-19)
plt.axvline(0,linestyle='--',color=[.8,.8,.8],zorder=-19)

plt.show()