|<h2>Substack post:</h2>|<h1><a href="https://mikexcohen.substack.com/p/llm-breakdown-36-embeddings" target="_blank">LLM breakdown 3/6: Embeddings</a></h1>|
|-|:-:|
|<h2>Teacher:</h2>|<h1>Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h1>|

<br>

<i>Using the code without reading the post may lead to confusion or errors.</i>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
from matplotlib import gridspec

from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN

# pytorch libraries
import torch
import torch.nn.functional as F

# huggingface LLM
from transformers import GPT2Tokenizer

In [None]:
### Run this cell only if you're using "dark mode"

# svg plots (higher-res)
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# dark theme, assembled group by group and applied in a single update
dark_theme = {}

# near-black background for the figure and the axes
for key in ('figure.facecolor','figure.edgecolor','axes.facecolor'):
  dark_theme[key] = '#171717'

# light foreground for axes lines, labels, ticks, and text
for key in ('axes.edgecolor','axes.labelcolor','xtick.color','ytick.color','text.color'):
  dark_theme[key] = '#DDE2F4'

# hide the right and top axes borders
for key in ('axes.spines.right','axes.spines.top'):
  dark_theme[key] = False

# bold titles and axis labels
for key in ('axes.titleweight','axes.labelweight'):
  dark_theme[key] = 'bold'

plt.rcParams.update(dark_theme)

# Importing GPT2's embeddings matrix

In [None]:
# huggingface LLM classes: the causal-LM model loader and the GPT2 tokenizer
from transformers import AutoModelForCausalLM, GPT2Tokenizer

# GPT2 model and its tokenizer, both loaded from the pretrained 'gpt2' checkpoint
model = AutoModelForCausalLM.from_pretrained('gpt2')
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')

In [None]:
# toggle model into "evaluation" mode (turns off training-related operations);
# no training happens in this file: every later forward pass runs under torch.no_grad()
model.eval()

In [None]:
# weight matrix of the model's token-embeddings layer ("wte"), detached so that
# it is a plain tensor outside of autograd
embeddings = model.transformer.wte.weight.detach()

# show its size; the cells below index its rows by token id
embeddings.shape

# Demo 1: Visualizing embeddings vectors

In [None]:
# two words (each must map to exactly one GPT2 token)
word1 = 'hello'
word2 = 'world'

# get two token indices (each is a list of token ids)
token1 = tokenizer.encode(word1)
token2 = tokenizer.encode(word2)

# fail early with a clear message if either word splits into several tokens;
# the plots and the similarity below assume exactly one embeddings vector per word
if len(token1)!=1 or len(token2)!=1:
  raise ValueError(f'Both words must be single tokens, but "{word1}" has {len(token1)} '
                   f'and "{word2}" has {len(token2)}.')

# their embeddings vectors (1D after squeezing out the singleton token axis)
emb1 = embeddings[token1].squeeze()
emb2 = embeddings[token2].squeeze()

# and their cosine similarity (both inputs are 1D, so compare along dim 0
# instead of unsqueezing only one of them and relying on broadcasting)
cos_sim = F.cosine_similarity(emb1,emb2,dim=0)

# and plot: a wide panel for the raw vectors, a narrow panel for the scatter
fig = plt.figure(figsize=(12,3))
gs = gridspec.GridSpec(1,4,figure=fig)
ax1 = fig.add_subplot(gs[:3])
ax2 = fig.add_subplot(gs[-1])

# left panel: value of each embeddings dimension, one marker style per word
ax1.plot(emb1,'ks',markerfacecolor=[.9,.7,.7,.5],markersize=5,label=word1)
ax1.plot(emb2,'ko',markerfacecolor=[.7,.9,.7,.5],markersize=5,label=word2)

ax1.set(xlabel='Embeddings dimension',ylabel='Value',xlim=[-5,len(emb1)+5],title='Embeddings of two words')
ax1.legend()

# right panel: one word's embeddings against the other's, dimension by dimension
ax2.plot(emb1,emb2,'ko',markerfacecolor=[.7,.7,.9,.5])
ax2.set(xlabel=f'Embeddings of "{word1}"',ylabel=f'Embeddings of "{word2}"',
        title=f'Cosine similarity: {cos_sim.item():.2f}')

plt.tight_layout()
plt.show()

In [None]:
# heat map of the whole embeddings matrix (transposed: tokens along x, dimensions along y)
fig_mat,ax_mat = plt.subplots(figsize=(12,4))
img = ax_mat.imshow(embeddings.T,vmin=-.1,vmax=.1,aspect='auto',cmap='bwr')

ax_mat.set(xlabel='Token index',ylabel='Embeddings dimension',
           title='Embeddings matrix')

# color scale, placed close to the image
fig_mat.colorbar(img,ax=ax_mat,pad=.01)
plt.show()

# Demo 2: Dimension-reducing embeddings

In [None]:
# extract the first N embeddings (the rows for token indices 0 to nToks-1)
nToks = 100
subEmbed = embeddings[:nToks,:]

# reduce to 2D with t-SNE
# NOTE(review): no random_state is passed, so the layout presumably differs between runs; confirm
tsne = TSNE(n_components=2,perplexity=5)
tsne_result = tsne.fit_transform(subEmbed)

# the result is an Nx2 matrix
tsne_result.shape

In [None]:
# plot the results
plt.figure(figsize=(7,6))

plt.scatter(tsne_result[:,0], tsne_result[:,1], color=[.7,.7,1],edgecolor='k')

# label words
# the shift is computed after the scatter so that the y-limits reflect the data,
# and is kept as a plain float (np.diff would return a 1-element array and turn
# every text y-position into an array)
ymin,ymax = plt.gca().get_ylim()
yoffset = .02 * (ymax-ymin) # shift words up by x%

# row i of the t-SNE result belongs to token index i
for i,(x,y) in enumerate(tsne_result[:nToks]):
  plt.text(x, y+yoffset, tokenizer.decode([i]),  ha='center')

plt.gca().set(xlabel='TSNE dim 1',ylabel='TSNE dim 2',title='T-SNE visualization of embeddings')

plt.tight_layout()
plt.show()

# Demo 3: Manipulating embeddings vectors

In [None]:
# the main text
text = 'The capital of Germany is'

# source and target tokens, plus the competing country/capital pair
source,target = ' Germany',' Berlin'
distractor_source,distractor_target = ' France',' Paris'

# tokenize the text (as a PyTorch tensor whose first row holds the token ids)
tokens = tokenizer.encode(text,return_tensors='pt')

# tokenize the individual words (each becomes a list of token ids)
source_idx = tokenizer.encode(source)
target_idx = tokenizer.encode(target)
distractor_source_idx = tokenizer.encode(distractor_source)
distractor_target_idx = tokenizer.encode(distractor_target)

# position in the text of the source word to replace
# (.item() requires that the source token occurs exactly once)
country_loc = (tokens[0]==source_idx[0]).nonzero(as_tuple=True)[0].item()
country_loc

In [None]:
# forward pass without gradient tracking, then softmax over the last axis
with torch.no_grad():
  logits = model(tokens).logits.detach()
  sm_logits = F.softmax(logits,dim=-1)

# probabilities of the two target tokens at the final position
# (despite the "_logit" suffix, these are softmax probabilities)
final_probs = sm_logits[0,-1]
target_logit = final_probs[target_idx]
distractor_target_logit = final_probs[distractor_target_idx]

print(f'           Target prob: {100*target_logit.item():.2f}%')
print(f'Distractor target prob: {100*distractor_target_logit.item():.2f}%')

In [None]:
# mixing weight of the " Germany" embedding (1 = only " Germany", 0 = only " France");
# a global that the hook function reads each time it runs
pGermany = 1

# define and implant the hook function
def hook(module, input, output):
  """Forward hook that overwrites one position of `output` with a mixed embedding.

  Reads the globals `pGermany`, `embeddings`, `source_idx`,
  `distractor_source_idx`, and `country_loc`. The vector at
  `output[0,country_loc,:]` is replaced in place by
  pGermany*source + (1-pGermany)*distractor, and `output` is returned.
  `module` and `input` are not used.
  """

  # 1) print shape info
  print(f'Variable "output" has shape {output.shape}')

  # 2) look up the two embeddings and blend them according to pGermany
  source_vec = embeddings[source_idx,:]
  distractor_vec = embeddings[distractor_source_idx,:]
  blended = pGermany*source_vec + (1-pGermany)*distractor_vec

  # 3) report the shape of the blend, then write it over the source position
  print(f'Variable "mixed_vector" has shape {blended.shape}')
  output[0,country_loc,:] = blended

  # 4) hand back the modified version
  return output

# 5) implant the hook function on the token-embeddings layer;
#    the handle is kept so that the hook can be removed again later
hookHandle = model.transformer.wte.register_forward_hook(hook)

In [None]:
# set the mixture to 0% " Germany", i.e. the hook implants the pure " France" embedding
pGermany = 0

# forward pass without gradient tracking, then softmax over the last axis
with torch.no_grad():
  logits = model(tokens).logits.detach()
  sm_logits = F.softmax(logits,dim=-1)

# probabilities of the two target tokens at the final position
# (despite the "_logit" suffix, these are softmax probabilities)
final_probs = sm_logits[0,-1]
target_logit = final_probs[target_idx]
distractor_target_logit = final_probs[distractor_target_idx]

print('')
print(f'           Target prob: {100*target_logit.item():.2f}%')
print(f'Distractor target prob: {100*distractor_target_logit.item():.2f}%')

In [None]:
# Now for the experiment!

# mixture values: 17 evenly spaced proportions from 0 (all " France") to 1 (all " Germany")
mixtures = np.linspace(0,1,17)

# initialize (rows: mixtures; column 0: target, column 1: distractor target)
target_prob = np.zeros((len(mixtures),2))

# try/finally guarantees that the hook is removed even if a forward pass fails,
# so the model is never left with the hook still implanted
try:
  for i,mix in enumerate(mixtures):

    # set the mixing parameter (globally defined and read by the hook function)
    pGermany = mix

    # run the tokens through the model
    with torch.no_grad():
      sm_logits = F.softmax(model(tokens).logits.detach(),dim=-1)

    # get the softmax probabilities (in %) for the targets at the final position;
    # .item() extracts a plain float instead of storing a 1-element tensor into the numpy array
    target_prob[i,0] = 100*sm_logits[0,-1,target_idx].item()
    target_prob[i,1] = 100*sm_logits[0,-1,distractor_target_idx].item()

finally:
  # remove the hook
  hookHandle.remove()

In [None]:
# probability of each capital as a function of the mixing proportion
fig_mix,ax_mix = plt.subplots(figsize=(8,4))

# one line per column of target_prob: (column, line/marker format, marker color, legend label)
series = (
  (0,'ko-',[.7,.9,.7],'Berlin'),
  (1,'ks-',[.7,.7,.9],'Paris'),
)
for col,fmt,face,lab in series:
  ax_mix.plot(mixtures,target_prob[:,col],fmt,markerfacecolor=face,markersize=10,label=lab)

# label only the two extremes and the midpoint of the mixture axis
ax_mix.set(xlabel='Manipulated vector proportion',ylabel='Softmax probability (%)',title='$\\sigma$(logits) to "The capital of [Germany/France] is"',
           xticks=[0,.5,1],xticklabels=['100%\n"France"','50/50','100%\n"Germany"'])
ax_mix.legend(fontsize=16)

plt.show()