|<h2>Course:</h2>|<h1><a href="https://udemy.com/course/dullms_x/?couponCode=202508" target="_blank">A deep understanding of AI language model mechanisms</a></h1>|
|-|:-:|
|<h2>Part 5:</h2>|<h1>Observation (non-causal) mech interp</h1>|
|<h2>Section:</h2>|<h1>Investigating layers</h1>|
|<h2>Lecture:</h2>|<h1><b>CodeChallenge: Dimensionalities in Pythia 2.8B</b></h1>|

<br>

<h5><b>Teacher:</b> Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h5>
<h5><b>Course URL:</b> <a href="https://udemy.com/course/dullms_x/?couponCode=202508" target="_blank">udemy.com/course/dullms_x/?couponCode=202508</a></h5>
<i>Using the code without the course may lead to confusion or errors.</i>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl
import matplotlib.gridspec as gridspec

import requests

import torch

# vector plots: ask the inline backend to render figures as SVG
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# Exercise 1: Import and inspect Pythia-2.8b

In [None]:
from transformers import AutoModelForCausalLM, AutoTokenizer

# the tokenizer and the model are loaded from the same EleutherAI checkpoint
checkpointName = 'EleutherAI/pythia-2.8b'
tokenizer = AutoTokenizer.from_pretrained(checkpointName)
model = AutoModelForCausalLM.from_pretrained(checkpointName)

# run on the first GPU when one is available, otherwise on the CPU
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# switch to evaluation mode (last expression of the cell, so the model is also displayed)
model.eval()

In [None]:
# Alice in Wonderland (Project Gutenberg).
# Only ONE text is fetched here; to analyze a different text, swap this URL for
# one of the alternatives listed in the Exercise 4 cell and re-run the notebook.
response = requests.get('https://www.gutenberg.org/cache/epub/11/pg11.txt',timeout=30)
response.raise_for_status() # stop here on an HTTP error instead of tokenizing an error page
text = response.text

# tokenize the full text, then keep a 1000-token window from the middle of it
allTokens = tokenizer.encode(text,return_tensors='pt')
tokens = allTokens[:,10000:11000]

# slicing past the end of a short text silently returns an empty tensor
assert tokens.shape[1]>0, 'The text has fewer than 10000 tokens; choose a different token window.'

print(tokenizer.decode(tokens[0]))

In [None]:
# shuffled control: the same tokens in a random order
# a dedicated, seeded generator makes the permutation reproducible across
# kernel restarts without changing the state of torch's global RNG
shuffleRNG = torch.Generator().manual_seed(0)
shuffleOrder = torch.randperm(tokens.shape[1],generator=shuffleRNG)

# index the sequence dimension so the (batch, tokens) layout is preserved
tokensShuffle = tokens[:,shuffleOrder]
print(tokenizer.decode(tokensShuffle[0]))

In [None]:
# forward pass for the real and the shuffled sequence, requesting the hidden states
# (gradients are switched off; nothing is trained here)
with torch.no_grad():
  outputs_real,outputs_shuf = [
      model(seq.to(device),output_hidden_states=True)
      for seq in (tokens,tokensShuffle) ]

# quick report on what came back
hiddenStates = outputs_real.hidden_states
print(f'There are {len(hiddenStates)} hidden layers.')
print(f'Each layer has size {hiddenStates[0].shape}')

In [None]:
# number of hidden-state tensors returned for the real tokens;
# reused below to size the result matrices, loop over layers, and scale the plots
numHidden = len(outputs_real.hidden_states)

# Exercise 2: Calculate dimensionality matrices

In [None]:
# variance criteria (in percent) that define the "effective" number of components
varThresholds = (95,99)

# an SVD returns min(tokens, dimensions) singular values per layer
numComps = min(outputs_real.hidden_states[0].shape[1:])

# initialize
cumVarExplained = np.zeros((numHidden,numComps,2))                        # layers x components x [real,shuffled]
effectiveCompCount = np.zeros((numHidden,2,len(varThresholds)),dtype=int) # layers x [real,shuffled] x criteria


# loop over layers
for layeri in range(numHidden):

  # identical pipeline for the real (index 0) and the shuffled (index 1) tokens
  for condi,outputs in enumerate((outputs_real,outputs_shuf)):

    # extract all the activations from this layer (first and only sequence in the batch)
    acts = outputs.hidden_states[layeri][0].cpu().numpy()

    # mean-center into a NEW array: on the CPU the numpy array shares memory with
    # the torch tensor, so an in-place "-=" would overwrite the model outputs
    acts = acts - acts.mean(axis=0,keepdims=True)

    # get singular values (only; the singular vectors are not needed)
    s = np.linalg.svd(acts,compute_uv=False)

    # percent explained (cumulative)
    pctExplained = 100 * s**2 / np.sum(s**2)
    cumVarExplained[layeri,:,condi] = np.cumsum(pctExplained)

    # count the components until each variance criterion is exceeded
    for thri,thresh in enumerate(varThresholds):
      effectiveCompCount[layeri,condi,thri] = np.where(cumVarExplained[layeri,:,condi]>thresh)[0][0]+1

# Exercise 3: Visualizations

In [None]:
# one panel per condition: real tokens on the left, shuffled tokens on the right
_,axs = plt.subplots(1,2,figsize=(12,4))

# map the layer index onto the plasma colormap (shared by the lines and the colorbars)
norm = mpl.colors.Normalize(vmin=0,vmax=numHidden)
layerColors = [ mpl.cm.plasma(norm(layeri)) for layeri in range(numHidden) ]
sm = mpl.cm.ScalarMappable(cmap=mpl.cm.plasma,norm=norm)

for condi,(ax,condLabel) in enumerate(zip(axs,('Real','Shuffled'))):

  # cumulative variance explained, one line per layer
  for layeri in range(numHidden):
    ax.plot(cumVarExplained[layeri,:,condi],color=layerColors[layeri])

  # dashed reference lines at the two variance criteria
  for pct in (95,99):
    ax.axhline(pct,linestyle='--',color='gray')

  # colorbar showing which color belongs to which layer
  cbar = plt.colorbar(sm,ax=ax)
  cbar.set_label(r'Hidden layer')

  # labels and axis limits
  ax.set(xlabel='Component number',ylabel='% explained (cumulative)',
         ylim=[80,100.5],xlim=[-2,900],title=f'({condLabel}) variance explained')

plt.tight_layout()
plt.show()

In [None]:
# convert to percent of total possible dimensionality
# the number of components is read from the result matrix (axis 1 = components)
# instead of from a temporary variable left over from the analysis loop
numComps = cumVarExplained.shape[1]
effectiveCompCountP = 100*effectiveCompCount / numComps

_,axs = plt.subplots(1,2,figsize=(12,4))

## plot the "effective subspace dimensionality" of the hidden layers
# one panel per variance criterion (last axis of effectiveCompCountP: 95%, 99%)
for i in range(2):
  axs[i].plot(effectiveCompCountP[:,1,i],'ks',markerfacecolor=[.9,.7,.7,.5],markersize=10,label='Shuffled tokens')
  axs[i].plot(effectiveCompCountP[:,0,i],'ko',markerfacecolor=[.7,.9,.7],markersize=10,label='Real tokens')
  axs[i].legend()
  axs[i].set(xlabel='Hidden layer (including embedding)',ylabel='% max dimensionality',
             title=f'"Effective dimensionality" to reach {[95,99][i]}% total',
             ylim=[-1,effectiveCompCountP.max()*1.1],xlim=[-1,numHidden])


plt.tight_layout()
plt.show()

# Exercise 4: Other texts

In [None]:
# replace the "alice" text in exercise 1 with the following, and then re-run the code

# text = requests.get('https://www.gutenberg.org/cache/epub/219/pg219.txt').text # Heart of Darkness
# text = requests.get('https://pigeonsarentreal.co.uk/').text # a funny website