|<h2>Course:</h2>|<h1><a href="https://udemy.com/course/dullms_x/?couponCode=202508" target="_blank">A deep understanding of AI language model mechanisms</a></h1>|
|-|:-:|
|<h2>Part 5:</h2>|<h1>Observation (non-causal) mech interp</h1>|
|<h2>Section:</h2>|<h1>Investigating token embeddings</h1>|
|<h2>Lecture:</h2>|<h1><b>CodeChallenge: Graph representation of cosine similarities</b></h1>|

<br>

<h5><b>Teacher:</b> Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h5>
<h5><b>Course URL:</b> <a href="https://udemy.com/course/dullms_x/?couponCode=202508" target="_blank">udemy.com/course/dullms_x/?couponCode=202508</a></h5>
<i>Using the code without the course may lead to confusion or errors.</i>

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import matplotlib as mpl

# ask the inline plotting backend to render figures in SVG format
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

In [None]:
# fetch the pretrained BERT tokenizer and model (both from the same checkpoint)
from transformers import BertModel, BertTokenizer

bert_name = 'bert-base-uncased'
tokenizer = BertTokenizer.from_pretrained(bert_name)
model = BertModel.from_pretrained(bert_name)

# pull the word-embedding weights out of the model and convert them to a NumPy array
# (rows are later indexed by token id)
embedding_weights = model.embeddings.word_embeddings.weight
embeddings = embedding_weights.detach().numpy()

# Exercise 1: Tokenize text

In [None]:
# Two candidate passages are defined below. The second assignment overwrites the
# first, so only the small-world-network passage is analyzed in the cells that
# follow; comment out or reorder the assignments to analyze the other passage.

# https://en.wikipedia.org/wiki/Carbonated_water
text = "It is thought that the first person to aerate water with carbon dioxide was William Brownrigg in the 1740s. Joseph Priestley invented carbonated water, independently and by accident, in 1767 when he discovered a method of infusing water with carbon dioxide after having suspended a bowl of water above a beer vat at a brewery in Leeds, Yorkshire. He wrote of the 'peculiar satisfaction' he found in drinking it, and in 1772 he published a paper entitled Impregnating Water with Fixed Air. Priestley's apparatus, almost identical to that used by Henry Cavendish five years earlier, which featured a bladder between the generator and the absorption tank to regulate the flow of carbon dioxide, was soon joined by a wide range of others. However, it was not until 1781 that companies specialized in producing artificial mineral water were established and began producing carbonated water on a large scale. The first factory was built by Thomas Henry of Manchester, England. Henry replaced the bladder in Priestley's system with large bellows."

# https://en.wikipedia.org/wiki/Small-world_network
text = "A small-world network is a graph characterized by a high clustering coefficient and low distances. In an example of the social network, high clustering implies the high probability that two friends of one person are friends themselves. The low distances, on the other hand, mean that there is a short chain of social connections between any two people (this effect is known as six degrees of separation). Specifically, a small-world network is defined to be a network where the typical distance L between two randomly chosen nodes (the number of steps required) grows proportionally to the logarithm of the number of nodes N in the network,"

# token ids for the text, dropping the first and last ids that encode() returns
# (presumably the special start/end tokens the tokenizer adds around the text)
token_ids = tokenizer.encode(text)[1:-1]

# keep every id once, in ascending order
tokens = np.unique(token_ids)

# plot the sorted unique ids
fig,ax = plt.subplots(figsize=(10,4))
ax.plot(tokens,'ko')
ax.set(xlabel='Token index (sorted)',ylabel='Token number')
plt.show()

# Exercise 2: Calculate the cosine similarity matrix and threshold

In [None]:
# embedding vectors of the tokens found in the text (one row per token)
E = embeddings[tokens,:]

# scale every row to unit length
row_norms = np.linalg.norm(E,axis=1,keepdims=True)
E_norm = E / row_norms

# dot products between unit-length vectors are the cosine similarities
csM = np.matmul(E_norm,E_norm.T)

# show the similarity matrix as an image
fig,ax = plt.subplots(figsize=(6,6))
img = ax.imshow(csM,vmin=-.2,vmax=1,cmap='plasma')
ax.set(xlabel='Token index',ylabel='Token index')
fig.colorbar(img,ax=ax,fraction=.046,pad=.02)
plt.show()

In [None]:
# positions of the strict upper triangle: every token pair exactly once, no diagonal.
# Selecting by position (rather than by np.nonzero on the matrix values) also keeps
# any pair whose similarity happens to be exactly 0.
upper_idx = np.triu_indices_from(csM,k=1)

# get a vector of all unique matrix elements
uniqueCS = csM[upper_idx]

# here's the threshold!
# Both statistics are computed from the unique off-diagonal pairs, so the
# self-similarities on the diagonal do not inflate the standard deviation and the
# value matches its "median + std" description.
thresh = np.median(uniqueCS) + np.std(uniqueCS)

In [None]:
# histogram of the pairwise similarities, with the threshold marked as a dashed line
fig,ax = plt.subplots(figsize=(8,4))
ax.hist(uniqueCS,bins=80,color=[.7,.7,.7],edgecolor='k')
ax.axvline(thresh,linestyle='--',color='m',label='Threshold (median + std)')

ax.set(xlabel='Cosine similarity value',ylabel='Count')
ax.legend()
plt.show()

# Exercise 3: Create a masked CS matrix

In [None]:
# the mask: True where a pair of tokens is more similar than the threshold
mask = csM>thresh
# also remove diagonal (done in-place!) so that no token is linked to itself
np.fill_diagonal(mask,False)


# the masked matrix: the similarity value where the mask is True, 0 everywhere else.
# It is built from the mask itself so the two can never disagree (e.g., for a value
# exactly equal to the threshold); the diagonal is already False in the mask.
### Note: this matrix is not used in exercise 4, but this kind of matrix is used
### in other graph-theory analyses, so the code is kept here.
csMasked = csM.copy()
csMasked[~mask] = 0


## visualize the dense matrix, the binary mask, and the thresholded matrix
fig,axs = plt.subplots(1,3,figsize=(13,4))

axs[0].imshow(csM,vmin=-.2,vmax=1)
axs[0].set(title='Dense matrix')

axs[1].imshow(mask)
axs[1].set(title='Binary mask')

axs[2].imshow(csMasked,vmin=-.2,vmax=1)
axs[2].set(title='Thresholded matrix')

# same axis decoration for all three panels
for a in axs:
  a.set(xticks=[],yticks=[],xlabel='Token index',ylabel='Token index')

plt.tight_layout()
plt.show()

# Exercise 4: The circular similarity graph

In [None]:
# one node per token
N = len(tokens)

# values for theta: N angles with a uniform step of 2*pi/N.
# The angles stop one step short of 2*pi, so the gap between the last node and the
# first one is the same as the gap between any other pair of neighbors.
dth = 2*np.pi/N
th = dth*np.arange(N)

# node coordinates on the unit circle
xs,ys = np.cos(th),np.sin(th)

# create a figure
plt.figure(figsize=(8,8))

# loop over tokens
for i in range(N):

  # marker size grows with the number of supra-threshold connections of this token
  dotsize = 3 * np.sqrt(np.sum(mask[i]))

  # continue to next token if this one has no supra-threshold connection
  if dotsize==0: continue

  # otherwise, plot and continue!
  plt.plot(xs[i],ys[i],'ko',markerfacecolor=[.7,.7,.7],markersize=dotsize)


  # annotate every second token; the alignment depends on the quadrant so that the
  # text extends away from the center of the circle
  if i%2==0:
    plt.text(xs[i],ys[i],tokenizer.decode([tokens[i]]),
            ha=['right','left'][int(xs[i]>0)],
            va=['top','bottom'][int(ys[i]>0)], fontweight='bold')

  # loop over all the other tokens (j>i, so each pair is visited once)
  for j in range(i+1,N):

    # only draw a line if similarity exceeds the threshold
    if csM[i,j]>thresh:

      # color encodes how far the similarity is above the threshold
      # (the colormap input reaches 1 at thresh+0.2)
      color = mpl.cm.plasma((csM[i,j]-thresh)/.2)

      # draw it! (behind the markers; line width equals the similarity value)
      plt.plot([xs[i],xs[j]],[ys[i],ys[j]],zorder=-100,
               color=color,linewidth=csM[i,j])

plt.axis('off')
plt.show()