|<h2>Course:</h2>|<h1><a href="https://udemy.com/course/dullms_x/?couponCode=202508" target="_blank">A deep understanding of AI language model mechanisms</a></h1>|
|-|:-:|
|<h2>Part 5:</h2>|<h1>Observation (non-causal) mech interp</h1>|
|<h2>Section:</h2>|<h1>Investigating layers</h1>|
|<h2>Lecture:</h2>|<h1><b>Mutual information: theory and code</b></h1>|

<br>

<h5><b>Teacher:</b> Mike X Cohen, <a href="https://sincxpress.com" target="_blank">sincxpress.com</a></h5>
<h5><b>Course URL:</b> <a href="https://udemy.com/course/dullms_x/?couponCode=202508" target="_blank">udemy.com/course/dullms_x/?couponCode=202508</a></h5>
<i>Using the code without the course may lead to confusion or errors.</i>

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# MI
from sklearn.feature_selection import mutual_info_regression

# vector matplotlib
import matplotlib_inline.backend_inline
matplotlib_inline.backend_inline.set_matplotlib_formats('svg')

# Create and visualize data

In [None]:
# create and discretize data

# Seed the global NumPy RNG so that Restart & Run All regenerates the same
# sample (and therefore the same figures and MI estimates) on every run.
np.random.seed(0)

N = 200
x = np.random.randn(N)
y = np.random.randn(N) + x  # y = x + independent noise, so x and y are dependent

# 2D histogram: Z holds the per-bin counts, xx/yy the bin edges along x and y
Z,xx,yy = np.histogram2d(x,y,bins=15)

_,axs = plt.subplots(1,2,figsize=(11,5))

# left panel: raw scatter
axs[0].plot(x,y,'bo',markerfacecolor=[.7,.7,.9])
axs[0].set(xlabel='x',ylabel='y',title='Full resolution data')

# right panel: binned counts with the raw points on top.
# histogram2d puts x along the first axis, so transpose to get y on the rows.
h = axs[1].imshow(Z.T,extent=[xx[0],xx[-1],yy[0],yy[-1]],vmin=0,vmax=Z.max()*.7,origin='lower',aspect='auto',cmap='hot')
axs[1].set(xlabel='x',ylabel='y',title='Discretized (binned) data')
axs[1].plot(x,y,'wo',markerfacecolor=[.4,.4,.4],alpha=.7)
plt.colorbar(h,ax=axs[1],pad=.01,label='Count')

plt.tight_layout()
plt.show()

# Convert to proportion (estimate of probability)

In [None]:
# normalize the counts so the joint table sums to 1
p_Z = Z / Z.sum()

# marginals: collapse the joint table over the other variable
p_x = p_Z.sum(axis=1)
p_y = p_Z.sum(axis=0)

# entropy in bits (log2); eps keeps log2 finite for empty bins
eps = 1e-13
entropy_x = -np.sum( np.log2(p_x+eps) * p_x )
entropy_y = -np.sum( np.log2(p_y+eps) * p_y )

# bin centers = midpoints between neighboring bin edges
x_centers = (xx[1:]+xx[:-1])/2
y_centers = (yy[1:]+yy[:-1])/2

# visualize and label
_,ax = plt.subplots(figsize=(10,4))
ax.plot(x_centers,p_x,label=f'x (entropy = {entropy_x:.3f})')
ax.plot(y_centers,p_y,label=f'y (entropy = {entropy_y:.3f})')

ax.legend()
ax.set(xlabel='Data values (x or y)',ylabel='Proportion',title='Marginal distributions of data')
plt.show()

# Mutual information

In [None]:
# MI as a difference of entropies: I(X;Y) = H(X) + H(Y) - H(X,Y)

# joint entropy over all 2D bins (eps keeps log2 finite for empty bins)
entropy_Z = -(p_Z * np.log2(p_Z + eps)).sum()

miEps = entropy_x + entropy_y - entropy_Z
miEps

# Should be better: equal-frequency bins

In [None]:
# Equal-frequency (quantile) binning: the edges sit at evenly spaced
# percentiles, so every marginal bin holds roughly the same number of points.
n_bins = 15  # same bin count as the equal-width histogram, for a fair comparison

# n_bins bins need n_bins+1 edges
quantiles = np.linspace(0, 100, n_bins+1)
x_edges = np.percentile(x, quantiles)
y_edges = np.percentile(y, quantiles)

# 2D histogram, stored under its own names so the equal-width Z / p_Z / p_x / p_y
# from the earlier cells are not overwritten (keeps those cells re-runnable)
Z_eq = np.histogram2d(x,y,bins=(x_edges, y_edges))[0]
p_Z_eq = Z_eq / Z_eq.sum()

# only consider bins with data (0*log2(0) is taken as 0, so empty bins are skipped)
nonzero = p_Z_eq > 0

# marginal distributions
p_x_eq = p_Z_eq.sum(axis=1)
p_y_eq = p_Z_eq.sum(axis=0)

# calculate entropies (bits)
Hx = -np.sum(p_x_eq[p_x_eq>0] * np.log2(p_x_eq[p_x_eq>0]))
Hy = -np.sum(p_y_eq[p_y_eq>0] * np.log2(p_y_eq[p_y_eq>0]))
Hxy = -np.sum(p_Z_eq[nonzero] * np.log2(p_Z_eq[nonzero]))

# mutual information: I(X;Y) = H(X) + H(Y) - H(X,Y)
miEqual = Hx + Hy - Hxy
miEqual

# The most accurate method: sklearn

In [None]:
# the feature input must be 2D, so turn x into a single column;
# the result is an array holding one MI estimate per feature column
x_column = x[:,np.newaxis]
miSk = mutual_info_regression(x_column,y)
miSk

In [None]:
# comparison (every value reported in bits)

# x ~ N(0,1) and y = x + N(0,1) noise, so rho^2 = cov^2/(var_x*var_y) = 1/2.
# For jointly Gaussian variables MI = -0.5*log2(1-rho^2), i.e. 0.5 bits here
# (it would be 0 only if x and y were independent).
miExpected = -.5 * np.log2(1 - .5)

# the histogram estimates use log2 (bits); sklearn reports nats, so convert
miSkBits = miSk[0] / np.log(2)

print(f'Analytic MI (bits), y = x + randn: {miExpected:.3f}')
print(f'Using equal-spaced bins and eps:   {miEps:.3f}')
print(f'Using frequency binning and zero:  {miEqual:.3f}')
print(f'Using sklearn regression function: {miSkBits:.3f}')