In [None]:
try: # if in google colab, download necessary python files
  import google.colab
  ! git clone https://github.com/pesvut/opt-tools.git && mv ./opt-tools/src/*.py .
except ModuleNotFoundError:
  pass
! pip install -qq transformers datasets evaluate zstandard welford einops

In [None]:
import torch
import numpy as np
import pandas as pd
import einops
import matplotlib.pyplot as plt

from model import Model
from texts import prepare
from activations import calculate_attn_crossover, \
    delete_ff_and_evaluate, evaluate_all

In [None]:
# Instantiate the "125m" model (constructed with limit=1000) and show its details
opt = Model("125m", limit=1000)
opt.show_details(False)

# Pre-pruning step: hand the list of keys to delete to the model.
# The list is empty here, i.e. nothing is requested for removal up front.
pre_removals = list()
opt.delete_ff_keys_from_files(pre_removals)

In [None]:
# Evaluate model before removal of any neurons
data = evaluate_all( opt, 1e5 )

# Init pandas data frame for saving nice relevant data, with the baseline
# evaluation as its first row. DataFrame.append was removed in pandas 2.0,
# so the frame is built directly from the record instead of appending to an
# empty frame.
# NOTE(review): assumes `data` is a flat dict (or Series) of metrics — confirm
# against evaluate_all in activations.py.
df = pd.DataFrame( [data] )
print( df.T )

In [None]:
# Compute the attention crossover data for the model.
# NOTE(review): 1.6 is passed positionally; presumably a threshold/multiple —
# confirm its meaning against calculate_attn_crossover in activations.py.
attn_data = calculate_attn_crossover(opt, 1.6, sample_size=1e5)
print( attn_data.keys() )

# plot the attn_data: one panel per entry, titled after the entry it shows so
# the figure can be read on its own
fig, ax = plt.subplots(1, 2)
ax[0].imshow( attn_data['removals'] )
ax[0].set_title( 'removals' )
# shown on a log2 scale
ax[1].imshow( np.log2(attn_data['crossover_multiple']) )
ax[1].set_title( 'log2(crossover_multiple)' )
plt.show()

In [None]:
# Evaluate model after removing important attention heads
data = evaluate_all( opt, 1e5 )
# Add the result as a new row of the running results table.
# DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
# NOTE(review): assumes `data` is a flat dict (or Series) of metrics — confirm.
df = pd.concat( [df, pd.DataFrame([data])], ignore_index=True )
print( df.T )

In [None]:
# Value passed to delete_ff_and_evaluate on every pruning run
# (also read by the later run cells).
FREQ_MULTIPLE = 2

# Runs 0-3: each iteration calls delete_ff_and_evaluate on the same model and
# records what it returns as a new row of the results table.
for i in range(4):
    print('\n\n- RUNNING RUN No', i )
    data = delete_ff_and_evaluate( opt, FREQ_MULTIPLE )
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # NOTE(review): assumes `data` is a flat dict (or Series) — confirm.
    df = pd.concat( [df, pd.DataFrame([data])], ignore_index=True )
    print( df.T )

In [None]:
# Runs 4-7: continue calling delete_ff_and_evaluate on the same model,
# recording each result as a new row of the results table.
for i in range(4,8):
    print('\n\n- RUNNING RUN No', i )
    data = delete_ff_and_evaluate( opt, FREQ_MULTIPLE )
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # NOTE(review): assumes `data` is a flat dict (or Series) — confirm.
    df = pd.concat( [df, pd.DataFrame([data])], ignore_index=True )
    print( df.T )

In [None]:
# Runs 8-11: continue calling delete_ff_and_evaluate on the same model,
# recording each result as a new row of the results table.
for i in range(8,12):
    print('\n\n- RUNNING RUN No', i )
    data = delete_ff_and_evaluate( opt, FREQ_MULTIPLE )
    # DataFrame.append was removed in pandas 2.0; pd.concat is the replacement.
    # NOTE(review): assumes `data` is a flat dict (or Series) — confirm.
    df = pd.concat( [df, pd.DataFrame([data])], ignore_index=True )
    print( df.T )