# Separability Insight into OPT models
Tests whether it is possible to remove coding ability from Meta's OPT models at different scales.
Current methods are:
- Look at the activation frequency of MLP mid layers
- Look at the 'crossover threshold' of attention heads

In [1]:
# Environment setup: fetch the helper modules when on Colab, then install deps.
try: # if in google colab, download necessary python files
  # imported only to detect Colab; raises ModuleNotFoundError when absent
  import google.colab
  # clone the opt-tools repo and move its src/*.py files into the working dir
  ! git clone https://github.com/pesvut/opt-tools.git && mv ./opt-tools/src/*.py .
except ModuleNotFoundError:
  # not on Colab: skip the download
  # (presumably the helper modules are already importable locally — confirm)
  pass
# quietly install the third-party packages; this line runs in every environment
! pip3 install -qq transformers datasets evaluate zstandard welford einops

In [None]:
import torch
import numpy as np
import pandas as pd
import einops
import matplotlib.pyplot as plt

from model import Model
from texts import prepare
from activations import calculate_attn_crossover, \
    delete_ff_and_evaluate, evaluate_all

In [None]:
# Load model and show details about model
opt = Model( "125m", limit=1000 )
opt.show_details( False )

# Pre-pruning of model
pre_removals = []
opt.delete_ff_keys_from_files(pre_removals)

In [None]:
# Configure tests ( set value to zero to skip )
model_size, token_limit  = "1.3b", 1000
run_pre_test             = False
attn_crossover_threshold = 0
ff_frac_removed          = 0.02
ff_epsilon               = 0.01
pre_removals = []
run_attn = ( attn_crossover_threshold != 0 )

# init panda data frame for saving nice relevant data
df = pd.DataFrame()

In [None]:
# Evaluate model before removal of any neurons
if run_pre_test:
    data = evaluate_all( opt, 1e5 )
    # DataFrame.append was removed in pandas 2.0, so wrap the result as a
    # one-row frame and concatenate it instead.
    # assumes `data` is a flat dict / Series of metrics — TODO confirm
    row = pd.DataFrame( [data] )
    # skip the concat while df is still empty to keep the row's dtypes intact
    df = row if df.empty else pd.concat( [df, row], ignore_index=True )
    print( df.T )

In [None]:
# Gather attention crossover data ( only when a non-zero threshold is set ).
# The second argument 1e6 is presumably a sample / token budget — confirm
# against calculate_attn_crossover.
if run_attn:
    attn_data = calculate_attn_crossover(opt, 1e6)

In [None]:
if run_attn:
    # Choose Attn Heads to Remove: those whose crossover multiple exceeds
    # the configured threshold
    crossover_multiple = attn_data['crossover_multiple']
    removals      = crossover_multiple > attn_crossover_threshold
    log_crossover = np.log2( crossover_multiple )

    # Plot Attn Heads: the selection mask computed above on the left and the
    # log2 crossover multiple on the right, so the figure reflects the
    # threshold configured in this notebook
    fig, ax = plt.subplots(1, 2)
    ax[0].imshow( removals )
    ax[1].imshow( log_crossover )
    plt.show()

In [None]:
if run_attn:
    # Evaluate model after removing important attention heads
    # NOTE(review): no cell visible in this notebook applies the selected
    # heads to `opt` before this evaluation — confirm the removal step.
    data = evaluate_all( opt, 1e5 )
    # DataFrame.append was removed in pandas 2.0, so wrap the result as a
    # one-row frame and concatenate it instead.
    # assumes `data` is a flat dict / Series of metrics — TODO confirm
    row = pd.DataFrame( [data] )
    # skip the concat while df is still empty to keep the row's dtypes intact
    df = row if df.empty else pd.concat( [df, row], ignore_index=True )
    print( df.T )

In [None]:
# Feed-forward pruning, runs 0-3: each run calls delete_ff_and_evaluate with
# the configured fraction and epsilon, then records the returned metrics.
for i in range(0,4):
    print('\n\n- RUNNING RUN No', i )
    data = delete_ff_and_evaluate( opt, ff_frac_removed, ff_epsilon )
    # DataFrame.append was removed in pandas 2.0, so wrap the result as a
    # one-row frame and concatenate it instead.
    # assumes `data` is a flat dict / Series of metrics — TODO confirm
    row = pd.DataFrame( [data] )
    # skip the concat while df is still empty to keep the row's dtypes intact
    df = row if df.empty else pd.concat( [df, row], ignore_index=True )
    print( df.T )

In [None]:
# Feed-forward pruning, runs 4-7: each run calls delete_ff_and_evaluate with
# the configured fraction and epsilon, then records the returned metrics.
for i in range(4,8):
    print('\n\n- RUNNING RUN No', i )
    data = delete_ff_and_evaluate( opt, ff_frac_removed, ff_epsilon )
    # DataFrame.append was removed in pandas 2.0, so wrap the result as a
    # one-row frame and concatenate it instead.
    # assumes `data` is a flat dict / Series of metrics — TODO confirm
    row = pd.DataFrame( [data] )
    # skip the concat while df is still empty to keep the row's dtypes intact
    df = row if df.empty else pd.concat( [df, row], ignore_index=True )
    print( df.T )

In [None]:
# Feed-forward pruning, runs 8-11: each run calls delete_ff_and_evaluate with
# the configured fraction and epsilon, then records the returned metrics.
for i in range(8,12):
    print('\n\n- RUNNING RUN No', i )
    data = delete_ff_and_evaluate( opt, ff_frac_removed, ff_epsilon )
    # DataFrame.append was removed in pandas 2.0, so wrap the result as a
    # one-row frame and concatenate it instead.
    # assumes `data` is a flat dict / Series of metrics — TODO confirm
    row = pd.DataFrame( [data] )
    # skip the concat while df is still empty to keep the row's dtypes intact
    df = row if df.empty else pd.concat( [df, row], ignore_index=True )
    print( df.T )