In [None]:
import torch
from transformers import AutoTokenizer, AutoModelForMaskedLM

from separability import Model
# Load roberta-base through the project's Model wrapper; later cells refer to it as `m`.
m = Model("roberta-base")

In [None]:
# Run a masked prompt through the wrapper's get_attn_pre_out_activations and
# print only the shape of what comes back (not the values).
pre_out = m.get_attn_pre_out_activations("Hello my name is <mask>")
print(pre_out.shape)

In [None]:
# Same shape check with facebook/opt-125m; this prompt carries no <mask> token.
opt = Model("facebook/opt-125m")
print(opt.get_attn_pre_out_activations("Hello my name is").shape)


In [None]:
# NOTE(review): `roberta_sample` is neither defined nor imported in any visible cell,
# so this raises NameError on a fresh kernel — confirm where it should come from.
roberta_sample(m, "We can see if this is working by using a sample text")

In [None]:
from separability.model import detached, pad_zeros
from torch import Tensor

# Module-level store written by the forward hooks below: hook name -> detached hook input.
# Each hook call overwrites its own entry, so only the most recent capture is kept.
ff_activations = {}

def get_activation_of(name : str):
    """
    Build a forward hook that records a module's inputs under `name`.

    The returned callable takes the three positional arguments of a forward
    hook (module, inputs, output). Each time it fires it stores
    `detached(inputs)` in the module-level `ff_activations` dict, replacing
    any earlier entry for `name`. The module's output is ignored.
    """
    def _store_inputs(_module, inputs, _output):
        # Keep what went INTO the hooked module, not what came out of it
        ff_activations[name] = detached( inputs )

    return _store_inputs

def register_activations(model: Model):
    """
    Attach an input-capturing forward hook to every layer's "attn.out" module.

    For each entry of `model.layers`, the module found under the "attn.out"
    key gets a hook built by `get_activation_of`, named
    `pad_zeros(layer_index) + "-attention-out"`. A summary line with the
    number of hooks registered is printed at the end.

    Calling this again registers a further set of hooks on the same modules.
    """
    n_registered = 0
    for layer_index, layer in enumerate(model.layers):
        name = pad_zeros( layer_index ) + "-attention-out"
        # print( f"registering : ({name}), OPTAttention layer" )
        layer["attn.out"].register_forward_hook( get_activation_of( name ) )
        n_registered += 1

    # Report the count rather than the last loop index: the index is one short
    # of the true number and is undefined when there are no layers at all.
    print( f" - Registered {n_registered} Attention Out Layers" )
    
def get_recent_activations():
    """
    Flatten the latest hook captures into one list per registered hook.

    Each returned entry is a list whose first element is the hook name
    (the key in `ff_activations`, e.g. "##-attention-out"), followed by the
    items captured for that hook:
      - a Tensor item is appended as-is,
      - a None item is dropped,
      - a tuple/list item contributes each of its elements (one level only),
      - anything else is ignored.
    """
    collected = []
    for hook_name, captured in ff_activations.items():
        entry = [ hook_name ]
        for item in captured:
            if item is None:
                continue
            if isinstance(item, Tensor):
                entry.append( item )
            elif isinstance(item, (tuple, list)):
                # unpack a single level of nesting
                entry.extend( item )
        collected.append( entry )

    return collected

# Hook the model currently bound to `m`; captures land in ff_activations on later forward passes.
register_activations( m )

In [None]:
# Print the shape of the residual stream for a short sentence.
# NOTE(review): presumably this runs a forward pass and so also fires the registered hooks — confirm.
print(m.get_residual_stream(text="the cat sat on the mat").shape)

In [None]:
# Snapshot of the most recent hook captures: one [name, tensor, ...] list per hooked layer.
# The bare `act =` was a SyntaxError; the next cell reads a[0] as the name and a[1], a[2] as tensors.
act = get_recent_activations()

In [None]:
# For every captured layer: its name, then the shapes of entries 1 and 2,
# then the full contents of entries 1 and 2.
for a in act:
    print(a[0])
    for idx in (1, 2):
        print(a[idx].shape)
    for idx in (1, 2):
        print(a[idx])

In [None]:
# Reference run using Hugging Face roberta-base directly, without the Model wrapper.
tokenizer = AutoTokenizer.from_pretrained('roberta-base')
model = AutoModelForMaskedLM.from_pretrained('roberta-base')
# Print the module tree so submodule paths can be read off it
print(model)

# Inference only: no gradients are tracked inside this block
with torch.no_grad():
    input_ids = tokenizer.encode("This is an example of a <mask> model.", return_tensors="pt")

    # Call the base model on its own and keep only its final hidden state
    output = model.roberta(input_ids, output_hidden_states=False).last_hidden_state
    print(output.shape)
   
    # Apply the LM head separately to turn hidden states into logits
    logits = model.lm_head(output) 
    print(logits.shape)
    
    # Greedy pick along the last dimension, then decode the ids back to text
    output_ids = torch.argmax(logits, dim=-1)
    print(tokenizer.batch_decode(output_ids))
    

In [None]:
print(model.roberta.encoder.layer[0].attention.self.query.weight.shape)

In [None]:
from separability import Model
from separability.eval import evaluate

# NOTE(review): `opt` is rebound here to facebook/galactica-125m, replacing the
# OPT-125m wrapper from the earlier cell; the name no longer matches the model.
# NOTE(review): the second positional argument (1000) is presumably a token limit — confirm against Model's signature.
opt = Model("facebook/galactica-125m", 1000)
evaluate(opt, "mmlu:all")

In [None]:
from separability import Model
from separability.eval import evaluate
# NOTE(review): rebinding `m` replaces the roberta-base wrapper that the forward hooks
# were registered on; any cell run after this one sees roberta-large instead.
m = Model("roberta-large", 512)
# Evaluate on the "mmlu:high_school_mathematics" task with n_shot=4
evaluate(m, "mmlu:high_school_mathematics", n_shot=4)

In [None]:
from separability.prune import run_pruning
from separability.data_classes import PruningConfig

# Configuration for a single pruning run on roberta-large.
# NOTE(review): both sample sizes are the float 1e4 rather than an int — confirm
# that PruningConfig accepts floats for these fields.
c = PruningConfig(
    model_repo="roberta-large",
    wandb_project="testing-roberta",
    focus="pile_codeless",
    cripple="code",
    token_limit=512,
    svd_attn=False,
    attn_mode="pre-out",
    ff_scoring="abs",
    attn_scoring="abs",
    run_pre_test=True,
    collection_sample_size=1e4,
    eval_sample_size=1e4,
    ff_frac=0.05,
    attn_frac=0.05,
)

# Launch the run with the configuration above
run_pruning(c)

In [None]:
# Show the first entry of the wrapper's `layers` for whichever model `m` is bound to when this runs.
print(m.layers[0])