In [1]:
%load_ext autoreload
%autoreload 2
# Set HuggingFace cache directory to scratch to save space.
import os
os.environ['HUGGINGFACE_HUB_CACHE'] = '/scratch/' + os.environ['USER'] + '/huggingface_cache'
# Optional; can help when memory is tight.
# os.environ['PYTORCH_CUDA_ALLOC_CONF'] = 'expandable_segments:True'

In [2]:
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained("microsoft/phi-2", torch_dtype="auto", trust_remote_code=True).to('cuda')
tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2", trust_remote_code=True)
tokenizer.pad_token_id = tokenizer.eos_token_id

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


In [29]:
from IPython.display import display, HTML, Markdown

def run_coding_sample(model, tokenizer):
    input_string = '''def print_prime(n):
       """
       Print all primes between 1 and n
       """'''
    inputs = tokenizer(input_string, return_tensors="pt", return_attention_mask=True).to('cuda')
    
    outputs = model.generate(**inputs, max_new_tokens=50)
    text = tokenizer.batch_decode(outputs)[0][len(input_string):]

    display(HTML(f"<pre>{input_string}</pre><pre style='background-color: rgb(200, 255, 200, 1.0)'>{text}<pre>"))

In [33]:
from hooked_phi import attach_hooks, detach_hooks

# Returns a hook that can be used to ablate a set of neurons.
def ablate_neurons(mask):
    assert mask.shape[0] == model.config.num_hidden_layers
    assert mask.shape[1] == model.config.intermediate_size

    def hook(neurons, layer_idx):
        neurons[..., ~mask[layer_idx]] = 0
        return neurons

    return hook

# Ablate the last layer MLP.
for num_ablation_layers in range(0, 9, 2):
    display(Markdown(f'# Zeroing-out intermediate layers of the last {num_ablation_layers} MLPs'))
    
    mask = torch.ones((model.config.num_hidden_layers, model.config.intermediate_size), dtype=torch.bool)
    if num_ablation_layers > 0:
        mask[-num_ablation_layers:, :] = False
    
    attach_hooks(model.model, ablate_neurons(mask))
    # detach_hooks(model.model)
    
    # Post-ablation
    run_coding_sample(model, tokenizer)

# Zeroing-out intermediate layers of the last 0 MLPs

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


# Zeroing-out intermediate layers of the last 2 MLPs

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


# Zeroing-out intermediate layers of the last 4 MLPs

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


# Zeroing-out intermediate layers of the last 6 MLPs

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


# Zeroing-out intermediate layers of the last 8 MLPs

Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
