# Introduction to pyvene
This tutorial walks through short, runnable code snippets that show how to perform different kinds of interventions on neural networks with pyvene.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/stanfordnlp/pyvene/blob/main/pyvene/pyvene_101.ipynb)

In [1]:
# Notebook metadata: the tutorial author and a date-style (MM/DD/YYYY) revision stamp.
__author__ = "Zhengxuan Wu"
__version__ = "01/19/2024"

### Set-up

In [None]:
try:
    # This library is our indicator that the required installs
    # need to be done.
    import pyvene

except ModuleNotFoundError:
    !pip install git+https://github.com/frankaging/pyvene.git

### simplest, auto-cast everywhere

In [3]:
import torch
import pyvene as pv

# create_gpt2() returns three values; the first one is not needed here.
_, tokenizer, gpt2 = pv.create_gpt2()

# A plain dict is passed where a config would go: it names the layer and
# component to intervene on and carries a fixed all-zero source vector
# sized to the model's embedding width.
zero_source = torch.zeros(gpt2.config.n_embd)
intervention_spec = {
    "layer": 0,
    "component": "mlp_output",
    "source_representation": zero_source,
}
pv_gpt2 = pv.IntervenableModel(intervention_spec, model=gpt2)

# Tokenize the base prompt, then run the intervened forward pass.
# The bare integer location is presumably auto-cast to token position 3.
base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
intervened_outputs = pv_gpt2(
    base=base_inputs,
    unit_locations={"base": 3},
)

loaded model


### standalone config allows more options

In [4]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# One representation entry per layer for layers 0-3, each with an all-zero
# source vector. The loop variable has a real name because `_` is already
# used in this cell for the discarded first return value of create_gpt2().
config = pv.IntervenableConfig([
    {
        "layer": layer_idx,
        "component": "mlp_output",
        "source_representation": torch.zeros(
            gpt2.config.n_embd)
    } for layer_idx in range(4)],
    mode="parallel"
)
# Printing the config shows the defaults filled in for every entry.
print(config)
pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

intervened_outputs = pv_gpt2(
    base = tokenizer(
        "The capital of Spain is",
        return_tensors="pt"
    ),
    unit_locations={"base": 3}
)

loaded model
IntervenableConfig
{
    "model_type": "None",
    "representations": [
        {
            "layer": 0,
            "component": "mlp_output",
            "unit": "pos",
            "max_number_of_units": 1,
            "low_rank_dimension": null,
            "intervention_type": null,
            "subspace_partition": null,
            "group_key": null,
            "intervention_link_key": null,
            "moe_key": null,
            "source_representation": "PLACEHOLDER",
            "hidden_source_representation": null
        },
        {
            "layer": 1,
            "component": "mlp_output",
            "unit": "pos",
            "max_number_of_units": 1,
            "low_rank_dimension": null,
            "intervention_type": null,
            "subspace_partition": null,
            "group_key": null,
            "intervention_link_key": null,
            "moe_key": null,
            "source_representation": "PLACEHOLDER",
            "hidden_source_repr

### passing activations at runtime

In [4]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# No source representation is stored in the spec this time; it is handed
# to the model at call time instead.
intervention_spec = {
    "layer": 0,
    "component": "mlp_output",
}
pv_gpt2 = pv.IntervenableModel(intervention_spec, model=gpt2)

base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
runtime_source = torch.zeros(gpt2.config.n_embd)

intervened_outputs = pv_gpt2(
    base=base_inputs,
    source_representations=runtime_source,
    unit_locations={"base": 3},
)

loaded model


### simple addition intervention

In [5]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# The intervention type is passed as the second positional argument.
config = pv.IntervenableConfig({
    "layer": 0,
    "component": "mlp_input"},
    pv.AdditionIntervention
)

pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

# Seed the generator so the random source vector, and therefore the
# intervened output, is the same on every run of this cell.
torch.manual_seed(0)
random_source = torch.rand(gpt2.config.n_embd)

intervened_outputs = pv_gpt2(
    base = tokenizer(
        "The Space Needle is in downtown",
        return_tensors="pt"
    ),
    # Nested list of four locations (0-3) for the single intervention.
    unit_locations={"base": [[[0, 1, 2, 3]]]},
    source_representations = random_source
)

loaded model


### trainable interventions accept additional arguments

In [5]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# Trainable intervention types take extra settings in the representation
# spec; here that is the rank of the learned subspace.
representation_spec = {
    "layer": 8,
    "component": "block_output",
    "low_rank_dimension": 1,
}
config = pv.IntervenableConfig(
    representation_spec,
    pv.LowRankRotatedSpaceIntervention,
)

pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
source_inputs = tokenizer("The capital of Italy is", return_tensors="pt")

# The last element of the returned tuple carries the model outputs used below.
outputs = pv_gpt2(
    base=base_inputs,
    sources=source_inputs,
    unit_locations={"sources->base": 3},
)
last_hidden_state = outputs[-1].last_hidden_state

# Any scalar works as a stand-in loss to show that backward() runs.
loss = last_hidden_state.sum()
loss.backward()

loaded model


### collect activations

In [9]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# The intervention type can also be given inside the representation spec.
collect_spec = {
    "layer": 10,
    "component": "block_output",
    "intervention_type": pv.CollectIntervention,
}
config = pv.IntervenableConfig(collect_spec)

pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
outputs = pv_gpt2(
    base=base_inputs,
    unit_locations={"sources->base": 3},
)
# Take the last entry of the first returned element
# (presumably where collected activations are placed; confirm in pyvene docs).
collected_activations = outputs[0][-1]

loaded model


### collecting activations works alongside other interventions

In [4]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# First intervention: a vanilla intervention on the layer-8 block output.
vanilla_spec = {
    "layer": 8,
    "component": "block_output",
    "intervention_type": pv.VanillaIntervention,
}
config = pv.IntervenableConfig(vanilla_spec)

# Second intervention, appended to the same config: collect the layer-10
# block output.
collect_spec = {
    "layer": 10,
    "component": "block_output",
    "intervention_type": pv.CollectIntervention,
}
config.add_intervention(collect_spec)

pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
source_inputs = tokenizer("The capital of Italy is", return_tensors="pt")

# Two interventions, two source slots; the second slot is None
# (presumably because the collect intervention needs no source).
outputs = pv_gpt2(
    base=base_inputs,
    sources=[source_inputs, None],
    unit_locations={"sources->base": 3},
)
collected_activations = outputs[0][-1]

loaded model


### Fine-grained intervention on a specific neuron

In [1]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# Collect from the per-head attention value output, addressed with the
# combined "h.pos" unit.
head_collect_spec = {
    "layer": 8,
    "component": "head_attention_value_output",
    "unit": "h.pos",
    "intervention_type": pv.CollectIntervention,
}
config = pv.IntervenableConfig(head_collect_spec)

pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
# GET_LOC builds the location structure for the "h.pos" unit
# (presumably head 3 at position 3; confirm in pyvene docs).
target_location = pv.GET_LOC((3,3))

outputs = pv_gpt2(
    base=base_inputs,
    unit_locations={"base": target_location},
    subspaces=[0],
)
collected_activations = outputs[0][-1]

loaded model


### New intervention type

In [30]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()


class MultiplierIntervention(pv.ConstantSourceIntervention):
    """Custom intervention that scales the base representation by 99."""

    def __init__(self, embed_dim, **kwargs):
        # embed_dim and any extra keyword arguments are accepted but unused;
        # the parent initializer is called without arguments.
        super().__init__()

    def forward(self, base, source=None, subspaces=None):
        """Return ``base`` multiplied by 99.0; ``source`` and ``subspaces`` are ignored."""
        return base * 99.0


# run with new intervention type
intervention_spec = {"intervention_type": MultiplierIntervention}
pv_gpt2 = pv.IntervenableModel(intervention_spec, model=gpt2)

base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
intervened_outputs = pv_gpt2(
    base=base_inputs,
    unit_locations={"base": 3},
)

loaded model


### Recurrent NNs

In [24]:
import torch
import pyvene as pv

# Only the model (third return value) is needed for the GRU example.
gru_config = pv.GRUConfig(h_dim=32)
_, _, gru = pv.create_gru_classifier(gru_config)

# Zero-out intervention on the cell output, addressed with the "t" unit.
intervention_spec = {
    "component": "cell_output",
    "unit": "t",
    "intervention_type": pv.ZeroIntervention,
}
pv_gru = pv.IntervenableModel(intervention_spec, model=gru)

# Random input embeddings with leading dimensions (1, 10) and a final
# dimension equal to the GRU hidden size.
rand_t = torch.rand(1,10, gru.config.h_dim)

intervened_outputs = pv_gru(
    base={"inputs_embeds": rand_t},
    unit_locations={"base": 3},
)

loaded model


### LMs Generation

In [39]:
# Imports are repeated here so this cell runs on its own, like the other
# example cells in this notebook.
import torch
import pyvene as pv

# built-in helper to get tinystory
_, tokenizer, tinystory = pv.create_gpt_neo()
# Word embedding of token id 14628, scaled by 0.3.
# NOTE(review): judging by the variable name this id is presumably the
# "happy" token; confirm against the tokenizer vocabulary.
emb_happy = tinystory.transformer.wte(
    torch.tensor(14628)) * 0.3

# One AdditionIntervention on the MLP output of every layer. The loop
# variable has a real name because `_` is used in this cell for discarded
# return values.
pv_tinystory = pv.IntervenableModel([{
    "layer": layer_idx,
    "component": "mlp_output",
    "intervention_type": pv.AdditionIntervention
    } for layer_idx in range(
        tinystory.config.num_layers)],
    model=tinystory)

prompt = tokenizer(
    "Once upon a time there was",
    return_tensors="pt")
# generate() returns two values; only the second (the intervened
# generation) is used.
_, intervened_story = pv_tinystory.generate(
    prompt,
    source_representations=emb_happy,
    max_length=256
)
print(tokenizer.decode(
    intervened_story[0],
    skip_special_tokens=True
))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Once upon a time there was a little girl named Lucy. She was three years old and loved to explore. One day, Lucy was walking in the park when she saw a big, red balloon. She was so excited and wanted to play with it.

But then, a big, mean man came and said, "That balloon is mine! You can't have it!" Lucy was very sad and started to cry.

The man said, "I'm sorry, but I need the balloon for my work. You can have it if you want."

Lucy was so happy and said, "Yes please!" She took the balloon and ran away.

But then, the man said, "Wait! I have an idea. Let's make a deal. If you can guess what I'm going to give you, then you can have the balloon."

Lucy thought for a moment and then said, "I guess I'll have to get the balloon."

The man smiled and said, "That's a good guess! Here you go."

Lucy was so happy and thanked the man. She hugged the balloon and ran off to show her mom.

The end.



### saving and loading

In [1]:
import sys
# Extend the module search path two directories up before importing pyvene
# (presumably so a local checkout is importable; confirm the repo layout).
sys.path.append("../..")

import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()


class MultiplierIntervention(pv.SourcelessIntervention):
    """Sourceless intervention that scales the base representation by 99."""

    def __init__(self, embed_dim, **kwargs):
        # Extra keyword arguments are accepted but not forwarded to the parent.
        super().__init__()
        # Keep the embedding size as a buffer on the module.
        self.register_buffer("interchange_dim", torch.tensor(embed_dim))

    def forward(self, base, source=None, subspaces=None):
        """Return ``base`` multiplied by 99.0; ``source`` and ``subspaces`` are ignored."""
        return base * 99.0

    def __str__(self):
        return "MultiplierIntervention()"


# run with new intervention type
intervention_spec = {"intervention_type": MultiplierIntervention}
pv_gpt2 = pv.IntervenableModel(intervention_spec, model=gpt2)

# Write the intervenable model to a local directory.
pv_gpt2.save("./tmp/")

loaded model
Directory './tmp/' already exists.


In [4]:
# Reload from the directory written by the save cell above, using the `gpt2`
# model from that cell; the bare expression lets the notebook display the repr.
pv.IntervenableModel.load("./tmp/", model=gpt2)



IntervenableModel(
  (model): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    (drop): Dropout(p=0.1, inplace=False)
    (h): ModuleList(
      (0-11): 12 x GPT2Block(
        (ln_1): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (attn): GPT2Attention(
          (c_attn): Conv1D()
          (c_proj): Conv1D()
          (attn_dropout): Dropout(p=0.1, inplace=False)
          (resid_dropout): Dropout(p=0.1, inplace=False)
        )
        (ln_2): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
        (mlp): GPT2MLP(
          (c_fc): Conv1D()
          (c_proj): Conv1D()
          (act): NewGELUActivation()
          (dropout): Dropout(p=0.1, inplace=False)
        )
      )
    )
    (ln_f): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  )
)

### The End
You have now graduated from pyvene 101! Feel free to take a look at our other tutorials for more challenging interventions.