# Introduction to pyvene
This tutorial gives short, runnable code snippets showing how to perform different kinds of interventions on neural networks with pyvene.

[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/stanfordnlp/pyvene/blob/main/pyvene/pyvene_101.ipynb)

In [1]:
# Notebook metadata: the tutorial's author and a version stamp written as a
# date (month/day/year).
__author__ = "Zhengxuan Wu"
__version__ = "01/19/2024"

### Set-up

In [None]:
try:
    # This library is our indicator that the required installs
    # need to be done.
    import pyvene

except ModuleNotFoundError:
    !pip install git+https://github.com/frankaging/pyvene.git

## pyvene 101

### simplest, auto-cast everywhere

In [3]:
import torch
import pyvene as pv

# Load GPT-2 together with its tokenizer; the first returned value is unused.
_, tokenizer, gpt2 = pv.create_gpt2()

# A plain dict is enough to describe one intervention: the MLP output of
# layer 0, with an all-zero vector of size n_embd as the source.
zero_source = torch.zeros(gpt2.config.n_embd)
pv_gpt2 = pv.IntervenableModel(
    {
        "layer": 0,
        "component": "mlp_output",
        "source_representation": zero_source,
    },
    model=gpt2,
)

# Run the base prompt with the intervention applied at unit location 3.
base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
intervened_outputs = pv_gpt2(
    base=base_inputs,
    unit_locations={"base": 3},
)

loaded model


### standalone config allows more options

In [4]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# One zero-source intervention on the MLP output of each of layers 0-3.
# The loop variable is named `layer` because its value is actually used;
# `_` is reserved for the discarded return value above.
config = pv.IntervenableConfig(
    [
        {
            "layer": layer,
            "component": "mlp_output",
            "source_representation": torch.zeros(gpt2.config.n_embd),
        }
        for layer in range(4)
    ],
    mode="parallel",
)
# Show the fully expanded configuration, including the filled-in defaults.
print(config)
pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

intervened_outputs = pv_gpt2(
    base=tokenizer("The capital of Spain is", return_tensors="pt"),
    unit_locations={"base": 3},
)

loaded model
IntervenableConfig
{
    "model_type": "None",
    "representations": [
        {
            "layer": 0,
            "component": "mlp_output",
            "unit": "pos",
            "max_number_of_units": 1,
            "low_rank_dimension": null,
            "intervention_type": null,
            "subspace_partition": null,
            "group_key": null,
            "intervention_link_key": null,
            "moe_key": null,
            "source_representation": "PLACEHOLDER",
            "hidden_source_representation": null
        },
        {
            "layer": 1,
            "component": "mlp_output",
            "unit": "pos",
            "max_number_of_units": 1,
            "low_rank_dimension": null,
            "intervention_type": null,
            "subspace_partition": null,
            "group_key": null,
            "intervention_link_key": null,
            "moe_key": null,
            "source_representation": "PLACEHOLDER",
            "hidden_source_repr

### passing activations at runtime

In [4]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# No source is baked into the config this time; only the intervention site
# (MLP output of layer 0) is declared.
site = {"layer": 0, "component": "mlp_output"}
pv_gpt2 = pv.IntervenableModel(site, model=gpt2)

# The source activation is handed over at call time instead.
base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
intervened_outputs = pv_gpt2(
    base=base_inputs,
    source_representations=torch.zeros(gpt2.config.n_embd),
    unit_locations={"base": 3},
)

loaded model


### simple addition intervention

In [5]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# Addition intervention on the MLP input of layer 0; the intervention type is
# given as the second positional argument of the config.
addition_site = {"layer": 0, "component": "mlp_input"}
config = pv.IntervenableConfig(addition_site, pv.AdditionIntervention)

pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base_inputs = tokenizer(
    "The Space Needle is in downtown",
    return_tensors="pt",
)
# Unit locations 0-3 are targeted; the source is a random vector of size n_embd.
intervened_outputs = pv_gpt2(
    base=base_inputs,
    unit_locations={"base": [[[0, 1, 2, 3]]]},
    source_representations=torch.rand(gpt2.config.n_embd),
)

loaded model


### trainable interventions accept additional arguments

In [5]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# The extra "low_rank_dimension" key is the additional argument consumed by
# the trainable intervention type passed alongside the site description.
rotation_site = {
    "layer": 8,
    "component": "block_output",
    "low_rank_dimension": 1,
}
config = pv.IntervenableConfig(
    rotation_site,
    pv.LowRankRotatedSpaceIntervention,
)
pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
source_inputs = tokenizer("The capital of Italy is", return_tensors="pt")
outputs = pv_gpt2(
    base=base_inputs,
    sources=source_inputs,
    unit_locations={"sources->base": 3},
)
# The last element of the returned sequence carries the hidden states used here.
last_hidden_state = outputs[-1].last_hidden_state

# A toy scalar loss, just to show that gradients can be back-propagated.
loss = last_hidden_state.sum()
loss.backward()

loaded model


### collect activations

In [9]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# A CollectIntervention placed on the block output of layer 10.
collect_site = {
    "layer": 10,
    "component": "block_output",
    "intervention_type": pv.CollectIntervention,
}
config = pv.IntervenableConfig(collect_site)
pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
outputs = pv_gpt2(
    base=base_inputs,
    unit_locations={"sources->base": 3},
)
# The first returned element is indexed for its last entry to get the activations.
collected_activations = outputs[0][-1]

loaded model


### activation collection works organically with interventions

In [4]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# First intervention: a vanilla intervention on the block output of layer 8.
swap_site = {
    "layer": 8,
    "component": "block_output",
    "intervention_type": pv.VanillaIntervention,
}
config = pv.IntervenableConfig(swap_site)

# Second intervention, appended afterwards: collect the block output of layer 10.
collect_site = {
    "layer": 10,
    "component": "block_output",
    "intervention_type": pv.CollectIntervention,
}
config.add_intervention(collect_site)

pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
source_inputs = tokenizer("The capital of Italy is", return_tensors="pt")
# One sources entry per intervention: the first gets a source prompt, the
# second (the collector) gets None.
outputs = pv_gpt2(
    base=base_inputs,
    sources=[source_inputs, None],
    unit_locations={"sources->base": 3},
)
collected_activations = outputs[0][-1]

loaded model


### Fine-grained intervention on a specific neuron

In [1]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# Collect from the per-head attention value output of layer 8, addressed with
# the "h.pos" unit.
head_site = {
    "layer": 8,
    "component": "head_attention_value_output",
    "unit": "h.pos",
    "intervention_type": pv.CollectIntervention,
}
config = pv.IntervenableConfig(head_site)
pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
# NOTE(review): GET_LOC((3, 3)) presumably means head 3 at position 3, given
# the "h.pos" unit - confirm against the pyvene docs.
target_location = pv.GET_LOC((3,3))
outputs = pv_gpt2(
    base=base_inputs,
    unit_locations={"base": target_location},
    subspaces=[0],
)
collected_activations = outputs[0][-1]

loaded model


### New intervention type

In [30]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()


class MultiplierIntervention(pv.ConstantSourceIntervention):
    """Custom intervention that scales the intervened representation by 99."""

    def __init__(self, embed_dim, **kwargs):
        # embed_dim and any extra keyword arguments are accepted but not used;
        # the parent class is initialised without arguments.
        super().__init__()

    def forward(self, base, source=None, subspaces=None):
        """Return `base` times 99.0; `source` and `subspaces` are ignored."""
        return base * 99.0


# Use the custom class like any built-in intervention type.
pv_gpt2 = pv.IntervenableModel(
    {"intervention_type": MultiplierIntervention},
    model=gpt2,
)
base_inputs = tokenizer("The capital of Spain is", return_tensors="pt")
intervened_outputs = pv_gpt2(
    base=base_inputs,
    unit_locations={"base": 3},
)

loaded model


### Recurrent NNs

In [24]:
import torch
import pyvene as pv

# Build a GRU classifier with hidden size 32; the first two returned values
# are not needed.
_, _, gru = pv.create_gru_classifier(pv.GRUConfig(h_dim=32))

# Zero-out intervention on the cell output, addressed with the "t" unit.
pv_gru = pv.IntervenableModel(
    {
        "component": "cell_output",
        "unit": "t",
        "intervention_type": pv.ZeroIntervention,
    },
    model=gru,
)

# Random input embeddings of shape (1, 10, h_dim).
rand_t = torch.rand(1, 10, gru.config.h_dim)

intervened_outputs = pv_gru(
    base={"inputs_embeds": rand_t},
    unit_locations={"base": 3},
)

loaded model


### LMs Generation

In [39]:
import torch
import pyvene as pv

# built-in helper to get the TinyStories model and its tokenizer
_, tokenizer, tinystory = pv.create_gpt_neo()

# Embedding of token id 14628, scaled down by 0.3.
# NOTE(review): 14628 is presumably the id of a "happy" token, going by the
# variable name - confirm with the tokenizer.
emb_happy = tinystory.transformer.wte(torch.tensor(14628)) * 0.3

# One addition intervention on the MLP output of every layer. The loop
# variable is named `layer` because its value is used; `_` is kept for the
# discarded return values.
pv_tinystory = pv.IntervenableModel(
    [
        {
            "layer": layer,
            "component": "mlp_output",
            "intervention_type": pv.AdditionIntervention,
        }
        for layer in range(tinystory.config.num_layers)
    ],
    model=tinystory,
)

prompt = tokenizer("Once upon a time there was", return_tensors="pt")
# generate() returns a pair; only the second element (the intervened
# generation) is kept.
_, intervened_story = pv_tinystory.generate(
    prompt,
    source_representations=emb_happy,
    max_length=256,
)
print(tokenizer.decode(intervened_story[0], skip_special_tokens=True))

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Once upon a time there was a little girl named Lucy. She was three years old and loved to explore. One day, Lucy was walking in the park when she saw a big, red balloon. She was so excited and wanted to play with it.

But then, a big, mean man came and said, "That balloon is mine! You can't have it!" Lucy was very sad and started to cry.

The man said, "I'm sorry, but I need the balloon for my work. You can have it if you want."

Lucy was so happy and said, "Yes please!" She took the balloon and ran away.

But then, the man said, "Wait! I have an idea. Let's make a deal. If you can guess what I'm going to give you, then you can have the balloon."

Lucy thought for a moment and then said, "I guess I'll have to get the balloon."

The man smiled and said, "That's a good guess! Here you go."

Lucy was so happy and thanked the man. She hugged the balloon and ran off to show her mom.

The end.



### saving and loading

In [4]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# Wrap GPT-2 with a zero-out intervention. No layer or component is given in
# the dict, so whatever defaults the library applies are used.
pv_gpt2 = pv.IntervenableModel(
    {"intervention_type": pv.ZeroIntervention},
    model=gpt2,
)

# Persist the intervenable model under ./tmp/.
pv_gpt2.save("./tmp/")

loaded model
Directory './tmp/' already exists.


In [5]:
# Rebuild the intervenable wrapper from what was saved under ./tmp/,
# attaching it to the GPT-2 instance created in the previous cell.
pv_gpt2 = pv.IntervenableModel.load("./tmp/", model=gpt2)



## to add a little more complexity

### intervention grouping

In [3]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# Two vanilla interventions on the block output of layers 0 and 2. Both carry
# the same "group_key" (0); presumably this is what lets them share the single
# source passed below - confirm against the pyvene docs.
config = pv.IntervenableConfig([
    {"layer": 0, "component": "block_output", "group_key": 0},
    {"layer": 2, "component": "block_output", "group_key": 0}],
    intervention_types=pv.VanillaIntervention,
)

pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base = tokenizer("The capital of Spain is", return_tensors="pt")
# A single source example, wrapped in a list.
sources = [tokenizer("The capital of Italy is", return_tensors="pt")]
# NOTE(review): the tuple below looks like (source locations, base locations),
# each holding one entry per intervention; under that reading the first
# intervention uses position 3 and the second position 4 on both sides -
# confirm against the pyvene unit_locations docs.
intervened_outputs = pv_gpt2(
    base, sources, 
    {"sources->base": ([
        [[3]], [[4]] # first list: one entry per intervention
    ], [             # second list: again one entry per intervention
        [[3]], [[4]] 
    ])}
)

loaded model


### dynamic intervention skipping

In [8]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

config = pv.IntervenableConfig([
    # these three interventions are identical (same layer, same component);
    # they are duplicated on purpose so each call below can enable a different one
    {"layer": 0, "component": "block_output"},
    {"layer": 0, "component": "block_output"},
    {"layer": 0, "component": "block_output"}],
    intervention_types=pv.VanillaIntervention,
)
pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base = tokenizer("The capital of Spain is", return_tensors="pt")
source = tokenizer("The capital of Italy is", return_tensors="pt")
# Each call passes a source (and location [[4]]) for exactly one of the three
# interventions and None for the other two: first only the third intervention,
# then only the second, then only the first.
_, pv_out1 = pv_gpt2(base, [None, None, source],
    {"sources->base": ([None, None, [[4]]], [None, None, [[4]]])})
_, pv_out2 = pv_gpt2(base, [None, source, None],
    {"sources->base": ([None, [[4]], None], [None, [[4]], None])})
_, pv_out3 = pv_gpt2(base, [source, None, None],
    {"sources->base": ([[[4]], None, None], [[[4]], None, None])})
# since the interventions are identical, all three calls should give the same
# result; the comparison is exact (torch.equal), not approximate
print(
    torch.equal(pv_out1.last_hidden_state, pv_out2.last_hidden_state),
    torch.equal(pv_out2.last_hidden_state, pv_out3.last_hidden_state)
)

loaded model
True True


### intervening on subspace

In [13]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

# A single vanilla intervention on the block output of layer 0 whose
# representation is partitioned into contiguous chunks; each [i, j] pair
# gives the boundary indices of one chunk.
partitioned_site = {
    "layer": 0,
    "component": "block_output",
    "subspace_partition": [[0, 128], [128, 256]],
}
config = pv.IntervenableConfig(
    [partitioned_site],
    intervention_types=pv.VanillaIntervention,
)
pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base = tokenizer("The capital of Spain is", return_tensors="pt")
source = tokenizer("The capital of Italy is", return_tensors="pt")

# subspaces=1 picks the second partition, i.e. only dimensions 128 to 256
# are intervened on.
intervened_outputs = pv_gpt2(
    base,
    [source],
    {"sources->base": 4},
    subspaces=1,
)

loaded model


### linked/coupled interventions for weight sharing and subspace interventions

In [5]:
import torch
import pyvene as pv

_, tokenizer, gpt2 = pv.create_gpt2()

config = pv.IntervenableConfig([
    # the two interventions share the same "intervention_link_key", so they
    # are linked to manipulate the same representation, but each call can
    # point them at different subspaces
    {"layer": 0, "component": "block_output", 
     "subspace_partition": [[0, 128], [128, 256]], "intervention_link_key": 0},
    {"layer": 0, "component": "block_output",
     "subspace_partition": [[0, 128], [128, 256]], "intervention_link_key": 0}],
    intervention_types=pv.VanillaIntervention,
)
pv_gpt2 = pv.IntervenableModel(config, model=gpt2)

base = tokenizer("The capital of Spain is", return_tensors="pt")
source = tokenizer("The capital of Italy is", return_tensors="pt")

# using intervention skipping: only the second intervention gets a source here
_, pv_out1 = pv_gpt2(
    base, [None, source],
    # 4 means token position 4
    {"sources->base": ([None, [[4]]], [None, [[4]]])},
    # 1 means the second partition in the config
    subspaces=[None, [[1]]],
)
# same request, but routed through the first intervention instead
_, pv_out2 = pv_gpt2(
    base,
    [source, None],
    {"sources->base": ([[[4]], None], [[[4]], None])},
    subspaces=[[[1]], None],
)
# the two calls are expected to produce identical hidden states
print(torch.equal(pv_out1.last_hidden_state, pv_out2.last_hidden_state))

# subspaces takes a list of partition indices per intervention; swapping which
# intervention handles which partition should not change the result
_, pv_out3 = pv_gpt2(
    base,
    [source, source],
    {"sources->base": ([[[4]], [[4]]], [[[4]], [[4]]])},
    subspaces=[[[0]], [[1]]],
)
_, pv_out4 = pv_gpt2(
    base,
    [source, source],
    {"sources->base": ([[[4]], [[4]]], [[[4]], [[4]]])},
    subspaces=[[[1]], [[0]]],
)
print(torch.equal(pv_out3.last_hidden_state, pv_out4.last_hidden_state))

loaded model
True
True


### adding new model types

In [4]:
import torch
import pyvene as pv

# get a flan-t5 from HuggingFace
from transformers import T5ForConditionalGeneration, T5Tokenizer, T5Config
# Load the flan-t5-small configuration, tokenizer, and model.
config = T5Config.from_pretrained("google/flan-t5-small")
tokenizer = T5Tokenizer.from_pretrained("google/flan-t5-small")
t5 = T5ForConditionalGeneration.from_pretrained(
    "google/flan-t5-small", config=config
)

# Register the intervention mapping for this model type through pyvene's
# global registries. Only two encoder components are wired up, for simplicity.
pv.type_to_module_mapping[type(t5)] = {
    "mlp_output": ("encoder.block[%s].layer[1]",
                   pv.models.constants.CONST_OUTPUT_HOOK),
    # "attention_input" is the *input* of the attention sub-layer, so it is
    # registered with the input hook rather than the output hook.
    "attention_input": ("encoder.block[%s].layer[0]",
                        pv.models.constants.CONST_INPUT_HOOK),
}
# Config attribute(s) that give the size of each component's representation.
pv.type_to_dimension_mapping[type(t5)] = {
    "mlp_output": ("d_model",),
    "attention_input": ("d_model",),
    "block_output": ("d_model",),
    "head_attention_value_output": ("d_model/num_heads",),
}

# Wrap the model exactly as was done for GPT-2.
pv_t5 = pv.IntervenableModel(
    {
        "layer": 0,
        "component": "mlp_output",
        "source_representation": torch.zeros(t5.config.d_model),
    },
    model=t5,
)

# Then intervene! The base inputs also carry decoder_input_ids, obtained here
# by tokenizing an empty string.
base = tokenizer("The capital of Spain is", return_tensors="pt")
decoder_input_ids = tokenizer("", return_tensors="pt").input_ids
base["decoder_input_ids"] = decoder_input_ids
intervened_outputs = pv_t5(
    base,
    unit_locations={"base": 3},
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


### The End
You have now graduated from pyvene 101! Feel free to take a look at our other tutorials for more challenging interventions.