# LMs can handle linear combinations of prompts
We survey a range of transformers, including:
- EleutherAI models
- OPT models
- SoLU models
- GPT-2, both small and XL
- Vicuna, a 13B finetuned model (**TODO: confirm**)

In [1]:
# Make sure algebraic_value_editing is importable; install it on demand if not
try:
    import algebraic_value_editing
except ImportError:
    # Pin the install to a stable commit
    commit = "15bcf55"
    pip_args = (
        "install -U"
        f" git+https://github.com/montemac/algebraic_value_editing.git@{commit}"
    )
    # Equivalent to running the `%pip` line magic with `pip_args`
    get_ipython().run_line_magic("pip", pip_args)  # type: ignore


In [8]:
import torch
import pandas as pd
from typing import List, Dict

from transformer_lens.HookedTransformer import HookedTransformer

from algebraic_value_editing import hook_utils, prompt_utils, completion_utils
from algebraic_value_editing.prompt_utils import ActivationAddition

In [6]:
# Device handed to load_model_tl when models are loaded
DEVICE: str = "cpu"
# Shared keyword arguments, unpacked into completion_utils.print_n_comparisons
DEFAULT_KWARGS: Dict = dict(
    seed=0,
    temperature=1.0,
    freq_penalty=1.0,
    top_p=0.3,
    num_comparisons=10,
)


def load_model_tl(model_name: str, device: str = "cpu") -> HookedTransformer:
    """Build a pretrained HookedTransformer on the CPU, then move it to `device`.

    Args:
        model_name: Name passed to `HookedTransformer.from_pretrained`.
        device: Target device for the loaded model; defaults to "cpu".

    Returns:
        The loaded model, after `.to(device)` has been called on it.
    """
    # Always materialize on the CPU first, whatever the final target is
    loaded: HookedTransformer = HookedTransformer.from_pretrained(
        model_name, device="cpu"
    )
    loaded.to(device)
    return loaded


# Gradients are never needed in this notebook; turning autograd off saves memory
torch.set_grad_enabled(False)
# Seed torch's global RNG so runs are reproducible
torch.manual_seed(0)

<torch._C.Generator at 0x7f88ae7b9e50>

## Starting off with GPT-2 small
We use "activation additions" to combine prompts.

In [4]:
# GPT-2 small ("gpt2"), placed on the notebook-wide DEVICE
gpt2small: HookedTransformer = load_model_tl(
    model_name="gpt2",
    device=DEVICE,
)


Using pad_token, but it is not set yet.


Loaded pretrained model gpt2 into HookedTransformer
Moving model to device:  cpu


In [22]:
# A single activation addition built from the geese/UFO sentence
goose_ufo_prompts: List[ActivationAddition] = [
    ActivationAddition(
        prompt="Lots of geese are chasing UFOs",
        coeff=2.0,
        act_name=0,
    )
]
# Prompt passed to the model alongside the activation addition
prompt: str = (
    "John left the store and went outside. "
    "He saw his friend who said 'Hey, your name is"
)

# Show how GPT-2 small tokenizes each of the two strings, one list per line
print(
    gpt2small.to_str_tokens(goose_ufo_prompts[0].prompt),
    gpt2small.to_str_tokens(prompt),
    sep="\n",
)


['<|endoftext|>', 'Lots', ' of', ' ge', 'ese', ' are', ' chasing', ' UFOs']
['<|endoftext|>', 'John', ' left', ' the', ' store', ' and', ' went', ' outside', '.', ' He', ' saw', ' his', ' friend', ' who', ' said', " '", 'Hey', ',', ' your', ' name', ' is']


In [None]:
# Run print_n_comparisons on GPT-2 small with the geese/UFO activation
# addition, using the shared settings from DEFAULT_KWARGS
completion_utils.print_n_comparisons(
    prompt=prompt,
    model=gpt2small,
    activation_additions=goose_ufo_prompts,
    **DEFAULT_KWARGS,
)


Now let's use a word-count metric to see how often geese are mentioned
in the generated text as we vary the coefficient of the activation addition.

In [None]:
model.t