In [1]:
from lmexp.models.implementations.gpt2small import GPT2Tokenizer, SteerableGPT2
from lmexp.latent_text_sampling.main import (
    activations_svd,
    plot_svd_result,
    generate_steering_configs,
)
from lmexp.generic.get_locations import after_search_tokens, all_tokens

In [2]:
model = SteerableGPT2()
tokenizer = GPT2Tokenizer()

In [3]:
sports_and_games = [
    "Soccer",
    "Basketball",
    "Baseball",
    "Tennis",
    "Cricket",
    "Rugby",
    "Golf",
    "Hockey",
    "Volleyball",
    "Badminton",
    "Table Tennis",
    "Swimming",
    "Track and Field",
    "Cycling",
    "Boxing",
    "Martial Arts",
    "Wrestling",
    "Gymnastics",
    "Skateboarding",
    "Surfing",
    "Skiing",
    "Snowboarding",
    "Figure Skating",
    "Ice Hockey",
    "Lacrosse",
    "Handball",
    "Bowling",
    "Darts",
    "Billiards",
    "Squash",
    "Racquetball",
    "Polo",
    "Fencing",
    "Archery",
    "Horse Racing",
    "Rowing",
    "Kayaking",
    "Canoeing",
    "Sailing",
    "Triathlon",
    "Badminton",
    "Softball",
    "American Football",
    "Sumo Wrestling",
    "Chess",
    "Poker",
    "Esports",
    "Ultimate Frisbee",
    "Rock Climbing",
    "Bocce Ball",
]
prompts = [f"An example of an activity is: {activity}" for activity in sports_and_games]

In [4]:
search_tokens = tokenizer.encode("An example of an activity is:")[0][:-1]
print(
    f"We get PCs of activations after the '{tokenizer.decode(search_tokens)}' tokens"
)

We get PCs of activations after the 'An example of an activity is' tokens


In [5]:
directions = activations_svd(
    examples=prompts,
    hooked_model=model,
    tokenizer=tokenizer,
    layers=list(range(0, model.n_layers)),
    token_location_fn=after_search_tokens,
    search_tokens=search_tokens,
    batch_size=6,
)

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)
100%|██████████| 9/9 [00:00<00:00, 19.91it/s]


In [6]:
directions.keys()

dict_keys([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])

In [27]:
layer = 5
rank = 2
configs = generate_steering_configs(directions, layer, rank, None, all_tokens)
print([c.scale for c in configs])
res = model.generate_with_steering(
    text=["I like"],
    tokenizer=tokenizer,
    steering_configs=configs,
    max_n_tokens=35,
    save_to=None,
)
print(res["results"][0]["output"])

[32.59605333888253, 0.8167257781281085]
I like to think that the world is a little bit more than a little bit more than a little bit more than a little bit more than a little bit more than a little


In [8]:
layer = 5
plot_svd_result(
    examples=[p.split(": ")[-1] for p in prompts],
    U=directions[layer][0],
    S=directions[layer][1],
)