In [3]:
from ipywidgets import interact
from ipywidgets.widgets import Dropdown, IntText, Text, FloatText, Button
from funcy import partial
from typing import List
import pandas as pd
import torch
from transformer_lens.HookedTransformer import HookedTransformer

from algebraic_value_editing import completion_utils, analysis, prompt_utils
from algebraic_value_editing.prompt_utils import RichPrompt


In [2]:
device: str = "cuda"
gpt2_xl: HookedTransformer = HookedTransformer.from_pretrained(
    model_name="gpt2-xl",
    device="cpu",
).to(
    device
)  # This reduces GPU memory usage, for some reason


Using pad_token, but it is not set yet.


Loaded pretrained model gpt2-xl into HookedTransformer
Moving model to device:  cuda


NameError: name 'torch' is not defined

In [4]:
# Disable gradients to save memory during inference
_ = torch.set_grad_enabled(False)

num_blocks: int = gpt2_xl.cfg.n_layers

default_kwargs = {
    "temperature": 1,
    "freq_penalty": 1,
    "top_p": 0.3,
    "model": gpt2_xl,
}

get_x_vector_preset = partial(
    prompt_utils.get_x_vector,
    pad_method="tokens_right",
    model=gpt2_xl,
    custom_pad_id=gpt2_xl.to_single_token(" "),
)

# Automating prompt assessment
Using `text-davinci-003`, we automatically rate the extent to which
completions are about a given subject.

In [6]:
wedding_rps: List[RichPrompt] = [
    *get_x_vector_preset(
        prompt1="I talk about weddings constantly",
        prompt2="I do not talk about weddings constantly",
        coeff=3,
        act_name=14,
    ),
    *get_x_vector_preset(
        prompt1="I bring up weddings in every situation",
        prompt2="I do not bring up weddings in every situation",
        coeff=10,
        act_name=22,
    ),
]


wedding_prompt: str = (
    "I hate talking about weddings. Instead, let's talk about a totally"
    " different topic, like the impact of NGDP on the price of gold."
)

wedding_df: pd.DataFrame = completion_utils.gen_normal_and_modified(
    prompt_batch=[wedding_prompt] * 5,
    rich_prompts=wedding_rps,
    seed=0,
    tokens_to_generate=60,
    **default_kwargs,
)
analysis.rate_completions_openai(
    data_frame=wedding_df, criterion="mentions weddings"
)
completion_utils.pretty_print_completions(wedding_df)

Cost upper bound: $0.02
0 5
1 5
2 6
3 0
4 0
5 1
6 0
7 0
8 0
9 2
Average ratings:
is_modified
False    0.6
True     3.2
Name: rating, dtype: float64
+--------------------------------------------------------------+--------------------------------------------------------------+
|                      [1mNormal completions[0m                      |                     [1mModified completions[0m                     |
+--------------------------------------------------------------+--------------------------------------------------------------+
|   [1mI hate talking about weddings. Instead, let's talk about   |   [1mI hate talking about weddings. Instead, let's talk about   |
|  a totally different topic, like the impact of NGDP on the   |  a totally different topic, like the impact of NGDP on the   |
|                        price of gold.[0m                        |                        price of gold.[0m                        |
|                                                   

In [7]:
from ipywidgets import Layout

wide: Layout = Layout(width="auto")

xvec_prompt1 = Text(
    value="I bring up weddings in every situation",
    description="First prompt for the x-vector:",
    layout=wide,
)
xvec_prompt2 = Text(
    value="I do not bring up weddings in every situation",
    description="Second prompt for the x-vector:",
    layout=wide,
)
injection_block = IntText(
    min=0,
    max=num_blocks,
    step=1,
    value=22,
    description="Transformer resid_pre to inject the x-vector into:",
    layout=wide,
)
xvec_coefficient = FloatText(
    min=-500,
    max=500,
    value=3,
    description="X-vector coefficient:",
    layout=wide,
)
prompt = Text(
    value=wedding_prompt, description="Prompt:", disabled=False, layout=wide
)
grading_criterion = Text(
    value="mentions weddings",
    description="Grading criterion:",
    disabled=False,
    layout=wide,
)
openai_model = Dropdown(
    options=["text-davinci-003", "text-curie-001"],
    value="text-davinci-003",
    description="OpenAI model:",
    layout=wide,
)

seed_widget = IntText(
    min=0,
    max=2**10,
    step=1,
    value=0,
    description="Seed:",
    layout=wide,
)

# TODO type and document
widgets = [
    xvec_prompt1,
    xvec_prompt2,
    injection_block,
    xvec_coefficient,
    prompt,
    grading_criterion,
    openai_model,
    seed_widget,
]

display(*widgets)

from ipywidgets import Output
from IPython.display import clear_output

output: Output = Output()

df: pd.DataFrame


def run_completions():
    """Run completions with the given parameters."""
    rich_prompts: List[RichPrompt] = [
        *get_x_vector_preset(
            prompt1=xvec_prompt1.value,
            prompt2=xvec_prompt2.value,
            coeff=xvec_coefficient.value,
            act_name=injection_block.value,
        )
    ]

    df = completion_utils.gen_normal_and_modified(
        prompt_batch=[prompt.value] * 5,
        rich_prompts=rich_prompts,
        seed=seed_widget.value,
        tokens_to_generate=60,
        **default_kwargs,
    )

    # Rate the extent to which the grading criterion is met
    with output:
        clear_output(wait=True)
        analysis.rate_completions_openai(
            data_frame=df,
            criterion=grading_criterion.value,
            model=openai_model.value,
        )
        print(df["rating_text"])
        completion_utils.pretty_print_completions(df)


generate = Button(description="Generate completions")
generate.on_click(lambda _: run_completions())
display(generate)

display(output)

Text(value='I bring up weddings in every situation', description='First prompt for the x-vector:', layout=Layo…

Text(value='I do not bring up weddings in every situation', description='Second prompt for the x-vector:', lay…

IntText(value=22, description='Transformer resid_pre to inject the x-vector into:', layout=Layout(width='auto'…

FloatText(value=3.0, description='X-vector coefficient:', layout=Layout(width='auto'))

Text(value="I hate talking about weddings. Instead, let's talk about a totally different topic, like the impac…

Text(value='mentions weddings', description='Grading criterion:', layout=Layout(width='auto'))

Dropdown(description='OpenAI model:', layout=Layout(width='auto'), options=('gpt-3.5-turbo', 'text-curie-001')…

IntText(value=0, description='Seed:', layout=Layout(width='auto'))

Button(description='Generate completions', style=ButtonStyle())

Output()