# Setup

In [1]:
from neel.imports import *
from solu.microscope.microscope import *

pio.renderers.default = "vscode"
torch.set_grad_enabled(False)

<torch.autograd.grad_mode.set_grad_enabled at 0x7fb3a9f28750>

In [2]:
data = get_c4_code()
data

Loading cached shuffled indices for dataset at /workspace/data/code_valid_tokens.hf/cache-a999c9bbde4b7b86.arrow


13


Dataset({
    features: ['tokens'],
    num_rows: 1657102
})

In [4]:
max_act_store = MaxActStore.load("solu-1l-c4-code_v3L0")
print(max_act_store.index[0])
print(max_act_store.max_acts[0])
max_act_store.max_acts = max_act_store.max_acts.cpu()
max_act_store.index = max_act_store.index.cpu()
max_act_store.max_acts, indices = max_act_store.max_acts.sort(descending=True)
max_act_store.index = max_act_store.index.gather(-1, indices)
print(max_act_store.index[0])
print(max_act_store.max_acts[0])

tensor([ 508747, 1171978, 1337846,  833884, 1047290,    5108,  215992,  206564,
         520548,  720179, 1246030, 1510513,  555884,  431365,  796298,  649472,
        1592446,  527005,  774220, 1419926], device='cuda:0')
tensor([0.0335, 0.0353, 0.0330, 0.0324, 0.0329, 0.0329, 0.0340, 0.0337, 0.0363,
        0.0350, 0.0349, 0.0326, 0.0377, 0.0368, 0.0380, 0.0338, 0.0368, 0.0344,
        0.0399, 0.0332], device='cuda:0')
tensor([ 774220,  796298,  555884,  431365, 1592446,  520548, 1171978,  720179,
        1246030,  527005,  215992,  649472,  206564,  508747, 1419926, 1337846,
           5108, 1047290, 1510513,  833884])
tensor([0.0399, 0.0380, 0.0377, 0.0368, 0.0368, 0.0363, 0.0353, 0.0350, 0.0349,
        0.0344, 0.0340, 0.0338, 0.0337, 0.0335, 0.0332, 0.0330, 0.0329, 0.0329,
        0.0326, 0.0324])


In [5]:
model = HookedTransformer.from_pretrained("solu-1l")

Moving model to device:  cuda
Finished loading pretrained model solu-1l into HookedTransformer!


## Interactive Vis

In [6]:
import gradio as gr
from transformer_lens import HookedTransformer
from transformer_lens.utils import to_numpy
from IPython.display import HTML

default_neuron_index = 652
default_text = "The following is a list of powers of 10: 1, 10, 100, 1000, 10000, 100000, 1000000, 10000000"
print(model.to_str_tokens(default_text))
default_max_val = 1.0
default_min_val = 0.0
# print(get_neuron_acts(default_text, default_layer, default_neuron_index))

['<|BOS|>', 'The', ' following', ' is', ' a', ' list', ' of', ' powers', ' of', ' 1', '0', ':', ' 1', ',', ' 1', '0', ',', ' 1', '0', '0', ',', ' 1', '0', '0', '0', ',', ' 1', '0', '0', '0', '0', ',', ' 1', '0', '0', '0', '0', '0', ',', ' 1', '0', '0', '0', '0', '0', '0', ',', ' 1', '0', '0', '0', '0', '0', '0', '0']


In [7]:
def get_neuron_acts(text, neuron_index):
    # Hacky way to get out state from a single hook - we have a single element list and edit that list within the hook.
    cache = {}

    def caching_hook(act, hook):
        cache["activation"] = act[0, :, neuron_index]

    model.run_with_hooks(
        text, fwd_hooks=[(f"blocks.0.mlp.hook_mid", caching_hook)], return_type=None
    )
    return to_numpy(cache["activation"])


# This is some CSS (tells us what style )to give each token a thin gray border, to make it easy to see token separation
style_string = """<style> 
    span.token {
        border: 1px solid rgb(123, 123, 123);
        white-space: pre;
        } 
    </style>"""


def calculate_color(val, max_val, min_val):
    # Hacky code that takes in a value val in range [min_val, max_val], normalizes it to [0, 1] and returns a color which interpolates between slightly off-white and red (0 = white, 1 = red)
    # We return a string of the form "rgb(240, 240, 240)" which is a color CSS knows
    normalized_val = (val - min_val) / max_val
    return f"rgb(240, {240*(1-normalized_val)}, {240*(1-normalized_val)})"


def shitty_neuron_vis(text, neuron_index, max_val=None, min_val=None):
    """
    text: The text to visualize
    neuron_index: The neuron index
    max_val: The top end of our activation range, defaults to the maximum activation
    min_val: The top end of our activation range, defaults to the minimum activation

    Returns a string of HTML that displays the text with each token colored according to its activation

    Note: It's useful to be able to input a fixed max_val and min_val, because otherwise the colors will change as you edit the text, which is annoying.
    """
    if neuron_index is None:
        return "Please select a Neuron"
    acts = get_neuron_acts(text, neuron_index)
    act_max = acts.max()
    act_min = acts.min()
    # Defaults to the max and min of the activations
    if max_val is None:
        max_val = act_max
    if min_val is None:
        min_val = act_min
    # We want to make a list of HTML strings to concatenate into our final HTML string
    # We first add the style to make each token element have a nice border
    htmls = [style_string]
    # We then add some text to tell us what layer and neuron we're looking at - we're just dealing with strings and can use f-strings as normal
    # h4 means "small heading"
    htmls.append(f"<h4>Neuron: <b>L0N{neuron_index}</b></h4>")
    # We then add a line telling us the limits of our range
    htmls.append(
        f"<h4>Max Range: <b>{max_val:.4f}</b>. Min Range: <b>{min_val:.4f}</b></h4>"
    )
    # If we added a custom range, print a line telling us the range of our activations too.
    if act_max != max_val or act_min != min_val:
        htmls.append(
            f"<h4>Custom Range Set. Max Act: <b>{act_max:.4f}</b>. Min Act: <b>{act_min:.4f}</b></h4>"
        )
    # Convert the text to a list of tokens
    str_tokens = model.to_str_tokens(text)
    for tok, act in zip(str_tokens, acts):
        # A span is an HTML element that lets us style a part of a string (and remains on the same line by default)
        # We set the background color of the span to be the color we calculated from the activation
        # We set the contents of the span to be the token
        htmls.append(
            f"<span class='token' style='background-color:{calculate_color(act, max_val, min_val)}' >{tok}</span>"
        )

    return "".join(htmls)


default_text = """Hello World!
Hillo            Wrldo"""
default_html_string = shitty_neuron_vis(
    default_text, default_neuron_index, default_max_val, default_min_val
)
display(HTML(default_html_string))
print(default_html_string)

<style> 
    span.token {
        border: 1px solid rgb(123, 123, 123);
        white-space: pre;
        } 
    </style><h4>Neuron: <b>L0N652</b></h4><h4>Max Range: <b>1.0000</b>. Min Range: <b>0.0000</b></h4><h4>Custom Range Set. Max Act: <b>0.0054</b>. Min Act: <b>-0.0002</b></h4><span class='token' style='background-color:rgb(240, 239.6178517397493, 239.6178517397493)' ><|BOS|></span><span class='token' style='background-color:rgb(240, 239.76546071469784, 239.76546071469784)' >Hello</span><span class='token' style='background-color:rgb(240, 240.04570794291794, 240.04570794291794)' > World</span><span class='token' style='background-color:rgb(240, 240.04762511816807, 240.04762511816807)' >!</span><span class='token' style='background-color:rgb(240, 240.04258611705154, 240.04258611705154)' >
</span><span class='token' style='background-color:rgb(240, 238.69838420301676, 238.69838420301676)' >H</span><span class='token' style='background-color:rgb(240, 240.0313625705894, 240.031362570

In [8]:
# The `with gr.Blocks() as demo:` syntax just creates a variable called demo containing all these components
with gr.Blocks() as demo:
    gr.HTML(value=f"Hacky Interactive Lexoscope for SoLU 1L")
    # The input elements
    with gr.Row():
        with gr.Column():
            text = gr.Textbox(label="Text", value=default_text)
            # Precision=0 makes it an int, otherwise it's a float
            # Value sets the initial default value
            neuron_index = gr.Number(
                label="Neuron Index", value=default_neuron_index, precision=0
            )
            # If empty, these two map to None
            max_val = gr.Number(label="Max Value", value=default_max_val)
            min_val = gr.Number(label="Min Value", value=default_min_val)
            inputs = [text, neuron_index, max_val, min_val]
        with gr.Column():
            # The output element
            out = gr.HTML(label="Neuron Acts", value=default_html_string)
    for inp in inputs:
        inp.change(shitty_neuron_vis, inputs, out)

In [9]:
demo.launch(share=False, height=1000)

Running on local URL:  http://127.0.0.1:7860

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x7fb3a9dd9e90>, 'http://127.0.0.1:7860/', None)

## Show neuron text

In [10]:
# indices = max_act_store.max_acts.sort(dim=-1, descending=True)[1]
# max_act_store.index = max_act_store.index[indices]
# max_act_store.max_acts = max_act_store.max_acts[indices]
# print(max_act_store)

In [11]:
def vis_texts(texts, neuron_index, max_val, min_val):
    string = "<br><br>".join(
        [shitty_neuron_vis(text, neuron_index, max_val, min_val) for text in texts]
    )
    display(HTML(string))


def vis_neuron(index):
    tokens = data[max_act_store.index[index]]["tokens"]
    vis_texts(
        model.to_string(tokens[:, 1:]), index, max_act_store.max_acts[index].max(), 0.0
    )


vis_texts(["I love you", "I hate you"], 652, 0.003, 0.0)

In [12]:
vis_neuron(10)


__floordiv__ is deprecated, and its behavior will change in a future version of pytorch. It currently rounds toward 0 (like the 'trunc' function NOT 'floor'). This results in incorrect rounding for negative values. To keep the current behavior, use torch.div(a, b, rounding_mode='trunc'), or for actual floor division, use torch.div(a, b, rounding_mode='floor').



In [13]:
# origs = []
# subs = []
# for y in range(10):
#     origs.append(orig)
#     subs.append(sub)
#     orig = []
#     sub = []
#     neuron_index = random.randint(0, 2047)
#     for text_index in range(20):
#         tokens = data[max_act_store.index[neuron_index, text_index].item()]['tokens'][None]
#         acts = get_neuron_acts(tokens, neuron_index)
#         # print(acts.shape)
#         # print(acts[:50])
#         max_pos = acts.argmax()
#         # print(max_pos)
#         sub_tokens = tokens[:, max(0, max_pos-50):max_pos+50]
#         sub_acts = get_neuron_acts(sub_tokens, neuron_index)
#         # print(sub_acts.shape)
#         # print(acts.max())
#         # print(sub_acts.max())
#         # print(acts.argmax())
#         # print(sub_acts.argmax())
#         orig.append(acts.max().item())
#         sub.append(sub_acts.max().item())
#     scatter(orig,sub, hover=np.arange(20), range_x=(0, max_act_store.max_acts[neuron_index].max().item()*1.3), range_y=(0.0, max_act_store.max_acts[neuron_index].max().item()*1.3), xaxis="Orig", yaxis="Sub", title="Orig vs Sub for Neuron {}".format(neuron_index), height=500, width=500)

In [14]:
# df = melt(subs)
# df2 = melt(origs)
# df['sub'] = df['value']
# df['orig'] = df2['value']
# df['value2'] = df['value']
# display(df)
# fig = px.scatter(df, y=['orig', 'value2'], x='sub', facet_col='0')
# # fig.update_layout(shapes = [{'type': 'line', 'yref': 'paper', 'xref': 'paper', 'y0': 0, 'y1': 1, 'x0': 0, 'x1': 0.1}])
# fig.show()

In [15]:
neuron_index = 5
text_index = 1
tokens = data[max_act_store.index[neuron_index, text_index].item()]["tokens"]
text = model.to_string(tokens)
print(text)
(logits, loss), cache = model.run_with_cache(tokens, return_type="both")
print(loss)

<|BOS|>, sigY, sigP to change when forcing peaks.  Not used if forceParams is None.
        dth, dph: The peak center may move by (dth, dph) from predicted position (in units of histogram pixels).
        doPeakConvolution: boolean stating whether we should fit a convolved (smoothed) peak.  This is useful for filling in
                gaps for 3He detector tube packs.
        sigX0Params: a 4 element array with input arguments for coshPeakWidthModel [A,x0,b,BG].  Will ultimately be the
                initial guess at sigma along the scattering direction.
        sigY0: initial guess for sigma in the azimuthal direction.  Units: rad
        sigP0Params: a 4 element array with arguments for the covariance, fSigP [a, k, phi, b]

    """
    h, thBins, phBins = getAngularHistogram(
        box, nTheta=nTheta, nPhi=nPhi, zBG=zBG, fracBoxToHistogram=fracBoxToHistogram, useIDX=goodIDX)
    thCenters = 0.5 * (thBins[1:] + thBins[:-1])
    phCenters = 0.5 * (phBins[1:] + phBins[:-1])
    TH, 

In [16]:
rprint("[bold red]Stuff[/bold red]asd")

In [17]:
model.to_single_token(" which")

533

In [18]:
def display_logits(logit_vec, correct_token, top_k=5):
    if isinstance(correct_token, str):
        correct_token = model.to_single_token(correct_token)
    if isinstance(correct_token, torch.Tensor):
        correct_token = correct_token.item()
    probs = logit_vec.softmax(-1)
    values, indices = probs.sort(descending=True)
    correct_reached = False
    for i in range(10):
        string = (
            f"#{i} {values[i]:.2%} {indices[i]:0>5} |{model.to_string(indices[i])}|"
        )
        # print(indices[i].item(), correct_token)
        if indices[i].item() == correct_token:
            string = "[bold red]" + string + "[/bold red]"
            correct_reached = True
        rprint(string)
    if not correct_reached:
        print("...")
        string = f"#{logit_vec.argsort()[correct_token].item()} {probs[correct_token]:.2%} {correct_token:0>5} |{model.to_string(correct_token)}|"
        rprint(f"[bold red]{string}[/bold red]")


for k in [1, 5, 10, 50, 100, 300, 1000]:
    print(k)
    display_logits(logits[0, k], tokens[k + 1], top_k=5)

1


...


5


...


10


...


50


100


300


1000


In [19]:
print(model.to_string(tokens[290:310]))

-1])
    TH, PH = np.meshgrid(thCenters, phCenters


In [20]:
max_pos = cache["mid", 0][:, neuron_index].argmax()
print(max_pos)
print(cache["mid", 0].shape)

tensor(404, device='cuda:0')
torch.Size([1, 1024, 2048])


In [21]:
cache.remove_batch_dim()

In [22]:
line([cache["pre", 0][max_pos], cache["post", 0][max_pos]], line_labels=["pre", "post"])

line(cache["mid", 0][max_pos])

In [23]:
fig.data[1].yaxis = "y2"
fig.show()

NameError: name 'fig' is not defined

<|BOS|> with storage unit rental costs and store and feel of space hidden storage small sofa end except sofa marks the final look and need choose storage room.
Perfect fit your home find and save ideas about long by high and save ideas about any part of console table on bottom for components triple faux front cabinets with pieces that add to a. Extra deep extra deep. Extra deep console table, or console sofa table off accent your home gallery stores find the perfect home gallery stores find and style and finishes to off. Of stars yaheetech wood iron marble topped console table also elegant. It as an antique bricklayers table on pinterest see more ideas about long livingroom deep console. Cm long narrow console table that.
Wide each for a highly functional hidden bed over sofa is an extra storage table offers both and a range of console table sofa thereby saving the same way to your coastal living room storage. Policies faq. Sofa table with shelves, drawers in espresso sold by date sort

In [None]:
max_act_store.max_acts.shape

torch.Size([2048, 20])