In [4]:
import numpy as np
import os
from dotenv import load_dotenv, find_dotenv

In [6]:
# Read key/value pairs from the local .env file into the process environment,
# then pick up the Goodfire API key from there.
load_dotenv(dotenv_path=".env")
api_key = os.getenv('GOODFIRE_API_KEY')
if not api_key:
    # Fail here with an actionable message instead of later, inside an API call.
    raise RuntimeError(
        "GOODFIRE_API_KEY is not set; define it in .env or in the environment."
    )

In [7]:
import goodfire
# API client authenticated with the key read from the environment.
client = goodfire.Client(api_key=api_key)

# Model variant built on the Llama 3.1 8B Instruct base model; it is printed
# below so its edits/scopes can be inspected.
variant = goodfire.Variant("meta-llama/Meta-Llama-3.1-8B-Instruct")


# Disabled experiment, kept for reference (presumably generates steering edits
# for the variant from a text specification -- TODO confirm before re-enabling):
# edits = client.features.AutoSteer(
#     # specification="Talk with a lot of puns",
#     model = variant
# )
print(variant)



Variant(
   base_model=meta-llama/Meta-Llama-3.1-8B-Instruct,
   edits={
   }
   scopes={
   }
)


# FORWARD PASSES

In [9]:
# Ask the variant for a streamed reply, capped at 80 completion tokens.
# `output` is the stream itself; it is drained into a list in a later cell.
output = client.chat.completions.create(
    [
        {
            "role": "user",
            "content": "Tell me about the rainy weather in Hong Kong",
        },
    ],
    model=variant,
    stream=True,
    max_completion_tokens=80,
)

In [14]:
# Feature activations for the generated assistant message on its own (the user
# turn is not included).
# NOTE: `message_text` is assigned in the In [12] cell, which appears further
# down in this file -- run that cell first.
act = client.features.activations(
    [{"role": "assistant", "content": message_text}],
    model=variant,
)

In [18]:
# Inspect feature activations over `msgs`, aggregated with "sum".
# NOTE: `msgs` is assigned in the In [16] cell, which appears further down in
# this file -- run that cell first.
inspector = client.features.inspect(
    msgs,
    model=variant,
    aggregate_by="sum",
)

---

In [10]:
# Drain the streamed response into a list so the chunks can be iterated again
# later (the request was made with stream=True).
chunks = list(output)

In [12]:
# Stitch the streamed chunks back into one (role, text) pair and display the text.
# NOTE: `extract_message` is defined in the In [11] cell, which appears further
# down in this file -- run that cell first.
role, message_text = extract_message(chunks)
message_text

"Hong Kong's rainy weather is quite unique.  The city experiences a subtropical monsoon climate, with most of its annual rainfall falling between May and September. \n\nDuring this period, Hong Kong can experience heavy downpours and typhoons, which can cause flash flooding and strong winds. The average annual rainfall in Hong Kong is around 2,000 mm, with some areas receiving up to"

In [16]:
# Conversation handed to the feature inspector: only the assistant's reply.
# The originating user turn is deliberately left out; to include it, prepend
# {"role": "user", "content": "Tell me about the rainy weather in Hong Kong"}.
msgs = [{"role": "assistant", "content": message_text}]

In [54]:
# Search for the five features that best match the query "Godzilla".
features = client.features.search(
    "Godzilla",
    model=variant,
    top_k=5,
)

# Positions of those features in the SAE, used as column indices into `act`.
cols = [f.index_in_sae for f in features]

# Activations of just those features: every row of `act`, selected columns only.
# (A bare `features` expression used to sit mid-cell; it displayed nothing
# because only a cell's last expression is rendered, so it was removed.)
feat_act = act[:, cols]
feat_act

array([[0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.

In [11]:
def extract_message(chunks, choice_idx=0):
    """Reassemble one choice's message from streamed chat-completion chunks.

    Args:
        chunks: Iterable of streamed chunk objects. Each chunk is expected to
            expose ``choices``; each choice carries a ``delta`` that may have
            ``role`` and/or ``content`` attributes.
        choice_idx: Index of the choice to follow within each chunk.

    Returns:
        A ``(role, text)`` tuple: the last non-empty role seen (``"assistant"``
        if no chunk carries one) and all non-empty content fragments joined in
        arrival order.

    Raises:
        IndexError: If a chunk has choices, but fewer than ``choice_idx + 1``.
    """
    role = "assistant"
    parts = []
    for ch in chunks:
        choices = getattr(ch, "choices", None)
        if not choices:
            # A chunk with no choices carries no message text; indexing it
            # would raise. (Some streaming APIs emit such chunks -- whether
            # this SDK does is unverified, so this is purely defensive.)
            continue
        delta = choices[choice_idx].delta
        if getattr(delta, "role", None):
            role = delta.role
        if getattr(delta, "content", None):
            parts.append(delta.content)
    text = "".join(parts)
    return role, text

In [65]:
# Token sequence exposed by the inspector.
# NOTE(review): this cell ran (In [65]) long after `inspector` was created
# (In [18]), and the recorded token output shows a Godzilla prompt rather than
# the Hong Kong reply -- `inspector` had presumably been rebuilt from other
# messages in between. Re-run top to bottom to confirm.
tokens = inspector.tokens

In [75]:
# Display the token list captured from the inspector.
tokens

[Token("<|begin_of_text|>"),
 Token("<|start_header_id|>"),
 Token("system"),
 Token("<|end_header_id|>"),
 Token("
 
 "),
 Token("Cut"),
 Token("ting"),
 Token(" Knowledge"),
 Token(" Date"),
 Token(":"),
 Token(" December"),
 Token(" "),
 Token("202"),
 Token("3"),
 Token("
 "),
 Token("Today"),
 Token(" Date"),
 Token(":"),
 Token(" "),
 Token("26"),
 Token(" Jul"),
 Token(" "),
 Token("202"),
 Token("4"),
 Token("
 
 "),
 Token("<|eot_id|>"),
 Token("<|start_header_id|>"),
 Token("user"),
 Token("<|end_header_id|>"),
 Token("
 
 "),
 Token("God"),
 Token("zilla"),
 Token("!"),
 Token(" The"),
 Token(" King"),
 Token(" of"),
 Token(" the"),
 Token(" Monsters"),
 Token("."),
 Token(" He"),
 Token("'s"),
 Token(" a"),
 Token(" giant"),
 Token(","),
 Token(" fire"),
 Token("-bre"),
 Token("athing"),
 Token(","),
 Token(" city"),
 Token("-st"),
 Token("om"),
 Token("ping"),
 Token(" rept"),
 Token("ile"),
 Token(" from"),
 Token(" Japanese"),
 Token(" legend"),
 Token("."),
 Token(" Cre

In [25]:
# Show five (feature, score) pairs from the inspector -- presumably the five
# largest under the "sum" aggregation chosen when `inspector` was built.
inspector.top(5)

FeatureActivations(
   0: (Feature("Content discussing Hong Kong, particularly in sensitive or censorship-relevant contexts"), 62.8095703125)
   1: (Feature("Descriptions of temperature patterns and seasonal variations"), 56.7080078125)
   2: (Feature("Numerical ranges describing environmental measurements, especially precipitation"), 49.794921875)
   3: (Feature("Detailed descriptions of locations and experiences"), 16.2568359375)
   4: (Feature("Geographic and administrative relationships in formal writing, especially regarding Southeast Asia"), 11.39453125)
)

"Hong Kong's rainy weather is quite unique.  The city experiences a subtropical monsoon climate, with most of its annual rainfall falling between May and September. \n\nDuring this period, Hong Kong can experience heavy downpours and typhoons, which can cause flash flooding and strong winds. The average annual rainfall in Hong Kong is around 2,000 mm, with some areas receiving up to"

In [21]:
# Use the opening of the generated reply (its first 99 characters) as the
# search query. NOTE(review): the 99-character cut-off is unexplained --
# presumably just to keep the query short; confirm.
neighbors = client.features.search(
    message_text[:99],
    model=variant,
    top_k=5,
)

# SAE indices of the hits, in search-result order.
idx = [neighbor.index_in_sae for neighbor in neighbors]

# Resolve those indices back to their features.
look = client.features.lookup(indices=idx, model=variant)

    

In [22]:
# Print each search hit on its own line, in the order the search returned them.
for neighbor in neighbors:
    print(neighbor)

Feature("References to Hong Kong as a distinct entity")
Feature("Content discussing Hong Kong, particularly in sensitive or censorship-relevant contexts")
Feature("Comparing weather and climate patterns between locations")
Feature("Formal descriptive content about Shanghai's features and characteristics")
Feature("Singapore-specific contexts, especially weather and formal language")


In [23]:
# Display the lookup result. In the recorded output it is keyed by SAE index,
# and its key order differs from the order of `idx`, so read it by key rather
# than by position.
look

{36518: Feature("Comparing weather and climate patterns between locations"),
 62852: Feature("Content discussing Hong Kong, particularly in sensitive or censorship-relevant contexts"),
 38722: Feature("Formal descriptive content about Shanghai's features and characteristics"),
 58212: Feature("Singapore-specific contexts, especially weather and formal language"),
 44860: Feature("References to Hong Kong as a distinct entity")}

In [24]:
# Activation columns for the five neighbor features, in `idx` order
# (rows presumably correspond to tokens -- TODO confirm).
shai = act[:,idx]

In [26]:
# Total activation per neighbor feature: collapse the row axis, leaving one
# value per selected column (same order as `idx`).
shai.sum(axis=0)

array([19.46484375, 62.80957031, 17.08789062,  0.        ,  0.        ])

In [27]:
# Dimensions of the activation matrix (rows, feature columns).
act.shape

(111, 65536)