### Config

In [1]:
%load_ext autoreload
%autoreload 2

import os
import sys

from hydra import compose, initialize
from hydra.core.global_hydra import GlobalHydra
from omegaconf import OmegaConf

# Hydra keeps a process-wide singleton: calling initialize() a second time in
# the same kernel raises "GlobalHydra is already initialized". Clearing any
# previous instance first makes this cell safe to re-run.
GlobalHydra.instance().clear()
initialize(config_path="../config", version_base="1.3")
cfg = compose(config_name="config")
print(OmegaConf.to_yaml(cfg))

# Put the project root on sys.path (once) so that later cells can import the
# project's `src` package.
project_root = str(cfg.paths.project_root)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

paths:
  project_root: /home/p84400019/projects/consciousness-llms/IT-LLMs/
  model_path: ${model.company}/${model.model_family}/${model.model_size}/${model.it}/
  activation_method: ${time_series.node_type}/${time_series.node_activation}/${time_series.projection_method}/
  phyid_method: ${paths.activation_method}phyid_tau-${phyid.tau}/phyid_kind-${phyid.kind}/phyid_redundancy-${phyid.redundancy}/
  deactivation_method: deactivate_k_nodes_per_iteration-${deactivation_analysis.deactivate_k_nodes_per_iteration}/max_deactivated_nodes-${deactivation_analysis.max_deactivated_nodes}/
  data_dir: ${paths.project_root}data/${paths.model_path}${generation.name}/
  data_activations_dir: ${paths.data_dir}activations/
  data_activations_file: ${paths.data_activations_dir}multi_prompt_activations.pkl
  data_phyid_dir: ${paths.data_dir}phyid/${paths.phyid_method}
  data_phyid_file: ${paths.data_phyid_dir}multi_prompt_phyid.pkl
  data_phyid_file_data_array: ${paths.data_phyid_dir}multi_prompt_phyid.n

### Loading the model

In [2]:
from transformers import AutoTokenizer, AutoModelForCausalLM, GenerationConfig, AutoConfig
from src.utils import perturb_model, randomize_model_weights

# When False, only the tokenizer is loaded and `model` is left undefined.
load_model = True

model_name = cfg.model.hf_name
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
if load_model:
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        device_map='auto',
        attn_implementation='eager',
        trust_remote_code=True,
        # `revision` is an optional key of the model config.
        revision=cfg.model.revision if hasattr(cfg.model, 'revision') else None,
    )
    if cfg.model.it == 'random':
        # Alternative randomisation, currently disabled:
        # randomize_model_weights(model, mean=0.0, std=0.02)
        perturb_model(model, scale=10.0)
    # model.generation_config = GenerationConfig.from_pretrained(model_name)
    # model.generation_config.pad_token_id = model.generation_config.eos_token_id
    model.eval()

    # Summaries live inside the guard: `model` only exists when it was loaded,
    # so printing it unconditionally raises NameError for load_model=False.
    print(type(model))
    print(model)
    print(model.config)


  from .autonotebook import tqdm as notebook_tqdm


<class 'transformers.models.gemma3.modeling_gemma3.Gemma3ForCausalLM'>
Gemma3ForCausalLM(
  (model): Gemma3TextModel(
    (embed_tokens): Gemma3TextScaledWordEmbedding(262144, 1152, padding_idx=0)
    (layers): ModuleList(
      (0-25): 26 x Gemma3DecoderLayer(
        (self_attn): Gemma3Attention(
          (q_proj): Linear(in_features=1152, out_features=1024, bias=False)
          (k_proj): Linear(in_features=1152, out_features=256, bias=False)
          (v_proj): Linear(in_features=1152, out_features=256, bias=False)
          (o_proj): Linear(in_features=1024, out_features=1152, bias=False)
          (q_norm): Gemma3RMSNorm((256,), eps=1e-06)
          (k_norm): Gemma3RMSNorm((256,), eps=1e-06)
        )
        (mlp): Gemma3MLP(
          (gate_proj): Linear(in_features=1152, out_features=6912, bias=False)
          (up_proj): Linear(in_features=1152, out_features=6912, bias=False)
          (down_proj): Linear(in_features=6912, out_features=1152, bias=False)
          (act_fn): P

### Record activations, save them, and verify them

In [3]:
from src.activation_recorder import ActivationRecorder, MultiPromptActivations

# NOTE(review): this is set after torch/transformers were imported and the
# model was loaded — confirm that torch still honours it at this point.
os.environ["TORCHDYNAMO_DISABLE"] = "1"

load_from_disk = False
data_activations_file = cfg.paths.data_activations_file
max_new_tokens = cfg.generation.max_new_tokens

# A list coming from the Hydra/OmegaConf config is a ListConfig, which is not
# a `list` subclass, so an isinstance(..., list) test on it never matches.
# Convert to plain Python containers first so the flattening below can run.
prompts = cfg.generation.prompts
if OmegaConf.is_config(prompts):
    prompts = OmegaConf.to_container(prompts, resolve=True)

# If the config groups prompts as a list of lists, flatten it by one level.
if isinstance(prompts, list) and all(isinstance(p, list) for p in prompts):
    prompts = [item for sublist in prompts for item in sublist]

prompt_template = cfg.model.apply_chat_template
print(f"Using prompt_template: {prompt_template}")

if not load_from_disk:
    recorder = ActivationRecorder(model, tokenizer)
    activations = recorder.record_prompts(
        prompts,
        max_new_tokens=max_new_tokens,
        prompt_template=prompt_template,
    )
    print(f"Recorded {len(activations)} activations for {len(prompts)} prompts.")
    activations.save(data_activations_file)
    # NOTE(review): in the recorded run this check raised
    # "AssertionError: Expected 128 steps, got 44" — presumably generation
    # stopped before max_new_tokens was reached; confirm whether the
    # verification should tolerate shorter generations.
    activations.verify_recorded_activations(
        prompts=prompts,
        max_new_tokens=max_new_tokens,
        tokenizer=tokenizer,
        diff_q_size=True,
    )

Using prompt_template: base
Recorder initialized for model: ModelInformation(model_name='google/gemma-3-1b-pt', model_architecture='Gemma3ForCausalLM', num_layers=26, num_attention_heads_per_layer=4, total_num_attention_heads=104, hidden_size=1152, head_dim=256, n_routed_experts=None, n_shared_experts=None, num_experts_per_tok=None, attention_implementation='default')
Working on prompt 0: Question: Imagine a future where humans have evolved to live underwater. Describe the adaptations they might develop.
 Answer: 

Prompt 0 completed: Question: Imagine a future where humans have evolved to live underwater. Describe the adaptations they might develop.
 Answer: 1. Humans might develop gills and gills might evolve, too.
 2. They might need to evolve air tanks, also.
 3. Many people might die because of lack of oxygen in water.

Recorded 1 activations for 1 prompts.
MultiPromptActivations successfully saved to '/home/p84400019/projects/consciousness-llms/IT-LLMs/data/google/gemma-3/1B/base

AssertionError: Expected 128 steps, got 44

In [None]:
# Read the recorded activations back from disk.
loaded_activations = MultiPromptActivations.load(data_activations_file)

# Report how many prompts the reloaded object contains.
print("Loaded MultiPromptActivations object has:", len(loaded_activations.prompts), "prompts recorded.")

# Run the same verification on the reloaded object.
loaded_activations.verify_recorded_activations(
    prompts=prompts,
    max_new_tokens=max_new_tokens,
    tokenizer=tokenizer,
    diff_q_size=True,
)

print("Final MultiPromptActivations object has:", len(loaded_activations.prompts), "prompts recorded.")

# Drill down to prompt 0 -> step 0 -> layer 0, then print the tensor shapes
# recorded for every attention head of that layer.
prompt_acts = loaded_activations.prompts[0]
step_acts = prompt_acts.steps[0]
layer_acts = step_acts.layers[0]
attn = layer_acts.attention
for head_idx, head_acts in attn.heads.items():
    print(f"Head {head_idx} activations:")
    for tensor_name in ("query", "attention_weights", "attention_outputs", "projected_outputs"):
        print(getattr(head_acts, tensor_name).shape)

In [None]:
# Print the recorded MoE expert activations of every layer at step index 4.
step_acts = prompt_acts.steps[4]
for layer_idx, layer_acts in step_acts.layers.items():
    print(f"Layer {layer_idx} activations:")
    # Read from the layer being iterated so each heading shows its own experts
    # (a fixed `layers[2]` lookup would repeat one layer under every heading).
    moe_layer_acts = getattr(layer_acts, "moe", None)
    if moe_layer_acts is None:
        # Presumably dense layers / non-MoE models record nothing here — skip.
        continue
    for expert_id, expert_acts in moe_layer_acts.experts.items():
        print(f"Expert {expert_id} activations:")
        print(repr(expert_acts))
        print("Gate value:", expert_acts.gate_value)
        print("MLP output:", expert_acts.mlp_output)
        print("Expert output:", expert_acts.expert_output)