In [None]:
# for Colab use

# ! git clone https://github.com/stevenabreu7/hybrid-interpretability
# %cd hybrid-interpretability  # use %cd: a `! cd` runs in its own subshell and does not persist
# ! bash install.sh

In [None]:
import kagglehub
import os

# Log in to Kaggle through kagglehub before the next cell calls
# kagglehub.model_download.
kagglehub.login()

In [None]:
# Download the model weights via kagglehub and keep the value returned by
# model_download; later cells build the checkpoint/tokenizer paths from it.

google_recurrentgemma_pytorch_2b_it_1_path = kagglehub.model_download('google/recurrentgemma/PyTorch/2b/1')

## Installation

In [None]:
#@title Imports
import pathlib
import torch

import sentencepiece as spm
from recurrentgemma import torch as recurrentgemma

# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [None]:
# Fallback for sessions where the kagglehub download cell was skipped: point at
# the cache location used on Colab. When the download cell has already run, keep
# the path it returned instead of overwriting it with a hard-coded one.
if "google_recurrentgemma_pytorch_2b_it_1_path" not in globals():
    google_recurrentgemma_pytorch_2b_it_1_path = "/root/.cache/kagglehub/models/google/recurrentgemma/PyTorch/2b/1"

In [None]:
# Checkpoint variant; used for the weights file name and to pick the preset.
VARIANT = '2b'

# Locations of the checkpoint and tokenizer files inside the weights directory.
weights_dir = pathlib.Path(google_recurrentgemma_pytorch_2b_it_1_path)
ckpt_path = weights_dir.joinpath(f'{VARIANT}.pt')
vocab_path = weights_dir.joinpath('tokenizer.model')

# Any variant string containing '2b' selects the 2B preset; anything else the 9B one.
if '2b' in VARIANT:
    preset = recurrentgemma.Preset.RECURRENT_GEMMA_2B_V1
else:
    preset = recurrentgemma.Preset.RECURRENT_GEMMA_9B_V1

### Load and prepare RecurrentGemma (RG)

In [None]:
# Load parameters.
# map_location places every tensor directly on `device`, so no separate
# .to(device) pass over the dict is needed. weights_only=True restricts
# unpickling to tensors and plain containers, which is the safe way to read a
# downloaded checkpoint and avoids torch.load's weights_only FutureWarning.
params = torch.load(str(ckpt_path), map_location=device, weights_only=True)

  params = torch.load(str(ckpt_path))


In [None]:
# Build the GriffinConfig from the loaded params and the chosen preset.
model_config = recurrentgemma.GriffinConfig.from_torch_params(params, preset=preset)

# Instantiate the model in bfloat16 on `device`, then load the weights into it.
# load_state_dict stays as the last expression so its result is displayed.
model = recurrentgemma.Griffin(model_config, dtype=torch.bfloat16, device=device)
model.load_state_dict(params)

<All keys matched successfully>

In [None]:
# NOTE(review): sparsification is switched on here, before the first generation
# cell, so that cell presumably does not produce a dense baseline. Confirm this
# is intended.
model.enable_sparsification(k=3, metric="entropy", prefill=False)

In [None]:
# Create a SentencePiece processor and load the tokenizer model from vocab_path.
# Load(...) is the last expression, so its return value is displayed.
vocab = spm.SentencePieceProcessor()
vocab.Load(str(vocab_path))

True

In [None]:
# Wrap the model and tokenizer in a Sampler; the generation cells below call it
# with a list of input strings and a number of generation steps.
sampler = recurrentgemma.Sampler(model=model, vocab=vocab)

### Generate

In [None]:
# NOTE(review): this prompt ends with two spaces, whereas the prompt in the
# "Testing attention sparsification" cell ends with one. Confirm which is intended.
input_batch = ["I once had a girl, or should I say, she once had  "]

# Run the sampler for 30 generation steps.
out_data = sampler(total_generation_steps=30, input_strings=input_batch)

# Show each prompt with its generated text, followed by a row of '#' as separator.
for input_string, out_string in zip(input_batch, out_data.text):
    print(f"Prompt:\n{input_string}\nOutput:\n{out_string}", 10*'#', sep="\n")

Prompt:
I once had a girl, or should I say, she once had  
Output:
me.

I was a young girl, a young girl, a young girl, a young girl, a young girl, a young girl, a
##########


## Testing attention sparsification

In [None]:
# NOTE(review): this prompt ends with a single space, while the prompt in the
# "generate" cell ends with two. Confirm the two runs are meant to differ.
input_batch = ["I once had a girl, or should I say, she once had "]

# Presumably clears any attention manipulation that is already active before
# sparsification is configured again. TODO confirm against the model class.
model.disable_attention_manipulation()
model.enable_sparsification(k=3, metric="entropy", prefill=False)

# Run the sampler for 30 generation steps.
out_data = sampler(total_generation_steps=30, input_strings=input_batch)

# Show each prompt with its generated text, followed by a row of '#' as separator.
for input_string, out_string in zip(input_batch, out_data.text):
    print(f"Prompt:\n{input_string}\nOutput:\n{out_string}", 10*'#', sep="\n")

Prompt:
I once had a girl, or should I say, she once had  
Output:
me.

I was a young man, a young man with a lot of potential. I was a young man with a lot of potential, and
##########


## NIAH (Needle in a Haystack)

### Loading NIAH

In [None]:
# Change the working directory to /content; the script paths used by the
# commands below (e.g. NIAH/Needle_test/...) are relative.
%cd /content/

/content


In [None]:
!pip install flash-attn --no-build-isolation

In [None]:
# Print the current working directory.
!pwd

In [None]:
!pip install python-dotenv tiktoken anthropic

In [None]:
from google.colab import userdata
# Copy the HF_TOKEN Colab secret into the process environment before launching
# the script (assumes prompt.py reads HF_TOKEN from the environment; TODO confirm).
os.environ['HF_TOKEN'] = userdata.get('HF_TOKEN')
# Run the NIAH prompt script.
!python NIAH/Needle_test/prompt.py

📝 Loading configuration...
🔧 Creating Prompter instance...
🚀 Initializing Prompter...
📁 Haystack directory: LongAlign/Needle_test/PaulGrahamEssays
💾 Save directory: LongAlign/Needle_test/prompts
⚙️ Setting up context lengths...
✅ Context lengths set: [ 250  677 1105 1532 1959 2387 2814 3241 3669 4096]
⚙️ Setting up document depth percentages...
✅ Depth percentages set: [  0  11  22  33  44  56  67  78  89 100]
🔄 Initializing Huggingface tokenizer...
🤗 Initializing HuggingFace tokenizer...
🌐 Loading tokenizer from HuggingFace Hub: models/recurrent_gemma_default
❌ HuggingFace tokenizer initialization failed: models/recurrent_gemma_default is not a local folder and is not a valid model identifier listed on 'https://huggingface.co/models'
If this is a private repository, make sure to pass a token having permission to this repo either by logging in with `huggingface-cli login` or by passing `token=<your_token>`
❌ Tokenizer initialization failed: models/recurrent_gemma_default is not a local

### Run predictions

In [None]:
# Run the prediction script with CUDA_VISIBLE_DEVICES=0 set for that process.
!CUDA_VISIBLE_DEVICES=0 python NIAH/Needle_test/pred.py

🔹 Loading configuration...
🔹 Prompt directory (relative): LongAlign/Needle_test/prompts
🔹 Prompt directory (absolute): /content/LongAlign/Needle_test/prompts
🔹 Model provider: Huggingface
  params = torch.load(
🔹 Looking for prompt files matching pattern: LongAlign/Needle_test/prompts/Huggingface_*_prompts.json
🔹 Found 100 matching files
📂 Loading prompt file: LongAlign/Needle_test/prompts/Huggingface_250_22_prompts.json
🔍 Prompt from LongAlign/Needle_test/prompts/Huggingface_250_22_prompts.json length: 330 chars
📂 Loading prompt file: LongAlign/Needle_test/prompts/Huggingface_677_89_prompts.json
🔍 Prompt from LongAlign/Needle_test/prompts/Huggingface_677_89_prompts.json length: 2160 chars
📂 Loading prompt file: LongAlign/Needle_test/prompts/Huggingface_2387_100_prompts.json
🔍 Prompt from LongAlign/Needle_test/prompts/Huggingface_2387_100_prompts.json length: 9718 chars
📂 Loading prompt file: LongAlign/Needle_test/prompts/Huggingface_2387_33_prompts.json
🔍 Prompt from LongAlign/Needle_

### Evaluation and visualization

Make sure to set the API key in `eval.py` before running the evaluation cell below.

In [None]:
# Change the working directory to /content; the script paths used by the
# commands below are relative.
%cd /content/

/content


In [None]:
import subprocess
import os
from google.colab import userdata
# Copy of the current environment with CUDA_VISIBLE_DEVICES set to the first GPU.
# NOTE(review): `my_env` is not passed to anything in the visible cells, so
# confirm where it is meant to be consumed.
my_env = os.environ.copy()
my_env["CUDA_VISIBLE_DEVICES"] = "0"  # adjust for multi-gpu

# Hugging Face token from the Colab secrets, bundled with a local cache directory.
# (The earlier token-only `custom_args` assignment was a dead store and is removed.)
hf_token = userdata.get('HF_TOKEN')
custom_args = dict(token=hf_token, cache_dir="./.cache")  # speeds up loading

In [None]:
# Run the NIAH evaluation script; it prints a per-K success summary.
!python NIAH/Needle_test/eval.py

Found K values: [0, 1, 2, 10]

Evaluation Summary:

K0:
  Total predictions: 100
  Successful predictions: 0
  Success rate: 0.00%

K1:
  Total predictions: 100
  Successful predictions: 1
  Success rate: 1.00%

K2:
  Total predictions: 100
  Successful predictions: 50
  Success rate: 50.00%

K10:
  Total predictions: 100
  Successful predictions: 58
  Success rate: 58.00%


In [None]:
# Run the visualization script.
# NOTE(review): this cell uses LongAlign/Needle_test/ while the prompt, pred and
# eval cells use NIAH/Needle_test/. Confirm which directory is correct.
!python LongAlign/Needle_test/vis.py

Found JSON files: ['LongAlign/Needle_test/results/binary_eval_all_k.json']

Processing file: LongAlign/Needle_test/results/binary_eval_all_k.json
Processing K0
Processing K1
Processing K2
Processing K10


In [None]:
!zip -r /content/prompt.zip /content/LongAlign/Needle_test/prompts
from google.colab import files
files.download("/content/PRED/prompt.zip")

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive.
# NOTE(review): no visible cell reads from or writes to the mount, so confirm it is needed.
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!rm -r pred_K10
!ls

adjusted	  config-prompt.yaml		    pred_K1	 pred.py     README.md
config-eval.yaml  eval.py			    pred_K1.zip  prompt.py   vis.py
config-pred.yaml  llama_flash_attn_monkey_patch.py  pred_K2	 prompts
config_pred.yaml  PaulGrahamEssays		    pred_K2.zip  prompt.zip


In [None]:
# Archive the pred_K1 directory and download the archive through the browser.
# `files` is the google.colab module imported in an earlier cell.
!zip -r /content/pred_K1.zip /content/LongAlign/Needle_test/pred_K1
files.download("/content/pred_K1.zip")

In [None]:
from google.colab import files

# Archive the pred_K2 directory and download the archive through the browser.
!zip -r /content/pred_K2.zip /content/LongAlign/Needle_test/pred_K2
files.download("/content/pred_K2.zip")

In [None]:
# Archive the results directory and download the archive through the browser.
# `files` is the google.colab module imported in an earlier cell.
!zip -r /content/results.zip /content/LongAlign/Needle_test/results
files.download("/content/results.zip")