In [1]:
import torch
print(torch.__version__)

# if 2.2.x run
# pip install --upgrade torch==2.5.0 torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121


#if issue with setup.sh then runs
#rm -rf /opt/micromamba/envs/python_310/lib/python3.10/site-packages/sympy*




2.9.0+cu128


In [2]:
import os
os.environ["CUDA_LAUNCH_BLOCKING"] = "1"

In [3]:
import torch
import yaml
import sys
from pathlib import Path

# --- Paths ---
base = Path("/workspace/TinyRecursiveModels/checkpoints/arc_v1_public")
weights_path = base / "step_518071.pt"
config_path = base / "all_config.yaml"

# --- Load config ---
with open(config_path, "r") as f:
    cfg = yaml.safe_load(f)

# --- Add model code to Python path ---
sys.path.append(str(base))
from trm import TinyRecursiveReasoningModel_ACTV1, TinyRecursiveReasoningModel_ACTV1Config

# --- Merge configs ---
merged_cfg = {
    **cfg,
    **cfg.get("arch", {}),
    **cfg.get("data", {}),
    **cfg.get("train", {}),
}

# --- Ensure required parameters exist ---
merged_cfg.setdefault("batch_size", cfg.get("train", {}).get("batch_size", 1))
merged_cfg.setdefault("seq_len", cfg.get("train", {}).get("seq_len", 64))
merged_cfg.setdefault("num_puzzle_identifiers", cfg.get("data", {}).get("num_puzzle_identifiers", 512))
merged_cfg.setdefault("vocab_size", cfg.get("data", {}).get("vocab_size", 1024))

# --- Instantiate model ---
print("Loading model...")
checkpoint = torch.load(weights_path, map_location="cuda" if torch.cuda.is_available() else "cpu", weights_only=True)

config = TinyRecursiveReasoningModel_ACTV1Config(**merged_cfg)
model = TinyRecursiveReasoningModel_ACTV1(config.__dict__)  # Pass as dict
model.load_state_dict(checkpoint, strict=False)
model.eval()

device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

print(f"Model loaded on {device} with {sum(p.numel() for p in model.parameters())/1e6:.2f}M params")


Loading model...
Model loaded on cuda with 7.87M params


In [6]:
import inspect
import dataset.build_arc_dataset as arc_data
print([name for name, obj in inspect.getmembers(arc_data) if inspect.isfunction(obj)])


['arc_grid_to_np', 'aug', 'convert_dataset', 'convert_single_arc_puzzle', 'dataclass', 'dihedral_transform', 'grid_hash', 'inverse_aug', 'inverse_dihedral_transform', 'load_puzzles_arcagi', 'np_grid_to_seq_translational_augment', 'puzzle_hash']


In [9]:
ls /workspace/TinyRecursiveModels/checkpoints


[0m[34;42marc_v1_public[0m/


In [10]:
ls /workspace/TinyRecursiveModels/checkpoints/arc_v1_public


[0m[34;42m__pycache__[0m/  all_config.yaml  losses.py  step_518071.pt  trm.py


In [13]:
import yaml
from pprint import pprint

with open("/workspace/TinyRecursiveModels/checkpoints/arc_v1_public/all_config.yaml", "r") as f:
    cfg = yaml.safe_load(f)

pprint(cfg.keys())


dict_keys(['arch', 'beta1', 'beta2', 'checkpoint_every_eval', 'checkpoint_path', 'data_paths', 'data_paths_test', 'ema', 'ema_rate', 'epochs', 'eval_interval', 'eval_save_outputs', 'evaluators', 'freeze_weights', 'global_batch_size', 'load_checkpoint', 'lr', 'lr_min_ratio', 'lr_warmup_steps', 'min_eval_interval', 'project_name', 'puzzle_emb_lr', 'puzzle_emb_weight_decay', 'run_name', 'seed', 'weight_decay'])


In [14]:
pprint(cfg)


{'arch': {'H_cycles': 3,
          'H_layers': 0,
          'L_cycles': 4,
          'L_layers': 2,
          'expansion': 4,
          'forward_dtype': 'bfloat16',
          'halt_exploration_prob': 0.1,
          'halt_max_steps': 16,
          'hidden_size': 512,
          'loss': {'loss_type': 'stablemax_cross_entropy',
                   'name': 'losses@ACTLossHead'},
          'mlp_t': False,
          'name': 'recursive_reasoning.trm@TinyRecursiveReasoningModel_ACTV1',
          'no_ACT_continue': True,
          'num_heads': 8,
          'pos_encodings': 'rope',
          'puzzle_emb_len': 16,
          'puzzle_emb_ndim': 512},
 'beta1': 0.9,
 'beta2': 0.95,
 'checkpoint_every_eval': True,
 'checkpoint_path': 'checkpoints/Arc1concept-aug-1000-ACT-torch/arc_v1_public_eval',
 'data_paths': ['data/arc1concept-aug-1000'],
 'data_paths_test': [],
 'ema': True,
 'ema_rate': 0.999,
 'epochs': 100000,
 'eval_interval': 10000,
 'eval_save_outputs': [],
 'evaluators': [{'name': 'arc@ARC'

In [17]:
ls -R /workspace/TinyRecursiveModels/checkpoints/arc_v1_public


/workspace/TinyRecursiveModels/checkpoints/arc_v1_public:
[0m[34;42m__pycache__[0m/  all_config.yaml  losses.py  step_518071.pt  trm.py

/workspace/TinyRecursiveModels/checkpoints/arc_v1_public/__pycache__:
trm.cpython-310.pyc


In [18]:
ls -R /workspace/TinyRecursiveModels/dataset


/workspace/TinyRecursiveModels/dataset:
[0m[34;42m__pycache__[0m/          build_maze_dataset.py    common.py
build_arc_dataset.py  build_sudoku_dataset.py

/workspace/TinyRecursiveModels/dataset/__pycache__:
build_arc_dataset.cpython-310.pyc  common.cpython-310.pyc


In [20]:
help(build_arc.convert_dataset)


Help on function convert_dataset in module dataset.build_arc_dataset:

convert_dataset(config: dataset.build_arc_dataset.DataProcessConfig)



In [28]:
from pathlib import Path

# Find where the ARC-AGI jsons actually are
cands = list(Path("/workspace").rglob("arc-agi_*_challenges.json"))
if not cands:
    raise FileNotFoundError("Couldn't find any 'arc-agi_*_challenges.json' under /workspace")

# Prefer training2 to infer the prefix, but fall back to any match
p = next((c for c in cands if "training2_challenges" in c.name), cands[0])

# Strip the suffix to get the prefix (everything before '_training2_challenges.json')
raw_arc_prefix = str(p).split("_training2_challenges.json")[0]
print("Detected raw_arc_prefix:", raw_arc_prefix)

# Verify all 4 required files exist with this prefix
required = [
    f"{raw_arc_prefix}_training2_challenges.json",
    f"{raw_arc_prefix}_training2_solutions.json",
    f"{raw_arc_prefix}_evaluation2_challenges.json",
    f"{raw_arc_prefix}_evaluation2_solutions.json",
]
for f in required:
    print(Path(f), "OK" if Path(f).exists() else "MISSING")


Detected raw_arc_prefix: /workspace/TinyRecursiveModels/kaggle/combined/arc-agi
/workspace/TinyRecursiveModels/kaggle/combined/arc-agi_training2_challenges.json OK
/workspace/TinyRecursiveModels/kaggle/combined/arc-agi_training2_solutions.json OK
/workspace/TinyRecursiveModels/kaggle/combined/arc-agi_evaluation2_challenges.json OK
/workspace/TinyRecursiveModels/kaggle/combined/arc-agi_evaluation2_solutions.json OK


In [30]:
from dataset.build_arc_dataset import DataProcessConfig, convert_dataset

# Use the detected prefix (from your last cell)
raw_arc_prefix = "/workspace/TinyRecursiveModels/kaggle/combined/arc-agi"

# Define where to store the processed dataset
output_dir = "/workspace/TinyRecursiveModels/data/arc_v1_public"

# Build config
config = DataProcessConfig(
    input_file_prefix=raw_arc_prefix,
    output_dir=output_dir,
    subsets=["training2", "evaluation2"],  # matches your JSONs
    test_set_name="evaluation2",
)

# Convert dataset (this builds metadata.json etc.)
convert_dataset(config)
print("âœ… Conversion finished. Output saved to:", output_dir)


[Puzzle bd14c3bf] augmentation not full, only 574
[Puzzle 8dab14c2] augmentation not full, only 575
[Puzzle 3aa6fb7a] augmentation not full, only 576
[Puzzle e6de6e8f] augmentation not full, only 576
[Puzzle 5c0a986e] augmentation not full, only 576
[Puzzle 2697da3f] augmentation not full, only 72
[Puzzle 3618c87e] augmentation not full, only 576
[Puzzle 5168d44c] augmentation not full, only 576
[Puzzle 9bebae7a] augmentation not full, only 575
[Puzzle 90f3ed37] augmentation not full, only 576
[Puzzle 1990f7a8] augmentation not full, only 72
[Puzzle 20981f0e] augmentation not full, only 576
[Puzzle a934301b] augmentation not full, only 576
[Puzzle 18419cfa] augmentation not full, only 576
[Puzzle cce03e0d] augmentation not full, only 576
[Puzzle 4612dd53] augmentation not full, only 576
[Puzzle d37a1ef5] augmentation not full, only 576
[Puzzle f9a67cb5] augmentation not full, only 576
[Puzzle 4258a5f9] augmentation not full, only 576
[Puzzle 0b17323b] augmentation not full, only 288
[P

In [33]:
!ls -lh /workspace/TinyRecursiveModels/data/arc_v1_public


total 36M
-rw-rw-rw- 1 user user  30M Nov  3 19:44 identifiers.json
drwxrwxrwx 2 user user 2.0M Nov  3 19:38 test
-rw-rw-rw- 1 user user 1.2M Nov  3 19:44 test_puzzles.json
drwxrwxrwx 2 user user 2.9M Nov  3 19:38 train


In [41]:
!ls -R /workspace/TinyRecursiveModels | grep trm


trm.py
trm.cpython-310.pyc
trm.yaml
trm_hier6.yaml
trm_singlez.yaml
trm.py
trm_hier6.py
trm_singlez.py


In [45]:
import sys, os

# --- Fix the Python path ---
repo_root = "/workspace/TinyRecursiveModels"
sys.path.insert(0, repo_root)
sys.path.insert(0, os.path.join(repo_root, "models"))
sys.path.insert(0, os.path.join(repo_root, "models", "recursive_reasoning"))

print("Python paths now:", sys.path[:4])

# --- Correct import ---
from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1

print("Successfully imported TinyRecursiveReasoningModel_ACTV1 from models/recursive_reasoning/trm.py")


Python paths now: ['/workspace/TinyRecursiveModels/models/recursive_reasoning', '/workspace/TinyRecursiveModels/models', '/workspace/TinyRecursiveModels', '/workspace/TinyRecursiveModels/models/recursive_reasoning']
Successfully imported TinyRecursiveReasoningModel_ACTV1 from models/recursive_reasoning/trm.py


In [54]:
state_dict = torch.load("/workspace/TinyRecursiveModels/checkpoints/arc_v1_public/step_518071.pt", map_location="cpu")
print(state_dict.keys())


odict_keys(['_orig_mod.model.inner.H_init', '_orig_mod.model.inner.L_init', '_orig_mod.model.inner.embed_tokens.embedding_weight', '_orig_mod.model.inner.lm_head.weight', '_orig_mod.model.inner.q_head.weight', '_orig_mod.model.inner.q_head.bias', '_orig_mod.model.inner.puzzle_emb.weights', '_orig_mod.model.inner.L_level.layers.0.self_attn.qkv_proj.weight', '_orig_mod.model.inner.L_level.layers.0.self_attn.o_proj.weight', '_orig_mod.model.inner.L_level.layers.0.mlp.gate_up_proj.weight', '_orig_mod.model.inner.L_level.layers.0.mlp.down_proj.weight', '_orig_mod.model.inner.L_level.layers.1.self_attn.qkv_proj.weight', '_orig_mod.model.inner.L_level.layers.1.self_attn.o_proj.weight', '_orig_mod.model.inner.L_level.layers.1.mlp.gate_up_proj.weight', '_orig_mod.model.inner.L_level.layers.1.mlp.down_proj.weight'])


In [60]:
from models.recursive_reasoning.trm import TinyRecursiveReasoningModel_ACTV1

config_dict = {
    "batch_size": 32,
    "seq_len": 900,
    "num_puzzle_identifiers": 1000000,
    "vocab_size": 12,  # must match checkpoint
    "H_cycles": 3,
    "L_cycles": 4,
    "H_layers": 1,
    "L_layers": 1,
    "hidden_size": 512,
    "expansion": 4.0,
    "num_heads": 8,
    "pos_encodings": "rope",
    "halt_max_steps": 8,
    "halt_exploration_prob": 0.1
}




model = TinyRecursiveReasoningModel_ACTV1(config_dict)


In [61]:
import torch

checkpoint_path = "/workspace/TinyRecursiveModels/checkpoints/arc_v1_public/step_518071.pt"
raw_state_dict = torch.load(checkpoint_path, map_location="cpu")

# Strip DDP prefix
clean_state_dict = {k.replace("_orig_mod.model.", ""): v for k, v in raw_state_dict.items()}

# Load non-strictly (tolerates harmless mismatches)
missing, unexpected = model.load_state_dict(clean_state_dict, strict=False)
print(f"Model loaded. Missing keys: {len(missing)}, unexpected: {len(unexpected)}")

# Move to GPU and eval mode
device = "cuda" if torch.cuda.is_available() else "cpu"
model = model.to(device).eval()
print(f"Model initialized and ready on {device}.")


Model loaded. Missing keys: 0, unexpected: 5
Model initialized and ready on cuda.


In [62]:
print(model.__class__.__name__)
print(sum(p.numel() for p in model.parameters()) / 1e6, "M parameters")


TinyRecursiveReasoningModel_ACTV1
3.421186 M parameters


ðŸ§© Using ARC task: dataset.json


KeyError: 'train'