In [None]:
# Environment setup: choose where the repository lives, clone it, and make it
# the working directory so the relative paths used by later cells
# ("runs/...", "assets/...", "src") resolve inside the checkout.
#
# Exactly one of the two assignments below should be active:
#   - Modal: uncomment the first line and substitute your own volume name.
#   - Colab: keep the second line.
# NOTE(review): mount_folder is assigned here but is not referenced by any
# cell visible in this notebook — confirm whether it is still needed.
# root_folder, mount_folder = "root", "mnt/mithil-arc" # for modal - REPLACE /mithil-arc WITH YOUR VOLUME NAME
root_folder, mount_folder = "content", "content/drive/MyDrive"  # for colab

# IPython expands $root_folder to the value of the Python variable above.
%cd /$root_folder/
# Fetch the project sources. On a re-run the target directory already exists,
# so git refuses to clone again and the existing checkout is reused as-is.
!git clone https://github.com/mvakde/mdlARC.git # `-b <branch_name> --single-branch` if branch
# Everything after this cell assumes the repo root is the current directory.
%cd /$root_folder/mdlARC

In [None]:
# Build the datasets with the repo's own script: the arc1 and conceptarc
# datasets, train and eval splits, with solutions included and the script's
# own cleanup step disabled (--cleanup none).
# NOTE(review): later cells read assets/challenges.json and
# assets/solutions.json — presumably written by this script; confirm.
!python dataset_building_scripts/build_datasets.py --datasets arc1 conceptarc  --splits train eval --with-solutions --cleanup none

# Prune files that are not needed for the run: the other notebooks, the
# dataset-building scripts, the readme and the image folder.
# NOTE: this removes the very script invoked above, so re-running this cell
# after it has completed once will fail on the build step until the repo is
# cloned again.
!rm -rf /$root_folder/mdlARC/interactive-run.ipynb
!rm -rf /$root_folder/mdlARC/clean-env-run.ipynb
!rm -rf /$root_folder/mdlARC/max-clean-env-run.ipynb
!rm -rf /$root_folder/mdlARC/dataset_building_scripts
!rm -rf /$root_folder/mdlARC/readme.md
!rm -rf /$root_folder/mdlARC/img

In [None]:
from pathlib import Path
import argparse
import importlib
import sys

# The notebook is expected to run from the repository root; its src/ folder
# holds the project modules imported below.
PROJECT_ROOT = Path.cwd()
SRC_DIR = PROJECT_ROOT / "src"

# Prepend src/ to the module search path once (skipped when the folder is
# missing or the entry is already present, so re-running the cell is harmless).
if SRC_DIR.exists():
    if str(SRC_DIR) not in sys.path:
        sys.path.insert(0, str(SRC_DIR))

import utils
import tinytransformer
import train
import build

# Re-execute each project module so edits made on disk since the first import
# are picked up without restarting the kernel. Order matches the import order.
for _project_module in (utils, tinytransformer, train, build):
    importlib.reload(_project_module)

# Run configuration. Kept as a plain dict so it can be dumped to
# runs/config.txt verbatim, then wrapped in an argparse.Namespace (`cfg`) so
# the project code can read settings as attributes. `cfg` is what gets passed
# to build.build_model_and_data, train.train_model and evaluate.run_evaluation.
# The meaning of each key is defined by those project modules, not here.
args = {
    # run config
    "name": "arc1-refactor",  # download file name
    "GPU": "A100",  # just for logging purposes
    # paths - must pass as Path("<path_to_dir>")
    "data_path": Path("assets/challenges.json"),
    "train_log_file": Path("runs/training_log.txt"),
    "save_path": Path("runs/tiny.pt"),
    "checkpoint_path": None, #Path("runs/tiny.pt"),  # or None to start from scratch
    # NOTE(review): every entry below is larger than "epochs" (20) — confirm
    # whether intermediate checkpoints are expected for this configuration.
    "checkpoint_epochs": [300,400,500,600,605],  # int N for every N epochs, or list [5, 10, 25]
    
    
    # hyperparameters
    "epochs": 20,
    "batch_size": 32,
    "gradient_accumulation_steps": 1,
    "do_validate": True,
    "val_batch_size": 140,

    # augmentation switches (exact semantics live in the project modules —
    # presumably colour and dihedral augmentations; confirm there)
    "enable_aug": True,
    "max_augments": 10,
    "enable_color_aug": True,
    "color_apply_to_test": True,
    "enable_dihedral_aug": True,
    "dihedral_apply_to_test": True,


    # optimizer selection and its learning rates
    "optimizer": "normuon",  # "adamw" | "normuon"
    "normuon_lr": 1.66e-3,
    "normuon_momentum": 0.95,
    "normuon_beta2": 0.95,
    "lr": 3e-4, #adamw lr

    # learning-rate schedule, expressed as fractions of the run
    "warmup_pct": 0.02,
    "wsd_decay_start_pct": 0.8,  # 1.0 = no decay (start at last epoch)
    "lr_floor": 0.0,

    # weight decay, with separate values per parameter group
    "weight_decay": 0.1,
    "attention_weight_decay": 0.01,
    "token_embedding_weight_decay": 0.01,
    "task_embedding_weight_decay": 0.01,

    "grad_clip": 1.0,
    "dropout": 0.1,
    "seed": 42,

    # Model Architecture
    "d_model": 768,  # 128, 256, 512, 768 | 128, 384, 640
    "n_heads": 12,  # 4, 8, 8/16, 12 | 4, 12, 10
    "d_ff": 3072,  # 512, 1024, 2048, 3072 | 512, 1536, 2560
    "n_layers": 4,  # 4, 6, 16, 16 | 24, 28, 24

    # sampling controls for inference; None leaves the choice to the project
    # code (presumably greedy decoding — confirm)
    "inference_temperature": None,
    "inference_top_k": None,
}
# Attribute-style view of the dict above (cfg.epochs, cfg.batch_size, ...).
cfg = argparse.Namespace(**args)

# All run artefacts go under runs/; create it up front (no error if present).
runs_dir = Path("runs")
runs_dir.mkdir(parents=True, exist_ok=True)

# Snapshot the configuration next to the other artefacts, one "key: value"
# line per setting, in the order the settings were declared.
config_lines = [f"{key}: {value}\n" for key, value in args.items()]
with (runs_dir / "config.txt").open("w") as config_file:
    config_file.writelines(config_lines)

# Build everything training needs from the config. The result is unpacked
# directly into the names the following cells use.
(
    model,
    dataset,
    dataloader,
    device,
    data_path,
) = build.build_model_and_data(cfg)

In [None]:
# Training only
from time import perf_counter

# Wall-clock the complete training call.
t_start = perf_counter()
train.train_model(
    cfg,
    model=model,
    dataloader=dataloader,
    dataset=dataset,
    device=device,
    data_path=data_path,
)
t_duration = perf_counter() - t_start

# Report the duration in the cell output and persist it with the run artefacts.
print(f"Training took {t_duration:.2f}s")
with Path("runs/timing.txt").open("w") as timing_file:
    timing_file.write(f"Training: {t_duration:.4f} s\n")

In [None]:
# Hand the notebook's global namespace to the project helper so it can release
# training-time objects before inference starts. What exactly is freed is
# decided inside utils.cleanup_memory (not shown here) — presumably the
# model/dataloader created above; confirm before reusing those names later.
utils.cleanup_memory(globals()) # cleaning up memory to run inference

In [None]:
from pathlib import Path
import pickle
import importlib

import evaluate
import utils

# Re-execute both project modules so on-disk edits are picked up.
for _eval_module in (evaluate, utils):
    importlib.reload(_eval_module)

# === Evaluation Configuration ===
EVAL_NAME = "eval_100aug"  # run name; output folder is runs/<EVAL_NAME>/

# Augmented variants per example for test-time augmentation (TTA): more
# variants give AAIVR voting more diverse predictions at the cost of slower
# inference.
EVAL_MAX_AUGMENTS = 100

EVAL_DATA_PATH = Path("assets/challenges.json")  # dataset (challenges.json)
EVAL_CHECKPOINT_PATH = Path("runs/tiny.pt")  # model checkpoint to load
EVAL_BATCH_SIZE = 1300  # inference batch size
EVAL_SPLITS = ["test"]  # splits to evaluate

# None evaluates every task; a list such as ["00576224", ...] restricts the
# run to those task IDs.
EVAL_TASK_IDS = None

# === Run Evaluation ===
eval_kwargs = {
    "run_name": EVAL_NAME,
    "max_augments": EVAL_MAX_AUGMENTS,
    "data_path": EVAL_DATA_PATH,
    "checkpoint_path": EVAL_CHECKPOINT_PATH,
    "batch_size": EVAL_BATCH_SIZE,
    "splits": EVAL_SPLITS,
    "task_ids": EVAL_TASK_IDS,
}
eval_result = evaluate.run_evaluation(cfg, **eval_kwargs)

# Persist the result so later cells can recover it without re-running
# evaluation. It is stored as a one-element list for backward compatibility.
results_file = Path("runs/eval_results.pkl")
results_file.write_bytes(pickle.dumps([eval_result]))
print("Saved runs/eval_results.pkl")

In [None]:
# visualisation
from pathlib import Path
import pickle
import sys

# This cell can run without the evaluation cell having been executed in this
# kernel (see the pickle fallback below), so it must not depend on an earlier
# cell for `utils`. Make <cwd>/src importable, then import it here. Importing
# an already-loaded module is a cheap cache lookup.
_src_dir = Path.cwd() / "src"
if _src_dir.exists() and str(_src_dir) not in sys.path:
    sys.path.insert(0, str(_src_dir))
import utils

# Work out which runs/<sub-folder>/ holds the submission to display.
if "eval_result" not in globals():
    # Evaluation was not run in this kernel: recover the saved result.
    # SECURITY: unpickling executes arbitrary code — only load a file that
    # this notebook itself wrote.
    eval_results = pickle.loads(Path("runs/eval_results.pkl").read_bytes())
    # The file holds a one-element list; the first field of the stored result
    # is used as the folder name (presumably the run name — confirm against
    # evaluate.run_evaluation).
    EVAL_SUB_FOLDER = eval_results[0][0]
elif "EVAL_NAME" in globals():
    # Evaluation ran in this kernel: reuse the run name it was given.
    EVAL_SUB_FOLDER = EVAL_NAME
else:
    EVAL_SUB_FOLDER = eval_result[0]

VIS_MODE = "!"  # "!" = compare vs solutions, "submission" = attempts-only
utils.visualize_submissions(Path("runs") / EVAL_SUB_FOLDER / "submission.json", solutions_file="assets/solutions.json", mode=VIS_MODE)

In [None]:
# scoring
from pathlib import Path
import pickle
import sys

# This cell can run without the evaluation cell having been executed in this
# kernel (see the pickle fallback below), so it must not depend on an earlier
# cell for `utils`. Make <cwd>/src importable, then import it here. Importing
# an already-loaded module is a cheap cache lookup.
_src_dir = Path.cwd() / "src"
if _src_dir.exists() and str(_src_dir) not in sys.path:
    sys.path.insert(0, str(_src_dir))
import utils

# Work out which runs/<sub-folder>/ holds the submission to score.
if "eval_result" not in globals():
    # Evaluation was not run in this kernel: recover the saved result.
    # SECURITY: unpickling executes arbitrary code — only load a file that
    # this notebook itself wrote.
    eval_results = pickle.loads(Path("runs/eval_results.pkl").read_bytes())
    # The file holds a one-element list; the first field of the stored result
    # is used as the folder name (presumably the run name — confirm against
    # evaluate.run_evaluation).
    EVAL_SUB_FOLDER = eval_results[0][0]
elif "EVAL_NAME" in globals():
    # Evaluation ran in this kernel: reuse the run name it was given.
    EVAL_SUB_FOLDER = EVAL_NAME
else:
    EVAL_SUB_FOLDER = eval_result[0]

SOLUTIONS_FILE = Path("assets/solutions.json")
SUBMISSION_FILE = Path(f"runs/{EVAL_SUB_FOLDER}/submission.json")

# Compare the submission against the reference solutions. The result is kept
# in `score` for inspection in later cells.
score = utils.score_arc_submission(SOLUTIONS_FILE, SUBMISSION_FILE)