<!-- SPDX-License-Identifier: MPL-2.0 -->

# ARC AGI Solver - Kaggle Submission
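This notebook adapts the ARC AGI Solver for the Kaggle environment: it clones the solver repository, points its configuration at Kaggle paths, trains the policy network, and writes a submission file.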

## Setup Environment

In [None]:
import os
import sys
import json
from pathlib import Path

# Filesystem layout used by every later cell: the Kaggle input root, the
# working directory, and the solver checkout placed inside the working directory.
KAGGLE_INPUT = "/kaggle/input"
KAGGLE_WORKING = "/kaggle/working"
REPO_DIR = os.path.join(KAGGLE_WORKING, "arc-agi-solver")

# Clone the repository only when the checkout path does not exist yet, so
# re-running this cell does not attempt a second clone into the same directory.
if not os.path.exists(REPO_DIR):
    !git clone https://github.com/lexsightllc/arc-agi-solver.git {REPO_DIR}

# Install the repository's requirements file, then the package itself in
# editable mode.
# NOTE(review): the clone and both installs need network access — confirm the
# notebook runs with internet enabled.
!pip install -r {os.path.join(REPO_DIR, 'requirements.txt')}
!pip install -e {REPO_DIR}

# Make the checkout importable so later cells can use `from src... import ...`.
sys.path.append(REPO_DIR)

## Configure Paths for Kaggle

In [None]:
import yaml

# Load the repository's default configuration.
config_path = os.path.join(REPO_DIR, "config", "config.yaml")
with open(config_path, 'r', encoding="utf-8") as f:
    # safe_load returns None for an empty document; fall back to an empty
    # mapping so the key assignments below cannot fail on NoneType.
    config = yaml.safe_load(f) or {}

# Redirect every path in the config to its Kaggle location: datasets are read
# from the input root, logs and outputs are written under the working directory.
config['data_dir'] = os.path.join(KAGGLE_INPUT, "arc-prize-2025")
config['private_data_dir'] = os.path.join(KAGGLE_INPUT, "arc-agi-solver")
config['log_dir'] = os.path.join(KAGGLE_WORKING, "logs")
config['output_dir'] = os.path.join(KAGGLE_WORKING, "outputs")

# Create the writable directories up front; exist_ok keeps the cell re-runnable.
os.makedirs(config['log_dir'], exist_ok=True)
os.makedirs(config['output_dir'], exist_ok=True)

# Persist the adjusted config next to the other working files; the original
# config.yaml inside the checkout is left untouched.
with open(os.path.join(KAGGLE_WORKING, "config_kaggle.yaml"), 'w', encoding="utf-8") as f:
    yaml.dump(config, f)

## Data Preparation

In [None]:
from src.data.prepare_data import load_arc_dataset, split_dataset

# Both competition splits are read from the arc-prize-2025 input dataset.
competition_root = os.path.join(KAGGLE_INPUT, "arc-prize-2025")
train_data = load_arc_dataset(os.path.join(competition_root, "training"))
test_data = load_arc_dataset(os.path.join(competition_root, "test"))

# The private dataset is optional: when its directory is absent,
# private_data stays an empty dict.
private_data_path = os.path.join(KAGGLE_INPUT, "arc-agi-solver", "data")
private_data = (
    load_arc_dataset(private_data_path)
    if os.path.exists(private_data_path)
    else {}
)

# Merge any private entries into the training data via `update`.
if private_data:
    train_data.update(private_data)

# Report how many entries each split holds.
print(f"Loaded {len(train_data)} training examples")
print(f"Loaded {len(test_data)} test examples")

## Training

In [None]:
from src.learning.trainer import Trainer
from src.learning.dataset import ARCDataset
from src.learning.models import PolicyNetwork
import torch

# Use the GPU when torch reports one as available; otherwise fall back to CPU.
device_name = "cuda" if torch.cuda.is_available() else "cpu"
device = torch.device(device_name)

# Build the policy network and move it to the selected device.
model = PolicyNetwork()
model = model.to(device)

# Collect the trainer arguments in one place before constructing it.
trainer_kwargs = dict(
    model=model,
    train_data=train_data,
    val_data=test_data,  # In practice, use a proper validation split
    config=config,
    device=device,
)
trainer = Trainer(**trainer_kwargs)

# Run training.
trainer.train()

## Inference and Submission

In [None]:
from src.inference.predictor import Predictor
import pandas as pd

# Wrap the trained model for inference on the same device used for training.
predictor = Predictor(model=model, device=device)

# Predict each test task independently. A failure on one task is reported and
# recorded as None so the remaining tasks are still processed.
predictions = {}
for task_id, task_data in test_data.items():
    try:
        prediction = predictor.predict(task_data)
    except Exception as e:
        print(f"Error processing task {task_id}: {str(e)}")
        prediction = None
    predictions[task_id] = prediction

# One row per task: the id column is "<task_id>_output", the output column
# holds whatever the predictor returned (or None on failure).
# NOTE(review): confirm this CSV layout matches the submission format the
# competition expects before relying on it.
output_ids = [f"{task_id}_output" for task_id in predictions]
outputs = list(predictions.values())
submission = pd.DataFrame({
    'output_id': output_ids,
    'output': outputs,
})

# Write the submission file into the working directory.
submission_path = os.path.join(KAGGLE_WORKING, "submission.csv")
submission.to_csv(submission_path, index=False)
print(f"Submission saved to {submission_path}")

## Save Model and Logs

In [None]:
# Persist the model's state_dict (not the module object itself) to the
# working directory.
model_path = os.path.join(KAGGLE_WORKING, "model.pth")
torch.save(model.state_dict(), model_path)

# Copy the checkout's logs/ and outputs/ directories into the working directory.
# NOTE(review): the config cell already points config['log_dir'] and
# config['output_dir'] at KAGGLE_WORKING, so these repo-local directories may
# not exist. A failing `cp` here presumably only prints an error and the cell
# continues — confirm whether these copies are still needed.
!cp -r {os.path.join(REPO_DIR, 'logs')} {KAGGLE_WORKING}
!cp -r {os.path.join(REPO_DIR, 'outputs')} {KAGGLE_WORKING}

# Printed unconditionally once the statements above have run.
print("Notebook execution completed successfully!")