<a href="https://colab.research.google.com/github/sulaimonao/sulaimonao/blob/main/ARC_Kaggle_2024.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Data Preprocessing

In [4]:
import json
import numpy as np
import torch
from torch.utils.data import DataLoader, Dataset

# Load the datasets
def load_json(file_path):
    with open(file_path, 'r') as f:
        return json.load(f)

train_challenges = load_json('/content/drive/MyDrive/ARC_Kaggle_2024/arc-agi_training_challenges.json')
train_solutions = load_json('/content/drive/MyDrive/ARC_Kaggle_2024/arc-agi_training_solutions.json')
eval_challenges = load_json('/content/drive/MyDrive/ARC_Kaggle_2024/arc-agi_evaluation_challenges.json')
eval_solutions = load_json('/content/drive/MyDrive/ARC_Kaggle_2024/arc-agi_evaluation_solutions.json')
test_challenges = load_json('/content/drive/MyDrive/ARC_Kaggle_2024/arc-agi_test_challenges.json')

# Preprocess the data
def preprocess_data(challenges, solutions=None):
    data = []
    for task_id, task in challenges.items():
        for pair in task['train']:
            input_grid = np.array(pair['input'])
            output_grid = np.array(pair['output'])
            data.append((input_grid, output_grid))
        if solutions:
            for idx, pair in enumerate(task['test']):
                input_grid = np.array(pair['input'])
                output_grid = np.array(solutions[task_id][idx])
                data.append((input_grid, output_grid))
    return data

train_data = preprocess_data(train_challenges, train_solutions)
eval_data = preprocess_data(eval_challenges, eval_solutions)
test_data = preprocess_data(test_challenges)

# Create custom dataset class
class GridDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx][0], self.data[idx][1]

# Create data loaders
train_loader = DataLoader(GridDataset(train_data), batch_size=32, shuffle=True)
eval_loader = DataLoader(GridDataset(eval_data), batch_size=32, shuffle=False)


# Resizing Grids to a Fixed Size

In [5]:
import torch
import torch.nn.functional as F

# Helper function to resize grids
def resize_grid(grid, size=30):
    grid_tensor = torch.tensor(grid, dtype=torch.float32)
    grid_tensor = grid_tensor.unsqueeze(0).unsqueeze(0)  # Add batch and channel dimensions
    grid_tensor = F.interpolate(grid_tensor, size=(size, size), mode='nearest')
    return grid_tensor.squeeze().numpy().astype(int)

# Resize all grids to 30x30
def preprocess_data(challenges, solutions=None, size=30):
    data = []
    for task_id, task in challenges.items():
        for pair in task['train']:
            input_grid = resize_grid(pair['input'], size)
            output_grid = resize_grid(pair['output'], size)
            data.append((input_grid, output_grid))
        if solutions:
            for idx, pair in enumerate(task['test']):
                input_grid = resize_grid(pair['input'], size)
                output_grid = resize_grid(solutions[task_id][idx], size)
                data.append((input_grid, output_grid))
    return data

train_data = preprocess_data(train_challenges, train_solutions, size=30)
eval_data = preprocess_data(eval_challenges, eval_solutions, size=30)
test_data = preprocess_data(test_challenges, size=30)


# Model Design and Training

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset

# Example CNN Model
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.fc1 = nn.Linear(64 * 30 * 30, 512)  # Adjust based on input size
        self.fc2 = nn.Linear(512, 30 * 30)  # Adjust based on output size

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = x.view(x.size(0), -1)
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x.view(-1, 30, 30)  # Adjust based on output size

# Create custom dataset class
class GridDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx][0], self.data[idx][1]

# Create data loaders
train_loader = DataLoader(GridDataset(train_data), batch_size=32, shuffle=True)
eval_loader = DataLoader(GridDataset(eval_data), batch_size=32, shuffle=False)
test_loader = DataLoader(GridDataset(test_data), batch_size=32, shuffle=False)

# Instantiate and train the model
model = SimpleCNN()
criterion = nn.MSELoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

def train_model(model, train_loader, criterion, optimizer, num_epochs=25):
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            inputs = inputs.unsqueeze(1).float()  # Add channel dimension
            labels = labels.float()

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch {epoch}/{num_epochs - 1}, Loss: {epoch_loss:.4f}')

train_model(model, train_loader, criterion, optimizer, num_epochs=25)


Epoch 0/24, Loss: 7.1101
Epoch 1/24, Loss: 5.8691
Epoch 2/24, Loss: 5.4981
Epoch 3/24, Loss: 5.0628
Epoch 4/24, Loss: 4.3829
Epoch 5/24, Loss: 3.9371
Epoch 6/24, Loss: 3.5554
Epoch 7/24, Loss: 3.2658
Epoch 8/24, Loss: 2.9740
Epoch 9/24, Loss: 2.7454
Epoch 10/24, Loss: 2.6270
Epoch 11/24, Loss: 2.4455
Epoch 12/24, Loss: 2.2646
Epoch 13/24, Loss: 2.1613
Epoch 14/24, Loss: 2.0076
Epoch 15/24, Loss: 1.9584
Epoch 16/24, Loss: 1.8362
Epoch 17/24, Loss: 1.7021
Epoch 18/24, Loss: 1.6685
Epoch 19/24, Loss: 1.6741
Epoch 20/24, Loss: 1.6171
Epoch 21/24, Loss: 1.5599
Epoch 22/24, Loss: 1.4692
Epoch 23/24, Loss: 1.3951
Epoch 24/24, Loss: 1.3185


# Prediction and Submission

In [9]:
import torch
import json

def predict(model, test_loader):
    model.eval()
    predictions = {}
    with torch.no_grad():
        for i, (inputs, _) in enumerate(test_loader):
            inputs = torch.tensor(inputs).unsqueeze(1).float()  # Convert to tensor and add channel dimension
            outputs = model(inputs)
            task_id = list(test_challenges.keys())[i]
            predictions[task_id] = outputs.numpy().astype(int).tolist()
    return predictions

# Create DataLoader for test data
test_loader = DataLoader(GridDataset(test_data), batch_size=1, shuffle=False)

# Make predictions
predictions = predict(model, test_loader)

# Prepare submission
with open('submission.json', 'w') as f:
    json.dump(predictions, f)


  inputs = torch.tensor(inputs).unsqueeze(1).float()  # Convert to tensor and add channel dimension


IndexError: list index out of range