<a href="https://colab.research.google.com/github/shravya1998/MLProjects/blob/main/ARC_AGI.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read() call while streaming a download.
CHUNK_SIZE = 40960
# Comma-separated list of "<directory>:<percent-encoded download URL>" entries;
# the download loop splits each entry on ':' and unquotes the URL part.
# NOTE(review): the URL is a signed link carrying X-Goog-Date=20240911T213622Z
# and X-Goog-Expires=259200 (presumably seconds), so it has most likely
# expired -- confirm and regenerate from Kaggle if the download fails.
DATA_SOURCE_MAPPING = 'arc-prize-2024:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-competitions-data%2Fkaggle-v2%2F67357%2F8951125%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240911%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240911T213622Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D11cdeab9216626a7e4be6c39e6c5587c4c385e845431b97e23a32bb25708dd3d699646bd2a262121e28942c38a38918536cfc58684911ac9e28a205870d52ec73ada90b444dcb60a4e35be7c78713acebd3a328621557daa98b8428e14213d7c8c4ccd160e24458110c0cdac606090e86afc24ed518c871cb7e820c709792c6ed98c22718bb36781cdb1e8742d304c284f1e00aa5d1f63d7d1c133e92d9d9eecd07d715883609b245a0f2f1ea539cca2e800fc980eb8a179b348a88c61da545d0e8deaec572f2e9f9a2c03e1704e4b736f17cf3d5b1650523727834415d328a76462940244bc8571b5c48c7aebb8039779ceec5ba2b11f0dca6a5265b86825c1'

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every "<directory>:<percent-encoded URL>" entry listed in
# DATA_SOURCE_MAPPING and unpack it under KAGGLE_INPUT_PATH/<directory>.
# A failed download is reported and skipped so the remaining entries still run.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so any later ':' stays part of the URL.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            content_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {content_length} bytes compressed')
            # The header may be absent or malformed; fall back to 0, which
            # disables the progress bar instead of crashing the download.
            try:
                total_length = int(content_length)
            except (TypeError, ValueError):
                total_length = 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                if total_length > 0:
                    # Clamp so a body longer than advertised cannot overflow the bar.
                    done = min(50, int(50 * dl / total_length))
                else:
                    done = 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk: the tar branch reopens the file by
            # name and would otherwise see a truncated archive.
            tfile.flush()
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Bind the archive to its own name so the `tarfile` module is
                # not overwritten for later iterations.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')

print('Data source import complete.')


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found anywhere below /kaggle/input.
for root, _subdirs, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
import json
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models
import matplotlib.pyplot as plt

# Load ARC dataset
def load_arc_data(file_path):
    """Read a JSON file and return the decoded Python object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        Whatever ``json.load`` produces for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # JSON text is UTF-8; decode explicitly rather than relying on the
    # platform's locale default, which differs between systems.
    with open(file_path, 'r', encoding='utf-8') as f:
        return json.load(f)

# Function to extract train and test data from a task
def extract_task_data(task):
    """Convert one task's grids into numpy arrays.

    Args:
        task: Mapping with a "train" list of {"input", "output"} pairs and a
            "test" list of entries that each hold an "input" grid.

    Returns:
        A tuple ``(train_inputs, train_outputs, test_inputs)``; each element
        is a list of numpy arrays in the order the grids appear in ``task``.
    """
    def as_grids(examples, key):
        # One array per example, built from the grid stored under `key`.
        return [np.array(example[key]) for example in examples]

    train_inputs = as_grids(task["train"], "input")
    train_outputs = as_grids(task["train"], "output")
    test_inputs = as_grids(task["test"], "input")
    return train_inputs, train_outputs, test_inputs

# Pad grids to the maximum size of 30x30
def pad_grid(grid, target_size=(30, 30)):
    """Place a 2-D grid in the top-left corner of a zero-filled canvas.

    Args:
        grid: 2-D numpy array no larger than ``target_size``.
        target_size: ``(rows, cols)`` of the returned array.

    Returns:
        An integer array of shape ``target_size`` holding ``grid`` at the
        top-left, with zeros everywhere else.
    """
    canvas = np.zeros(target_size, dtype=int)
    rows, cols = grid.shape
    canvas[0:rows, 0:cols] = grid
    return canvas

# Prepare the data for model input
def prepare_data(tasks):
    """Build padded training arrays from every task's train pairs.

    Args:
        tasks: Mapping of task id to task dict, as accepted by
            ``extract_task_data``.

    Returns:
        A tuple ``(x_train, y_train)``:
        ``x_train`` is reshaped to ``(-1, 30, 30, 1)`` (padded inputs with a
        trailing channel axis) and ``y_train`` to ``(-1, 30, 30)`` (padded
        outputs used as integer class labels).
    """
    x_train, y_train = [], []
    # Only the task contents are needed, so iterate over values. No None
    # check is required: extract_task_data always returns lists.
    for task in tasks.values():
        train_inputs, train_outputs, _ = extract_task_data(task)
        for inp, out in zip(train_inputs, train_outputs):
            x_train.append(pad_grid(inp))
            y_train.append(pad_grid(out))

    x_train = np.array(x_train).reshape(-1, 30, 30, 1)  # Add channel dimension
    y_train = np.array(y_train).reshape(-1, 30, 30)      # Integer labels for sparse_categorical_crossentropy

    return x_train, y_train

# Define the ARC model architecture
def create_arc_model(input_shape, num_classes=10):
    """Build and compile a CNN that predicts one class per grid cell.

    Args:
        input_shape: ``(height, width, channels)`` of one input grid.
        num_classes: Number of classes predicted for each cell. Defaults to
            10, matching the original hard-coded output head.

    Returns:
        A compiled Keras ``Sequential`` model whose output has shape
        ``(height, width, num_classes)`` and is a probability distribution
        along the last axis.
    """
    # Size the output head from the input grid instead of hard-coding 30x30.
    height, width = input_shape[0], input_shape[1]
    model = models.Sequential([
        layers.Input(shape=input_shape),
        layers.Conv2D(64, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu', padding='same'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(256, (3, 3), activation='relu', padding='same'),
        layers.Flatten(),
        # Raw scores only: a softmax here would normalise across every cell
        # and class at once rather than across the classes of each cell.
        layers.Dense(height * width * num_classes),
        layers.Reshape((height, width, num_classes)),
        # Per-cell softmax over the class axis, as the sparse categorical
        # cross-entropy loss expects.
        layers.Softmax(axis=-1),
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

# File paths
# Locations of the ARC challenge files inside the Kaggle input directory
train_path = "/kaggle/input/arc-prize-2024/arc-agi_training_challenges.json"
eval_path = "/kaggle/input/arc-prize-2024/arc-agi_evaluation_challenges.json"

# Read both challenge sets into memory
train_data = load_arc_data(file_path=train_path)
eval_data = load_arc_data(file_path=eval_path)

# Turn the training tasks into padded input/label arrays
x_train, y_train = prepare_data(tasks=train_data)

# Build the network for single-channel 30x30 grids and print its layers
input_shape = (30, 30, 1)
model = create_arc_model(input_shape=input_shape)
model.summary()

# Fit for 10 epochs with a 0.2 validation split
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_split=0.2,
)


In [None]:
# Predict the output for a test grid
def predict_output(model, test_input):
    """Run the model on one grid and return the predicted class per cell.

    Args:
        model: Object whose ``predict`` accepts a ``(1, 30, 30, 1)`` batch and
            returns per-cell class scores along the last axis.
        test_input: 2-D grid to pad to 30x30 before prediction.

    Returns:
        A ``(30, 30)`` array holding the arg-max class index of every cell.
    """
    # Pad to the fixed grid size, then add batch and channel axes.
    batch = pad_grid(test_input).reshape(1, 30, 30, 1)
    class_scores = model.predict(batch)
    best_class = np.argmax(class_scores, axis=-1)
    return best_class.reshape(30, 30)

# Create submission JSON file
def create_submission(tasks, model):
    """Predict every test grid and write the result to ``submission.json``.

    The file holds a JSON object that maps each task id to a list with one
    entry per test input, each entry being
    ``{"attempt_1": grid, "attempt_2": grid}``.
    NOTE(review): this follows the ARC Prize 2024 sample submission layout
    (dict keyed by task id) -- confirm against the competition page.

    Args:
        tasks: Mapping of task id to task dict, as accepted by
            ``extract_task_data``.
        model: Trained model passed through to ``predict_output``.

    Returns:
        None. The submission is printed and written to ``submission.json``
        in the current directory.
    """
    submission = {}
    for task_id, task in tasks.items():
        # extract_task_data always returns a list here, so no None check.
        test_inputs = extract_task_data(task)[2]
        task_predictions = []
        for test_input in test_inputs:
            # Inference is deterministic, so a second identical call would
            # only repeat the work; reuse the grid until a genuinely
            # different second strategy exists.
            predicted_grid = predict_output(model, test_input).tolist()
            task_predictions.append({
                "attempt_1": predicted_grid,
                "attempt_2": predicted_grid,
            })
        submission[task_id] = task_predictions

    print(submission)

    with open('submission.json', 'w') as f:
        json.dump(submission, f)

# Create submission file
# Predict the evaluation tasks with the trained model and write submission.json
create_submission(tasks=eval_data, model=model)