In [7]:
import os
import json
from pprint import pprint


In [8]:
def load_all_tasks(folder_path):
    """Load every ``*.json`` file found directly inside ``folder_path``.

    Args:
        folder_path: Path of the directory to scan (not recursive).

    Returns:
        dict mapping a task id (the file name with its trailing ``.json``
        removed) to the object parsed from that file. Entries are inserted
        in sorted file-name order so repeated runs yield the same ordering.

    Raises:
        OSError: If the directory cannot be listed or a file cannot be opened.
        json.JSONDecodeError: If a ``.json`` file does not contain valid JSON.
    """
    suffix = ".json"
    tasks = {}
    # os.listdir() returns names in arbitrary order; sort for reproducibility.
    for filename in sorted(os.listdir(folder_path)):
        if not filename.endswith(suffix):
            continue
        # Strip only the trailing extension. str.replace() would also remove
        # any ".json" occurring earlier in the name and corrupt the task id.
        task_id = filename[: -len(suffix)]
        filepath = os.path.join(folder_path, filename)
        # JSON text is UTF-8; do not depend on the platform's locale encoding.
        with open(filepath, "r", encoding="utf-8") as f:
            tasks[task_id] = json.load(f)
    return tasks

## Nonzero coordinates

Lists every non-zero cell in a grid as a record containing its row, its column, and its value; cells equal to 0 are omitted.

In [9]:
def encode_nonzero_coordinates(grid):
    """Describe every non-zero cell of ``grid`` by position and value.

    Args:
        grid: Iterable of rows, each row an iterable of cell values.

    Returns:
        A list of ``{"row": ..., "col": ..., "val": ...}`` dicts, one per cell
        whose value differs from 0, ordered row by row and left to right
        within each row.
    """
    return [
        {"row": row_idx, "col": col_idx, "val": cell}
        for row_idx, line in enumerate(grid)
        for col_idx, cell in enumerate(line)
        if cell != 0
    ]

In [10]:
def encode_task_nonzero_coords(task):
    """Encode a task's grids as lists of non-zero cell coordinates.

    Args:
        task: Dict with a ``"train"`` list of pairs, each pair holding
            ``"input"`` and ``"output"`` grids, and optionally a ``"test"``
            list of pairs holding at least an ``"input"`` grid.

    Returns:
        ``{"train": [...], "test": [...]}``. Each train entry has the encoded
        ``"input"`` and ``"output"``; each test entry has only the encoded
        ``"input"``, mirroring ``encode_task_objects``.

    Raises:
        KeyError: If ``"train"`` is missing, or a pair lacks a required grid.
    """
    encoded = {"train": [], "test": []}

    for pair in task["train"]:
        encoded["train"].append({
            "input": encode_nonzero_coordinates(pair["input"]),
            "output": encode_nonzero_coordinates(pair["output"])
        })

    # Previously the "test" list was created but never filled. Only the input
    # is encoded for test pairs. .get() keeps tasks that have no "test" key
    # working exactly as they did before.
    for pair in task.get("test", []):
        encoded["test"].append({
            "input": encode_nonzero_coordinates(pair["input"])
        })

    return encoded

In [12]:
# Load every task JSON found in the "Evaluation_set" folder (path is relative
# to the notebook's working directory); the result is keyed by task id.
tasks = load_all_tasks("Evaluation_set")

# Select a single task to inspect; `task` is reused by later cells.
task_id = "0b17323b"  # arbitrary example id, must exist in `tasks`
task = tasks[task_id]

# Convert the task's grids to the non-zero coordinate encoding.
nonzero_encoded_task = encode_task_nonzero_coords(task)

# Show the encoded input/output of the first training pair.
pprint(nonzero_encoded_task["train"][0])

{'input': [{'col': 1, 'row': 1, 'val': 1},
           {'col': 3, 'row': 3, 'val': 1},
           {'col': 5, 'row': 5, 'val': 1}],
 'output': [{'col': 1, 'row': 1, 'val': 1},
            {'col': 3, 'row': 3, 'val': 1},
            {'col': 5, 'row': 5, 'val': 1},
            {'col': 7, 'row': 7, 'val': 2},
            {'col': 9, 'row': 9, 'val': 2},
            {'col': 11, 'row': 11, 'val': 2},
            {'col': 13, 'row': 13, 'val': 2}]}


In [17]:
# Print the raw (unencoded) grids of the first training pair for reference.
pprint(task["train"][0])

{'input': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
 'output': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           

## Object / Bounding Box Encoding

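This encoding identifies each connected group of same-colored, non-zero cells in the grid as an individual object, and describes it using its color, position, and shape. Cells are connected when they are orthogonally adjacent (up, down, left, or right); cells that touch only diagonally belong to separate objects.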

- color:
    The integer value (≥1) representing the color of the object in the grid.

- top_left:
    The [row, col] coordinates of the top-left corner of the object’s bounding box.

- width:
    The number of columns (horizontal span) the object occupies.

- height:
    The number of rows (vertical span) the object occupies.

- pixels:
    A list of all (row, col) positions that belong to the object — i.e., every grid cell that is part of this same-colored connected component.

In [14]:
from collections import deque

def extract_objects(grid):
    """Split a grid into connected same-colored objects and describe each one.

    Two cells belong to the same object when they hold the same non-zero
    value and are linked through orthogonal (up/down/left/right) neighbours;
    diagonal contact does not connect cells. Cells equal to 0 never form
    objects.

    Args:
        grid: 2D sequence of rows of cell values. The column count is taken
            from the first row, so all rows are expected to be that long.

    Returns:
        A list with one dict per object, ordered by the row-major position of
        each object's first-encountered cell. Each dict has:
            color:    the shared cell value.
            top_left: ``[row, col]`` of the bounding box's top-left corner.
            width:    number of columns spanned by the bounding box.
            height:   number of rows spanned by the bounding box.
            pixels:   ``(row, col)`` tuples of every member cell, in
                      breadth-first discovery order from the first cell.
        An empty grid yields ``[]``.
    """
    rows = len(grid)
    if rows == 0:
        # Nothing to scan; also avoids an IndexError from grid[0] below.
        return []
    cols = len(grid[0])

    # 4-connectivity: up, down, left, right.
    neighbour_offsets = ((-1, 0), (1, 0), (0, -1), (0, 1))
    visited = set()
    objects = []

    def bfs(r, c, color):
        """Flood-fill from (r, c) over cells equal to ``color``."""
        q = deque([(r, c)])
        visited.add((r, c))
        pixels = [(r, c)]
        # Bounding box starts as the seed cell and grows as cells are found.
        min_r, min_c = r, c
        max_r, max_c = r, c

        while q:
            cr, cc = q.popleft()
            for dr, dc in neighbour_offsets:
                nr, nc = cr + dr, cc + dc
                if (
                    0 <= nr < rows and
                    0 <= nc < cols and
                    (nr, nc) not in visited and
                    grid[nr][nc] == color
                ):
                    # Mark on enqueue so a cell is never queued twice.
                    visited.add((nr, nc))
                    q.append((nr, nc))
                    pixels.append((nr, nc))
                    min_r = min(min_r, nr)
                    min_c = min(min_c, nc)
                    max_r = max(max_r, nr)
                    max_c = max(max_c, nc)

        return {
            "color": color,
            "top_left": [min_r, min_c],
            "width": max_c - min_c + 1,
            "height": max_r - min_r + 1,
            "pixels": pixels
        }

    # Row-major scan; every unvisited non-zero cell seeds a new object.
    for r in range(rows):
        for c in range(cols):
            color = grid[r][c]
            if color != 0 and (r, c) not in visited:
                objects.append(bfs(r, c, color))

    return objects


In [15]:
def encode_task_objects(task):
    """Encode a task's grids as lists of connected-object descriptions.

    Args:
        task: Dict with ``"train"`` and ``"test"`` lists of pairs. Train
            pairs provide ``"input"`` and ``"output"`` grids; test pairs
            provide at least an ``"input"`` grid.

    Returns:
        ``{"train": [...], "test": [...]}`` where each train entry holds the
        objects extracted from its input and output grids, and each test
        entry holds only the objects extracted from its input grid.
    """
    train_encoded = [
        {
            "input": extract_objects(example["input"]),
            "output": extract_objects(example["output"]),
        }
        for example in task["train"]
    ]
    test_encoded = [
        {"input": extract_objects(example["input"])}
        for example in task["test"]
    ]
    return {"train": train_encoded, "test": test_encoded}


In [16]:
# Encode the task selected earlier (`task`) with the object / bounding-box
# representation produced by encode_task_objects.
object_encoded_task = encode_task_objects(task)

# Show the encoded input/output objects of the first training pair.
pprint(object_encoded_task["train"][0])


{'input': [{'color': 1,
            'height': 1,
            'pixels': [(1, 1)],
            'top_left': [1, 1],
            'width': 1},
           {'color': 1,
            'height': 1,
            'pixels': [(3, 3)],
            'top_left': [3, 3],
            'width': 1},
           {'color': 1,
            'height': 1,
            'pixels': [(5, 5)],
            'top_left': [5, 5],
            'width': 1}],
 'output': [{'color': 1,
             'height': 1,
             'pixels': [(1, 1)],
             'top_left': [1, 1],
             'width': 1},
            {'color': 1,
             'height': 1,
             'pixels': [(3, 3)],
             'top_left': [3, 3],
             'width': 1},
            {'color': 1,
             'height': 1,
             'pixels': [(5, 5)],
             'top_left': [5, 5],
             'width': 1},
            {'color': 2,
             'height': 1,
             'pixels': [(7, 7)],
             'top_left': [7, 7],
             'width': 1},
            {

In [19]:
# Print the raw (unencoded) grids of the first training pair, for comparison
# with the object encoding shown in the previous cell.
pprint(task["train"][0])

{'input': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]],
 'output': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
            [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
           