In [1]:
import ibis
import ibis.selectors as s
import plotly.express as px

# local imports
from ihateai.data import read_training, transform
from ihateai.utils import show_rand_task_pairs

# configuration
# Default template for figures created through plotly.express.
px.defaults.template = "plotly_dark"

# Interactive mode: ibis expressions are executed and rendered when displayed.
ibis.options.interactive = True
# Limits for the interactive table repr (rows shown, length of rendered
# arrays/maps, nesting depth of nested types).
ibis.options.repr.interactive.max_rows = 10
ibis.options.repr.interactive.max_length = 3
ibis.options.repr.interactive.max_depth = 3
# None removes the column limit, so wide tables are rendered in full.
ibis.options.repr.interactive.max_columns = None

# Handle to the backend ibis is currently using.
# NOTE(review): `con` is not referenced by any later cell visible here.
con = ibis.get_backend()

In [2]:
# Load the training data through the project helper; the bare `t` on the
# last line displays the resulting table.
t = read_training()
t

In [3]:
# Inspect the column names and types of the raw table.
t.schema()

ibis.Schema {
  task_num   int64
  file_id    string
  test_len   int64
  train_len  int64
  test       array<struct<input: array<array<int64>>, output: array<array<int64>>>>
  train      array<struct<input: array<array<int64>>, output: array<array<int64>>>>
  filename   string
}

In [4]:
# Row count of the raw table (400 in the recorded output).
t.count()

┌─────┐
│ [1;36m400[0m │
└─────┘

In [5]:
# Run the project's `transform` helper on the raw table and display the result.
# NOTE(review): judging by the records printed by `show_rand_task_pairs`, the
# result appears to hold one row per (task_num, example_num) with color and
# size columns next to the `input`/`output` grids -- confirm against `transform`.
train = transform(t)
train

In [6]:
# Show the input/output pairs of one task via the project helper
# (presumably a randomly chosen task, per the helper's name -- verify).
show_rand_task_pairs(train)

[{'file_id': 'be94b721', 'task_num': 299, 'example_num': 0, 'input_colors': [0, 1, 2, 3], 'output_colors': [0, 2], 'input_width': 7, 'input_height': 13, 'output_width': 4, 'output_height': 3, 'input': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 2, 2, 0, 0, 3, 3, 0, 0, 0, 0, 0, 0], [0, 0, 2, 0, 0, 0, 3, 0, 0, 1, 0, 0, 0], [0, 0, 2, 2, 0, 0, 0, 0, 1, 1, 0, 0, 0], [0, 2, 2, 2, 0, 0, 0, 0, 1, 1, 1, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'output': [[2, 2, 0], [0, 2, 0], [0, 2, 2], [2, 2, 2]]}]

Input:


Output:


[{'file_id': 'be94b721', 'task_num': 299, 'example_num': 1, 'input_colors': [0, 3, 4, 6], 'output_colors': [4], 'input_width': 5, 'input_height': 10, 'output_width': 3, 'output_height': 2, 'input': [[0, 0, 0, 0, 0, 0, 0, 6, 6, 0], [0, 3, 0, 0, 4, 4, 0, 0, 6, 0], [3, 3, 3, 0, 4, 4, 0, 0, 0, 0], [0, 3, 0, 0, 4, 4, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'output': [[4, 4], [4, 4], [4, 4]]}]

Input:


Output:


[{'file_id': 'be94b721', 'task_num': 299, 'example_num': 2, 'input_colors': [0, 2, 7, 8], 'output_colors': [0, 8], 'input_width': 6, 'input_height': 11, 'output_width': 4, 'output_height': 3, 'input': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 8, 8, 8, 0, 0, 0, 0, 7, 7, 0], [0, 0, 8, 0, 0, 0, 2, 0, 0, 7, 0], [0, 8, 8, 0, 0, 2, 2, 0, 0, 7, 0], [0, 8, 8, 0, 0, 0, 2, 0, 0, 7, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]], 'output': [[8, 8, 8], [0, 8, 0], [8, 8, 0], [8, 8, 0]]}]

Input:


Output:


[{'file_id': 'be94b721', 'task_num': 299, 'example_num': 3, 'input_colors': [0, 2, 7, 8], 'output_colors': [0, 2], 'input_width': 7, 'input_height': 9, 'output_width': 3, 'output_height': 3, 'input': [[0, 0, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 7, 0, 0, 2, 2, 2], [0, 0, 0, 7, 7, 0, 0, 2, 0], [0, 0, 0, 0, 7, 0, 2, 2, 2], [8, 8, 8, 0, 0, 0, 0, 0, 0], [0, 8, 0, 0, 0, 0, 0, 0, 0], [0, 0, 0, 0, 0, 0, 0, 0, 0]], 'output': [[2, 2, 2], [0, 2, 0], [2, 2, 2]]}]

Input:


Output:


In [None]:
# Add two boolean flags per row -- does the input width equal the output
# width, and likewise for height -- and place them right after all but the
# last two original columns.
(
    train.mutate(
        input_width_eq_output_width=(
            ibis._["input_width"] == ibis._["output_width"]
        ),
        input_height_eq_output_height=(
            ibis._["input_height"] == ibis._["output_height"]
        ),
    ).relocate(
        train.columns[:-2],
        "input_width_eq_output_width",
        "input_height_eq_output_height",
    )
)

In [None]:
# For each task, gather the distinct input/output widths and heights seen
# across its rows, flag whether each dimension takes a single value within the
# task, then count how often every combination of the four flags occurs.
(
    train.group_by("task_num")
    .agg(
        **{
            f"task_{dim}s": train[dim].collect().unique().sort()
            for dim in (
                "input_width",
                "input_height",
                "output_width",
                "output_height",
            )
        }
    )
    .mutate(
        **{
            f"task_{dim}s_same": ibis._[f"task_{dim}s"].length() == 1
            for dim in (
                "input_width",
                "input_height",
                "output_width",
                "output_height",
            )
        }
    )
    .order_by("task_num")
    .select(s.contains("_same"))
    .value_counts()
    .order_by(s.across(s.contains("_count"), ibis.desc))
)

In [None]:
# Count the distinct values that occur across all `output_colors` arrays:
# unnest to one value per row, de-duplicate, then count the remaining rows.
train.select(train["output_colors"].unnest()).distinct().count()

In [None]:
# Same helper as for `train`, called with test=True (presumably selecting the
# `test` examples instead of the `train` ones -- confirm against `transform`).
test = transform(t, test=True)
test

In [None]:
# Row count of the transformed `train` table.
train.count()

In [None]:
# Row count of the transformed `test` table.
test.count()

In [None]:
def decision_tree(t):
    """Tally how often input and output agree on colors and grid size.

    Four boolean flags are derived for every row of ``t``:

    * ``color_match``  -- ``input_colors`` equals ``output_colors``
    * ``grid_match``   -- both width and height match
    * ``width_match``  -- ``input_width`` equals ``output_width``
    * ``height_match`` -- ``input_height`` equals ``output_height``

    The flag combinations are counted with ``value_counts()``, ordered by the
    four flags, and a percentage column is added by scaling the count column
    (selected by name containing ``"count"``) against its sum.

    Args:
        t: ibis table expression providing the ``input_colors``,
            ``output_colors``, ``input_width``, ``input_height``,
            ``output_width`` and ``output_height`` columns.

    Returns:
        The resulting ibis table expression.
    """
    same_colors = t["input_colors"] == t["output_colors"]
    same_width = t["input_width"] == t["output_width"]
    same_height = t["input_height"] == t["output_height"]

    flags = t.select(
        color_match=same_colors,
        grid_match=same_width & same_height,
        width_match=same_width,
        height_match=same_height,
    )

    tallied = flags.value_counts().order_by(
        "color_match", "grid_match", "width_match", "height_match"
    )

    # Express each combination's count as a share of all rows.
    return tallied.mutate(
        percent=s.across(
            s.contains("count"),
            lambda cnt: 100 * cnt / cnt.sum(),
        )
    )

In [None]:
# Match-flag breakdown (counts and percentages) for `train`.
decision_tree(train)

In [None]:
# Same breakdown for `test`, for comparison with the train result.
decision_tree(test)

In [None]:
# Recompute the per-example color sets from the grids and join them back
# onto `train`.
#
# Inner query: unnest `input` and `output`, then for each
# (task_num, example_num) collect the pieces, flatten them, and keep the
# sorted unique values.
#
# NOTE(review): this rebinds `train` from itself, so re-running the cell
# operates on its own previous result. If `train` already carries
# `input_colors` / `output_colors` (the records printed by
# `show_rand_task_pairs` suggest it does), the joined copies would presumably
# be the ones suffixed `_right` and then dropped below -- confirm whether this
# cell is still needed.
train = (
    train.join(
        train.select(
            "task_num",
            "example_num",
            train["input"].unnest(),
            train["output"].unnest(),
        )
        .group_by(
            "task_num",
            "example_num",
        )
        .agg(
            input_colors=ibis._["input"].collect().flatten().unique().sort(),
            output_colors=ibis._["output"].collect().flatten().unique().sort(),
        ),
        ["task_num", "example_num"],
    )
    # Keep the first three original columns in front, followed by the color columns.
    .relocate(train.columns[:3], "input_colors", "output_colors")
    # Discard any columns whose name contains "_right".
    .drop(s.contains("_right"))
    .order_by("task_num", "example_num")
)
train

In [None]:
# Row count of `train` after the join above (presumably a check that the join
# neither dropped nor duplicated rows -- compare with the earlier count).
train.count()

In [None]:
# Tabulate how often an example's input and output agree on their color
# arrays, their grid size, and the length of their color arrays.
train.select(
    color_match=train["input_colors"] == train["output_colors"],
    grid_match=(
        (train["input_width"] == train["output_width"])
        & (train["input_height"] == train["output_height"])
    ),
    # The previous version computed this exact expression twice, under the
    # names `input_match` and `output_match`; the duplicate added no
    # information to the breakdown and neither name described the comparison.
    color_count_match=(
        train["input_colors"].length() == train["output_colors"].length()
    ),
).value_counts().order_by("color_match", "grid_match", "color_count_match")

In [None]:
# Unnest the outer `input` array into one row per inner array, then take the
# unique elements of each inner array.
# NOTE(review): despite the `input_colors` name, this appears to yield unique
# values per inner array rather than per whole grid -- confirm intent.
train["input"].unnest().as_table()["input"].unique().name("input_colors")

In [None]:
# Frequency of every (input size, output size) combination, most common first.
(
    train.group_by(
        "input_width",
        "input_height",
        "output_width",
        "output_height",
    )
    .agg(count=ibis._.count())
    .order_by(ibis.desc("count"))
)

In [None]:
# Flatten the raw `test` array of structs: one row per array element, keeping
# `task_num` and pulling the struct's `input` and `output` fields out as columns.
# NOTE(review): this rebinds `test`, replacing the `transform(t, test=True)`
# result from the earlier cell; the cells below operate on this version.
test = t.select("task_num", t["test"].unnest()).select(
    "task_num",
    ibis._["test"]["input"],
    ibis._["test"]["output"],
)
test

In [None]:
# Derive grid dimensions directly from the nested `input` array: `width` is
# the length of the outer array and `height` the length of its first inner
# array.
# The cell previously ended in a bare `.relocate` (attribute access without a
# call), which displayed the bound method instead of the table. The call is
# completed here so the new columns sit just before `input`.
train.mutate(
    width=ibis._["input"].length(),
    height=ibis._["input"][0].length(),
).relocate("width", "height", before="input")

In [None]:
# Row count of `train`.
train.count()

In [None]:
# Row count of `test` as currently bound.
test.count()

In [None]:
# Number of `test` rows per task, tasks with the most rows first.
test["task_num"].value_counts().order_by(ibis.desc("task_num_count"))

In [None]:
# Show the `test` rows belonging to task 398.
# NOTE(review): 398 is hard-coded; presumably picked from the per-task counts
# in the previous cell -- confirm.
test.filter(test["task_num"] == 398)