In [5]:
# Standard library
import ast
import os

# Third-party
import arckit
import arckit.vis as vis
import dotenv
import drawsvg
import numpy as np
import openai
from openai import OpenAI

# Setup: load the ARC1 train/eval task sets, then load variables via dotenv.
train_set, eval_set = arckit.load_data()
dotenv.load_dotenv()

In [6]:
def output_image(inp, out, task_id, train=True, i=0):
    """Draw an input grid and an output grid and save each as a PNG.

    Files are written to ``images/<train|test>_<task_id>/`` as
    ``<i>_input.png`` and ``<i>_output.png``; the directory is created
    if it does not exist yet.

    Args:
        inp: Grid passed to ``vis.draw_grid`` for the input picture.
        out: Grid passed to ``vis.draw_grid`` for the output picture.
        task_id: Identifier used in the directory name.
        train: Selects the ``train`` (True) or ``test`` (False) directory prefix.
        i: Index used as the file-name prefix.
    """
    split = "train" if train else "test"
    target_dir = f"images/{split}_{task_id}"
    os.makedirs(target_dir, exist_ok=True)

    # Same drawing settings for both grids; input is rendered first.
    for grid, suffix in ((inp, "input"), (out, "output")):
        drawing = vis.draw_grid(grid, xmax=3, ymax=3, padding=.5, label='Example')
        vis.output_drawing(drawing, f"{target_dir}/{i}_{suffix}.png")  # svg/pdf/png

In [7]:
import shutil

def generate_images(task):
    """Save input/output images for every training example of ``task``.

    Args:
        task: Object exposing ``id`` and ``train``, where ``train`` is a
            sequence of ``(input_grid, output_grid)`` pairs.

    Each pair is rendered through ``output_image`` with ``train=True`` and
    its position in ``task.train`` as the file index.
    """
    for i, pair in enumerate(task.train):
        # Use the task's own id; the previous code read an undefined global
        # named ``task_id``.
        output_image(pair[0], pair[1], task.id, train=True, i=i)

In [9]:
# Shared OpenAI client used by the request helpers below. The key is read from
# the environment; dotenv.load_dotenv() was already called in the first cell.
client = OpenAI(api_key=os.environ['OPENAI_API_KEY'])

In [10]:
# The OpenAI ``client`` used below is created once, in the previous cell;
# it is intentionally not re-instantiated here.

def array_to_string(array):
    """Return NumPy's text rendering of ``array`` with ``', '`` between elements."""
    rendered = np.array2string(array, separator=', ')
    return rendered

def get_example_string(input, output):
    """Format one example (input grid plus optional output grid) as prompt text.

    Args:
        input: Array-like with a ``shape`` attribute; rendered via
            ``array_to_string``.
        output: Same kind of object, or ``None``. When ``None`` the text
            stops right after the ``Output:`` header so the answer is left
            blank.

    Returns:
        The formatted block as a single string.
    """
    rule = "-" * 10

    pieces = [
        f"{rule}\n",
        f"{rule}\n",
        f"Input shape:\n{input.shape}\n",
        f"Input:\n{array_to_string(input)}\n",
        f"{rule}\n",
        "Output:\n",
    ]

    if output is not None:
        pieces.extend([
            f"{array_to_string(output)}\n",
            f"Output shape:\n{output.shape}\n",
            f"{rule}\n",
            f"{rule}\n",
        ])

    return "".join(pieces)
    
def gen_example_strings(task):
    """Build the full prompt text for ``task``.

    Every training pair in ``task.train`` is formatted with
    ``get_example_string``; the input of the first test pair
    (``task.test[0][0]``) is appended last with no output, leaving the
    answer blank.

    Returns:
        All formatted blocks concatenated into one string.
    """
    blocks = [
        get_example_string(np.array(pair[0]), np.array(pair[1]))
        for pair in task.train
    ]

    # Only the first test input is included.
    query_grid = np.array(task.test[0][0])
    blocks.append(get_example_string(query_grid, None))

    return "".join(blocks)

def get_result(task):
    """Send the task's examples to the chat-completions API and return the reply.

    The request consists of two ``user`` messages: a fixed instruction and
    the text produced by ``gen_example_strings(task)``.

    Returns:
        The completion object returned by ``client.chat.completions.create``.
    """
    prompt_body = gen_example_strings(task)
    instruction = "You are a super intelligent agent that can solve any problem given a set of examples. You will be given a set of examples, and a validation input. Your job is to generate a valid output for the validation input. DO NOT INCLUDE ANY OTHER TEXT IN YOUR RESPONSE."

    conversation = [
        {"role": "user", "content": instruction},
        {"role": "user", "content": prompt_body},
    ]

    # Other models tried previously: "o1-preview", "grok-beta".
    # (max_tokens=4024 was the earlier form of the limit below.)
    return client.chat.completions.create(
        model="gpt-4o",
        messages=conversation,
        max_completion_tokens=4024,
    )

In [34]:
def _parse_grid_reply(reply):
    """Convert the model's text reply into the Python value it spells out.

    The reply is first parsed as-is. If that fails, the span from the first
    ``[`` to the last ``]`` is parsed instead, so a list wrapped in extra
    text (for example a Markdown code fence) is still accepted.

    Raises:
        ValueError: if ``reply`` is ``None`` or not a valid literal.
        SyntaxError: if no parseable literal can be found.
    """
    if reply is None:
        raise ValueError("model returned no text content")

    text = reply.strip()
    try:
        return ast.literal_eval(text)
    except (SyntaxError, ValueError):
        start = text.find("[")
        end = text.rfind("]")
        if start == -1 or end < start:
            # Nothing that looks like a list: surface the original error.
            raise
        return ast.literal_eval(text[start:end + 1])


def do_one_task(task):
    """Run one task end to end and report whether the model solved it.

    Steps: save images of the training pairs, query the model, parse its
    reply into an array, save images of the predicted (index 0) and expected
    (index 1) outputs for the first test pair, then compare the two.

    Args:
        task: Object exposing ``id``, ``train`` and ``test``; ``test[0]`` is
            an ``(input_grid, expected_output_grid)`` pair.

    Returns:
        ``True`` if the parsed prediction equals the expected output
        (``np.array_equal``), otherwise ``False``.

    Raises:
        ValueError, SyntaxError: if the model reply cannot be parsed.
    """
    print(f"Task ID: {task.id}")
    generate_images(task)

    # get_result builds the prompt itself, so it is not built separately here.
    completion = get_result(task)
    msg = completion.choices[0].message.content
    out = np.array(_parse_grid_reply(msg))

    test_input, expected = task.test[0][0], task.test[0][1]
    output_image(test_input, out, task.id, train=False, i=0)
    output_image(test_input, expected, task.id, train=False, i=1)

    if np.array_equal(out, expected):
        print(f"Task {task.id} passed")
        return True

    print(f"Task {task.id} failed")
    return False

Task ID: 08ed6ac7
Task 08ed6ac7 passed


In [37]:
# Run every evaluation task and count how many the model solves.
passed = 0
total = 0
for i in range(len(eval_set)):
    total += 1
    try:
        # do_one_task returns False for a wrong answer without raising, so
        # only a truthy result counts as a pass.
        if do_one_task(eval_set[i]):
            passed += 1
    except Exception as e:
        # Errors (e.g. an unparseable reply) count as failures.
        print(f"Task {eval_set[i].id} failed with error: {e}")

print(f"Passed {passed} out of {total} tasks")

Task ID: 00576224
Task 00576224 passed
Task ID: 009d5c81
Task 009d5c81 failed
Task ID: 00dbd492
Task 00dbd492 failed
Task ID: 03560426
Task 03560426 failed
Task ID: 05a7bcf2
Task 05a7bcf2 failed with error: '[' was never closed (<unknown>, line 1)
Task ID: 0607ce86
Task 0607ce86 failed
Task ID: 0692e18c
Task 0692e18c failed
Task ID: 070dd51e
Task 070dd51e failed
Task ID: 08573cc6
Task 08573cc6 failed
Task ID: 0934a4d8
Task 0934a4d8 failed
Task ID: 09c534e7
Task 09c534e7 failed with error: invalid syntax (<unknown>, line 1)
Task ID: 0a1d4ef5
Task 0a1d4ef5 failed
Task ID: 0a2355a6
Task 0a2355a6 failed
Task ID: 0b17323b
Task 0b17323b failed
Task ID: 0bb8deee
Task 0bb8deee failed with error: invalid syntax (<unknown>, line 1)
Task ID: 0becf7df
Task 0becf7df failed
Task ID: 0c786b71
Task 0c786b71 failed
Task ID: 0c9aba6e
Task 0c9aba6e failed
Task ID: 0d87d2a6
Task 0d87d2a6 failed with error: unexpected character after line continuation character (<unknown>, line 1)
Task ID: 0e671a1a
Task 0e