In [1]:
from openai import OpenAI
import os

# Fail fast when the OpenRouter key is missing. os.getenv() returns None for
# an unset variable, and an OpenAI client built with api_key=None falls back
# to the OPENAI_API_KEY environment variable, which would then be sent to the
# OpenRouter endpoint configured below.
_openrouter_api_key = os.getenv("OPENROUTER_API_KEY")
if not _openrouter_api_key:
    raise RuntimeError("OPENROUTER_API_KEY environment variable is not set")

# OpenAI-compatible client pointed at the OpenRouter API
client = OpenAI(
    base_url="https://openrouter.ai/api/v1",
    api_key=_openrouter_api_key,
)

def ask(messages):
    """Send a chat conversation to the model and return the reply text.

    Args:
        messages: Chat messages, passed unchanged as the ``messages`` argument
            of ``client.chat.completions.create``.

    Returns:
        The ``message.content`` of the first choice in the response.
    """
    # Fixed sampling settings used for every request
    request_options = dict(
        model="anthropic/claude-3.5-sonnet",
        messages=messages,
        temperature=0.5,
        max_tokens=2048,
        top_p=0.95,
        stop=[],
    )
    response = client.chat.completions.create(**request_options)

    first_choice = response.choices[0]
    return first_choice.message.content

In [2]:
from datetime import datetime
import traceback
import builtins

timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
log_filename = f'attempts/attempt-{timestamp}.txt'

# Create the log directory up front; opening the log file in append mode
# raises FileNotFoundError when the parent directory does not exist.
os.makedirs(os.path.dirname(log_filename), exist_ok=True)


# Redirect print statements to writing to a file
def print(*args, **kwargs):
    """Print like the built-in ``print`` and append the same text to the log file.

    Args:
        *args: Values to print; each is converted with ``str``.
        **kwargs: Forwarded unchanged to ``builtins.print``. ``sep`` and ``end``
            are also applied to the logged text so the log matches what was
            printed (defaults: ``' '`` and ``'\\n'``).
    """
    builtins.print(*args, **kwargs)

    # Mirror builtins.print: a missing or None separator/terminator means the default.
    sep = kwargs.get('sep')
    end = kwargs.get('end')
    sep = ' ' if sep is None else sep
    end = '\n' if end is None else end

    # Explicit UTF-8: the notebook logs emoji grids, which the platform's
    # default encoding may be unable to represent.
    with open(log_filename, 'a', encoding='utf-8') as f:
        f.write(sep.join(map(str, args)) + end)

In [3]:
import json
# Parse the challenge file and keep only the example puzzle
with open('puzzles/arc-agi_test_challenges.json', 'r') as f:
    data = json.load(f)["1190e5a7"]

In [4]:
# Emoji used to draw each grid cell value; the list position is the cell value (0-9)
emoji_map = dict(enumerate([
    '⚫', '🔵', '🔴', '🟢', '🟡',
    '⚪', '🟣', '🟠', '🔹', '🟥',
]))

def grid_to_emoji(grid):
    """Render a grid as text: one line per row, one emoji per cell.

    Args:
        grid: Iterable of rows, each an iterable of cell values that are
            keys of ``emoji_map``.

    Returns:
        The rows joined with newlines (no trailing newline).

    Raises:
        KeyError: If a cell value is not a key of ``emoji_map``.
    """
    rendered_rows = []
    for row in grid:
        glyphs = [emoji_map[cell] for cell in row]
        rendered_rows.append(''.join(glyphs))
    return '\n'.join(rendered_rows)

# Build the puzzle section of the prompt: every training pair with its
# expected output, then every test input with a placeholder for the answer.
prompt_sections = []

for index, pair in enumerate(data["train"]):
    prompt_sections.append(
        f"Train {index}\n"
        "Input:\n"
        f"{grid_to_emoji(pair['input'])}\n"
        "Output:\n"
        f"{grid_to_emoji(pair['output'])}\n\n"
    )

for index, case in enumerate(data["test"]):
    prompt_sections.append(
        f"Test {index}\n"
        "Input:\n"
        f"{grid_to_emoji(case['input'])}\n"
        "Output:\n"
        "<find transformation to predict the test output>\n\n"
    )

add_to_prompt = "".join(prompt_sections)

print(add_to_prompt)

Train 0
Input:
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟠🟠🟠🟠🟠🟠🟠🟠🟠🟠🟠🟠🟠🟠🟠
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
🟢🟠🟢🟢🟢🟢🟢🟢🟢🟢🟠🟢🟢🟠🟢
Output:
🟢🟢🟢🟢
🟢🟢🟢🟢

Train 1
Input:
🔵🔵🔵🔵🔹🔵🔵🔵🔵🔵🔵
🔵🔵🔵🔵🔹🔵🔵🔵🔵🔵🔵
🔵🔵🔵🔵🔹🔵🔵🔵🔵🔵🔵
🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹
🔵🔵🔵🔵🔹🔵🔵🔵🔵🔵🔵
🔵🔵🔵🔵🔹🔵🔵🔵🔵🔵🔵
🔵🔵🔵🔵🔹🔵🔵🔵🔵🔵🔵
🔵🔵🔵🔵🔹🔵🔵🔵🔵🔵🔵
🔵🔵🔵🔵🔹🔵🔵🔵🔵🔵🔵
🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹🔹
🔵🔵🔵🔵🔹🔵🔵🔵🔵🔵🔵
Output:
🔵🔵
🔵🔵
🔵🔵

Train 2
Input:
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🟢🔵🟢🔵🟢🔵🟢
🟢🟢🟢🟢🟢🟢🔵🟢🟢🟢🟢🟢🟢🟢

In [5]:
# Read the prompt template and substitute the rendered puzzle for its placeholder
with open('prompts/direct_prompt.txt', 'r') as template_file:
    prompt_template = template_file.read()

initial_prompt = prompt_template.replace("{add_to_prompt}", add_to_prompt)

In [6]:
# Ask the model for a candidate `solve` implementation up to 100 times and
# stop at the first one that reproduces every training output.
for attempt in range(100):
    messages = [
        {
            "role": "user",
            "content": [
                {
                    "type": "text",
                    "text": initial_prompt
                },
            ],
        }
    ]

    output = ask(messages)
    print(output)

    # Reset `solve` on every attempt so a function left over from an earlier
    # attempt is never scored. The placeholder accepts no arguments, so calling
    # it with a grid raises TypeError and the attempt counts as failed.
    def solve():
        return "This function has not been implemented yet - please implement the solution"

    found_solution = True
    try:
        code = output.split("<DSL>")[1].split("</DSL>")[0] # extract just the code
        # SECURITY: exec() runs model-generated code with the full privileges
        # of this kernel. Only use this with a trusted endpoint, ideally inside
        # a sandboxed environment.
        exec(code) # the generated code is expected to define `solve(grid)`
    except Exception:
        # A reply without <DSL> tags, a reply with no text, or code that fails
        # to run costs one attempt instead of aborting the whole search.
        print(traceback.format_exc())
        found_solution = False

    if found_solution:
        for grid in data["train"]:
            try:
                code_output = solve(grid["input"])
                # Render inside the try block: a malformed candidate output
                # (wrong type or unknown cell value) is a failed attempt too.
                rendered_output = grid_to_emoji(code_output)
            except Exception:
                # `except Exception` lets KeyboardInterrupt stop the loop.
                print(traceback.format_exc())
                found_solution = False
                break
            print("Code output:")
            print(rendered_output)
            print("Expected output:")
            print(grid_to_emoji(grid["output"]))
            if code_output != grid["output"]:
                found_solution = False
                break

    if found_solution:
        print("Solution found!")
        print("Test output:")
        for grid in data["test"]:
            print(solve(grid["input"]))
        break
    else:
        print(f"Attempt {attempt + 1}: Solution not found")

Let me analyze the patterns in the training examples:

1) Looking at the input-output pairs:
- Train 0: Takes a grid with horizontal orange stripes and outputs a 4x2 grid of all green
- Train 1: Takes a grid with vertical light blue stripes and outputs a 3x2 grid of all blue
- Train 2: Takes a grid with horizontal blue stripes and outputs a 6x5 grid of all green
- Test 0: Has a similar pattern with horizontal white stripes

Key observations:
1. Each input has horizontal stripes of a different color (orange, light blue, blue, white) separating regions
2. The output seems to take one of the colors from the main regions (not the stripe color) and creates a rectangular grid
3. The dimensions of the output grid seem related to the number of regions created by the stripes

For Test 0:
- The input has white horizontal stripes (⚪) separating regions of blue (🔵)
- Following the pattern from training examples, the output should be a grid of all blue
- Counting the regions between stripes in Test

In [7]:
# Render the first test input as emoji for visual inspection
print(grid_to_emoji(data["test"][0]["input"]))

🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪⚪
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵
🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵🔵⚪🔵🔵🔵🔵⚪🔵


In [8]:
# Apply `solve` to the first test input and render the result as emoji.
# NOTE(review): `solve` is whatever the attempt loop last bound (the placeholder
# or a function defined via exec), so this depends on kernel state left by that cell.
print(grid_to_emoji(solve(data["test"][0]["input"])))

🔵🔵🔵
🔵🔵🔵
🔵🔵🔵
🔵🔵🔵
🔵🔵🔵
