In [None]:
!pip install openai
!pip install tiktoken

In [21]:
# imports
import ast  # for safely parsing model output as Python literals
import glob
import json
import os # for getting API token from env variable OPENAI_API_KEY
from string import Template

import pandas as pd  # for storing text and embeddings data
import tiktoken  # for counting tokens
from matplotlib import pyplot as plt
from openai import OpenAI # for calling the OpenAI API

# models
# Base model name; it is the default `model` argument of num_tokens(), i.e. it
# selects the tokenizer used for prompt budgeting.
GPT_MODEL = "gpt-3.5-turbo"
# API client. The key is read from the OPENAI_API_KEY environment variable; when
# that variable is unset the placeholder string below is passed as the key
# instead. Prefer exporting the variable over pasting a real key into the notebook.
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "<your OpenAI API key if not set as env var>"))

In [8]:
def num_tokens(text: str, model: str = GPT_MODEL) -> int:
    """Count the tokens `text` encodes to under the tiktoken encoding for `model`.

    Args:
        text: The string to tokenize.
        model: Model name passed to `tiktoken.encoding_for_model` to pick the encoding.

    Returns:
        The length of the encoded token sequence.
    """
    tokenizer = tiktoken.encoding_for_model(model)
    token_ids = tokenizer.encode(text)
    return len(token_ids)

In [9]:
def construct_message(task, token_budget: int = 4000, count_tokens=None) -> str:
    """Build the user prompt for one task, including as many training pairs as fit.

    The prompt is the fixed instruction text, then the first k training pairs
    (their Python list repr with spaces removed), then the test inputs joined
    with ';' (spaces removed). k is the largest prefix length whose whole
    prompt stays strictly below `token_budget`.

    Args:
        task: Mapping with a 'train' list of pair dicts and a 'test' list of
            dicts that each have an 'input' key.
        token_budget: Exclusive upper bound on the token count of the prompt.
        count_tokens: Optional callable ``str -> int`` used to measure a
            prompt. Defaults to this file's ``num_tokens``.

    Returns:
        The prompt string. The prompt with zero training pairs is returned
        without a budget check when not even the first pair fits.
    """
    if count_tokens is None:
        # Resolved at call time so the default stays the module's tokenizer helper.
        count_tokens = num_tokens

    # NOTE(review): the prompt text below is wrapped in literal double-quote
    # characters and uses literal backslash-n sequences (not newlines), and it
    # contains the spelling "outputed". All of that is kept byte-for-byte --
    # presumably it mirrors the format the fine-tuned model was trained on; confirm
    # before "fixing" any of it.
    description = \
"""\
\"Given a series of input-output pairs of grids to train on, find the rule that transforms \
input grids to output grids. Then use that rule on the test input grid to produce test \
output grids. Only write the outputs and nothing else. If there are multiple test inputs, \
separate the multiple test outputs with a semicolon ';'. Each test output should be outputed \
as a 2D array, using square brackets and commas to separate. \
"""

    message_template = Template(description + \
"""\
Here are the training pairs:\\n\\n\
$train_pairs\\n\\n\
Here are the testing inputs:\\n\\n\
$testing_inputs\"\
""")

    testing_inputs = ';'.join(str(pair['input']) for pair in task['test']).replace(' ', '')

    # Baseline with no training pairs; used when even a single pair is over budget.
    message = message_template.substitute(train_pairs='', testing_inputs=testing_inputs)

    # NOTE(review): only this user message is measured; the system message and the
    # completion are not counted -- confirm the default budget leaves enough headroom
    # in the target model's context window.
    for pair_count in range(1, len(task['train']) + 1):
        candidate = message_template.substitute(
            train_pairs=str(task['train'][:pair_count]).replace(' ', ''),
            testing_inputs=testing_inputs,
        )
        if count_tokens(candidate) >= token_budget:
            # Every later candidate extends this one with more pairs, so it cannot
            # fit either; stop instead of re-tokenizing ever larger prompts.
            break
        message = candidate

    return message

In [44]:
def flattener(pred):
    """Collapse a grid into a single pipe-delimited string.

    The rows of `pred` are collected into a list, rendered with ``str``, and the
    list punctuation is rewritten: ', ' separators are dropped and the bracket
    pairs '[[', '][' and ']]' each become '|'. For example
    ``[[1, 2], [3, 4]]`` becomes ``'|12|34|'``.

    Args:
        pred: Iterable of rows (e.g. a list of lists of ints).

    Returns:
        The rewritten string.
    """
    flat = str(list(pred))
    # Order matters: separators are removed first so adjacent rows meet as ']['.
    rewrites = ((', ', ''), ('[[', '|'), ('][', '|'), (']]', '|'))
    for old, new in rewrites:
        flat = flat.replace(old, new)
    return flat

In [61]:
# Query the fine-tuned model once per test task and write one CSV row per test
# output that could be parsed from the reply.
with open('submission.csv', 'w') as output_file:
    output_file.write('output_id,output\n')
    # glob order is arbitrary; sorting keeps the run (and the row order) reproducible.
    task_paths = sorted(glob.glob('../input/abstraction-and-reasoning-challenge/test/*.json'))
    for idx, file_name in enumerate(task_paths):
        print(idx, end='\r')
        # Load the task and close the file before the (slow) network call.
        with open(file_name, 'r') as test_task_file:
            test_task = json.load(test_task_file)
        # Task id: file name without its directory, cut at the first '.'.
        task_id = os.path.basename(file_name).split('.')[0]

        messages = [
            {'role': 'system', 'content': 'You learn rules which transform input grids into output grids.'},
            {'role': 'user', 'content': construct_message(test_task)},
        ]
        response = client.chat.completions.create(
            model='ft:gpt-3.5-turbo-0613:personal:arc:8oomlieW',
            messages=messages,
            temperature=0
        )
        # `content` can be None; treat that as an empty reply rather than crashing.
        content = response.choices[0].message.content or ''

        # The prompt asks for multiple test outputs to be separated by ';'.
        for i, raw_output in enumerate(content.split(';')):
            try:
                # SECURITY: the reply is untrusted text. It was previously passed to
                # eval(), which would execute arbitrary code; ast.literal_eval only
                # accepts Python literals (e.g. nested lists of ints).
                grid = ast.literal_eval(raw_output.strip())
                output = flattener(grid)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError) as err:
                # Best effort: skip a malformed output but say so instead of hiding it.
                print(f'{task_id}_{i}: skipping unparseable model output ({err!r})')
                continue
            # Written outside the try so file-write errors are not swallowed.
            output_file.write(f'{task_id}_{i},{output}\n')