In [1]:
import json
import os
from random import randint

import numpy as np
from transformers import AutoTokenizer

In [2]:
class NumpyArrayEncoder(json.JSONEncoder):
    """JSON encoder that also understands NumPy arrays and NumPy scalars.

    The standard encoder rejects ``np.ndarray`` as well as NumPy scalar types
    such as ``np.int64`` or ``np.bool_`` (they are not subclasses of the
    built-in ``int``/``bool``). This encoder converts them to their plain
    Python equivalents so they can be written with ``json.dumps(..., cls=NumpyArrayEncoder)``.
    """

    def default(self, obj):
        """Return a JSON-serializable stand-in for ``obj``.

        Args:
            obj: The object the base encoder could not serialize on its own.

        Returns:
            A (nested) list for arrays, or a built-in ``bool``/``int``/``float``
            for NumPy scalars.

        Raises:
            TypeError: If ``obj`` is not a supported NumPy type (raised by the
                base class implementation).
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        # Explicit built-in conversions are used so the returned value is
        # always a type the base encoder handles natively.
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        return super().default(obj)

In [3]:
def check_zero_rows_cols(permuted_array, sub_grid_x_dim, sub_grid_y_dim):
    reshaped_array = permuted_array.reshape((sub_grid_x_dim, sub_grid_y_dim))
    return np.any(np.all(reshaped_array == 0, axis=0)) or np.any(
        np.all(reshaped_array == 0, axis=1)
    )

In [4]:
# Number of prompts to generate. When num_prompts is 1 the single prompt is
# printed; when it is greater than 1 the prompts are saved to a JSON file.
num_prompts = 1
# Grid-dimension bounds handed to the prompt generators.
# NOTE(review): whether these bounds are inclusive is decided inside
# generate_prompts — confirm there.
min_grid_dim = 6
max_grid_dim = 8

In [5]:
# Load the tokenizer for the Mistral-7B-Instruct-v0.1 checkpoint. It is passed
# to every prompt generator call in the generation cell.
# NOTE(review): presumably used there to measure each prompt's token length —
# confirm in generate_prompts.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

In [20]:
import generate_prompts as gp

# Generate `num_prompts` random puzzle prompts. A single prompt is printed for
# inspection; more than one is written to a JSON file under ../data.

# Dispatch table: each entry pairs a prompt generator with the first positional
# argument it is called with. The random index range is derived from the table
# length, so adding or removing a puzzle type only requires editing this tuple.
puzzle_generators = (
    (gp.create_move_obj_puzzle_prompt, min_grid_dim),
    (gp.create_rotate_obj_puzzle_prompt, min_grid_dim),
    (gp.create_mirrored_obj_puzzle_prompt, min_grid_dim),
    (gp.create_scaled_obj_puzzle_prompt, min_grid_dim),
    (gp.create_swapped_color_grids_prompt, min_grid_dim),
    # NOTE(review): this generator receives a hard-coded 8 instead of
    # min_grid_dim — confirm that is intentional.
    (gp.create_same_shape_grids_prompt, 8),
)

json_file = []  # one {"instruction", "output"} record per generated prompt
max_token_length = 0  # largest token_length returned by any generator call
for i in range(num_prompts):
    # randint is inclusive on both ends, hence the "- 1".
    random_puzzle_type = randint(0, len(puzzle_generators) - 1)
    create_prompt, first_dim_arg = puzzle_generators[random_puzzle_type]
    instruction, output, token_length = create_prompt(first_dim_arg, max_grid_dim, tokenizer)

    max_token_length = max(max_token_length, token_length)

    json_file.append({"instruction": instruction, "output": output})

    if num_prompts == 1:
        print("Instruction:")
        print(instruction)
        print("\nOutput:")
        print(output)

if num_prompts > 1:
    # Serialize before opening the file so a serialization error cannot leave
    # a truncated file behind.
    json_string = json.dumps(json_file, cls=NumpyArrayEncoder)
    filepath = f"../data/ARCSolver_core_puzzles_{num_prompts}.json"
    # Create the output directory up front; otherwise a missing ../data would
    # only surface as FileNotFoundError after all prompts were generated.
    os.makedirs(os.path.dirname(filepath), exist_ok=True)
    with open(filepath, "w") as outfile:
        outfile.write(json_string)

print(f"\nMax Token Length: {max_token_length}")

Instruction:
Given the following input/output train pairs of ARCSolver grids: Train_Input_1=[[0,0,0,0,0,0,0,0],[0,0,0,0,0,4,2,0],[0,0,0,0,0,2,3,0],[0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0]] and Train_Output_1=[[0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0],[0,0,4,2,0,0,0,0],[0,0,2,3,0,0,0,0],[0,0,0,0,0,0,0,0]], Train_Input_2=[[0,0,0,0,0,0,0],[0,0,0,0,0,0,0],[0,0,0,0,9,6,1],[0,0,0,5,8,6,1],[0,0,0,3,0,0,4],[0,0,0,8,6,9,4],[0,0,0,0,0,0,0],[0,0,0,0,0,0,0]] and Train_Output_2=[[0,0,0,0,0,0,0],[0,0,0,0,0,0,0],[0,0,0,0,0,0,0],[0,0,0,0,0,0,0],[0,9,6,1,0,0,0],[5,8,6,1,0,0,0],[3,0,0,4,0,0,0],[8,6,9,4,0,0,0]], Train_Input_3=[[0,0,0,0,0,0,0,0],[0,0,0,0,1,7,1,0],[0,0,0,0,3,2,5,0],[0,0,0,0,7,6,2,0],[0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0]] and Train_Output_3=[[0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0],[0,0,0,0,0,0,0,0],[0,1,7,1,0,0,0,0],[0,3,2,5,0,0,0,0],[0,7,6,2,0,0,0,0],[0,0,0,0,0,0,0,0]]. Find the transformation from each input grid to output grid that is common to all 3 