In [1]:
import numpy as np
import json
from transformers import AutoTokenizer
from random import randint

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
class NumpyArrayEncoder(json.JSONEncoder):
    """JSON encoder that knows how to serialise NumPy arrays.

    Pass it as ``cls=NumpyArrayEncoder`` to ``json.dumps`` / ``json.dump``.
    """

    def default(self, obj):
        """Return a JSON-serialisable stand-in for ``obj``.

        ``np.ndarray`` instances are converted to (nested) Python lists via
        ``tolist()``. Anything else is handed to the base encoder, which
        raises ``TypeError`` for objects it cannot serialise.
        """
        if not isinstance(obj, np.ndarray):
            # Not an array: defer to the stock behaviour (raises TypeError).
            return super().default(obj)
        return obj.tolist()

In [3]:
def check_zero_rows_cols(permuted_array, sub_grid_x_dim, sub_grid_y_dim):
    """Tell whether the reshaped grid has an all-zero row or all-zero column.

    Args:
        permuted_array: Array-like object exposing ``reshape``; it is viewed
            as a ``(sub_grid_x_dim, sub_grid_y_dim)`` grid.
        sub_grid_x_dim: Size of the first axis after reshaping.
        sub_grid_y_dim: Size of the second axis after reshaping.

    Returns:
        A truthy NumPy boolean if at least one column (reduction over axis 0)
        or one row (reduction over axis 1) consists solely of zeros,
        otherwise a falsy one.
    """
    grid = permuted_array.reshape((sub_grid_x_dim, sub_grid_y_dim))
    is_zero = grid == 0

    # Reducing over axis 0 yields one flag per column.
    any_zero_column = is_zero.all(axis=0).any()
    if any_zero_column:
        return any_zero_column

    # Reducing over axis 1 yields one flag per row.
    return is_zero.all(axis=1).any()

In [4]:
# Load the tokenizer for the Mistral-7B-Instruct-v0.1 checkpoint.
# It is handed to every prompt generator in the generation cell; those
# generators return a token length with each prompt, presumably measured with
# this tokenizer — TODO confirm in generate_prompts.
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.1")

In [5]:
import generate_prompts as gp

# If num_prompts is 1, then we will print out the prompt. Otherwise, it will save a json file.
num_prompts = 20000
min_grid_dim = 6
max_grid_dim = 12
# Puzzle types 10-12 were hard-coded to a minimum grid dimension of 8 in the
# original cell (reason not visible here); kept as a named value.
obj_min_grid_dim = 8

# Dispatch table: the list index is the puzzle type, each entry is
# (generator function, minimum grid dimension passed to it).
# Every generator is called as
#   generator(min_dim, max_grid_dim, num_train_tasks, num_test_tasks, tokenizer)
# and returns (instruction, output, token_length).
puzzle_generators = [
    (gp.create_move_obj_puzzle_prompt, min_grid_dim),                   # 0
    (gp.create_rotate_obj_puzzle_prompt, min_grid_dim),                 # 1
    (gp.create_mirrored_obj_puzzle_prompt, min_grid_dim),               # 2
    (gp.create_scaled_obj_puzzle_prompt, min_grid_dim),                 # 3
    (gp.create_swapped_color_grids_prompt, min_grid_dim),               # 4
    (gp.create_same_shape_grids_prompt, min_grid_dim),                  # 5
    (gp.create_fill_pattern_holes_grids_prompt, min_grid_dim),          # 6
    (gp.create_fill_rotated_pattern_holes_grids_prompt, min_grid_dim),  # 7
    (gp.create_fill_surrounded_grids_prompt, min_grid_dim),             # 8
    (gp.create_gravity_grids_prompt, min_grid_dim),                     # 9
    (gp.create_isolate_obj_grids_prompt, obj_min_grid_dim),             # 10
    (gp.create_largest_smallest_obj_grids_prompt, obj_min_grid_dim),    # 11
    (gp.create_mask_obj_grids_prompt, obj_min_grid_dim),                # 12
    (gp.create_mirror_grids_prompt, min_grid_dim),                      # 13
    (gp.create_most_freq_in_row_grids_prompt, min_grid_dim),            # 14
    (gp.create_multiple_objs_grids_prompt, min_grid_dim),               # 15
    (gp.create_rays_grids_prompt, min_grid_dim),                        # 16
    (gp.create_rotate_grids_prompt, min_grid_dim),                      # 17
    (gp.create_same_color_objs_grids_prompt, min_grid_dim),             # 18
]

json_file = []
max_token_length = 0
for i in range(num_prompts):
    if i % 100 == 0:
        print(f"Generating prompt {i}")

    # randint is inclusive on both ends, so derive the upper bound from the
    # table: every drawn type is guaranteed to have a generator. (The previous
    # if/elif chain tested `> 17` / `> 18`, which left type 17 unhandled --
    # silently re-appending the previous sample -- and made the
    # same-color-objects generator unreachable.)
    random_puzzle_type = randint(0, len(puzzle_generators) - 1)
    num_train_tasks = randint(2, 3)
    num_test_tasks = 1

    create_prompt, puzzle_min_grid_dim = puzzle_generators[random_puzzle_type]
    instruction, output, token_length = create_prompt(
        puzzle_min_grid_dim,
        max_grid_dim,
        num_train_tasks,
        num_test_tasks,
        tokenizer,
    )

    # Track the longest prompt seen so far (reported at the end).
    max_token_length = max(max_token_length, token_length)

    json_file.append({"instruction": instruction, "output": output})

    if num_prompts == 1:
        print("Instruction:")
        print(instruction)
        print("\nOutput:")
        print(output)

if num_prompts > 1:
    # Serialise before opening the file so a serialisation error cannot
    # truncate an existing dataset on disk.
    json_string = json.dumps(json_file, cls=NumpyArrayEncoder)
    base_file_name = "../data/ARCSolver_core_puzzles_" + str(num_prompts)
    filename = base_file_name + ".json"
    filepath = filename
    with open(filepath, "w") as outfile:
        outfile.write(json_string)

print(f"\nDone. Max Token Length: {max_token_length}")

Generating prompt 0
Generating prompt 100
Generating prompt 200
Generating prompt 300
Generating prompt 400
Generating prompt 500
Generating prompt 600
Generating prompt 700
Generating prompt 800
Generating prompt 900
Generating prompt 1000
Generating prompt 1100
Generating prompt 1200
Generating prompt 1300
Generating prompt 1400
Generating prompt 1500
Generating prompt 1600
Generating prompt 1700
Generating prompt 1800
Generating prompt 1900
Generating prompt 2000
Generating prompt 2100
Generating prompt 2200
Generating prompt 2300
Generating prompt 2400
Generating prompt 2500
Generating prompt 2600
Generating prompt 2700
Generating prompt 2800
Generating prompt 2900
Generating prompt 3000
Generating prompt 3100
Generating prompt 3200
Generating prompt 3300
Generating prompt 3400
Generating prompt 3500
Generating prompt 3600
Generating prompt 3700
Generating prompt 3800
Generating prompt 3900
Generating prompt 4000
Generating prompt 4100
Generating prompt 4200
Generating prompt 4300
