In [1]:
# Install / upgrade the notebook's dependencies into the kernel's environment.
# bitsandbytes comes from PyPI; transformers and peft are built from the tip of
# their GitHub repositories, so none of the three versions are pinned here.
# pip's own output recommends restarting the kernel after this cell has run.
%pip install -q -U bitsandbytes
%pip install -q -U git+https://github.com/huggingface/transformers.git
%pip install -q -U git+https://github.com/huggingface/peft.git

Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig

# Quantization settings handed to `from_pretrained` when the model is loaded:
# 4-bit loading with the "nf4" quantization type, double quantization switched
# on, and bfloat16 as the compute dtype.
bnb_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

In [3]:
# Hugging Face Hub id of the instruction-tuned checkpoint used in this notebook.
model_id = "mistralai/Mistral-7B-Instruct-v0.1"

# Load the checkpoint with the quantization settings from `bnb_config`.
# device_map={"": 0} maps the root module (and therefore the whole model) to
# device index 0.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=bnb_config,
    device_map={"": 0},
)
tokenizer = AutoTokenizer.from_pretrained(model_id, add_eos_token=True)

# NOTE: The following warnings can be ignored. I think they are caused by the
# fact that the model is loaded on a single GPU and some parameters are
# offloaded to the CPU:
#
#   WARNING:root:Some parameters are on the meta device device because they were offloaded to the .
#   WARNING:root:Some parameters are on the meta device device because they were offloaded to the cpu/disk.
#
# (Kept as `#` comments rather than a bare triple-quoted string: a string
# literal that ends a cell is an expression, so Jupyter would echo it as the
# cell's output value.)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]





In [4]:
import numpy as np
import random

# Inclusive bounds for the number of rows and for the number of columns of the
# randomly generated grid.
min_num_rows_and_columns = 1
max_num_rows_and_columns = 8

# [input, output] grid-name pairs; a single name is drawn at random and used to
# label the grid inside the prompt.
train_grid_names = [
    ["Train_Input_1", "Train_Output_1"],
    ["Train_Input_2", "Train_Output_2"],
    ["Train_Input_3", "Train_Output_3"],
    ["Train_Input_4", "Train_Output_4"],
    ["Train_Input_5", "Train_Output_5"],
]
test_grid_names = [
    ["Test_Input_1", "Test_Output_1"],
    ["Test_Input_2", "Test_Output_2"],
]

# Palette: the color of digit d is colors[d]. The prompt's color legend and the
# range of generated digits are both derived from this list so they cannot
# drift apart.
colors = ["black", "blue", "red", "green", "yellow", "gray", "magenta", "orange", "cyan", "brown"]


def grid_to_text(grid_array):
    """Render a 2D grid as one line of digits per row.

    Args:
        grid_array: 2D array-like of integer digits.

    Returns:
        A string with the digits of each row concatenated, every row
        (including the last) terminated by a newline.
    """
    rows = np.asarray(grid_array).tolist()
    return "".join("".join(str(digit) for digit in row) + "\n" for row in rows)


def grid_to_literal(grid_array):
    """Serialize a 2D grid as a compact nested-list literal such as ``[[1,2],[3,4]]``.

    Args:
        grid_array: 2D array-like of integer digits.

    Returns:
        The bracketed, comma-separated representation without any spaces.
    """
    rows = np.asarray(grid_array).tolist()
    return "[" + ",".join("[" + ",".join(str(digit) for digit in row) + "]" for row in rows) + "]"


def build_question(question_type, grid_name, grid_array, row, column):
    """Build one question about the grid together with its expected answer.

    Args:
        question_type: 0 = contents of a row, 1 = number of rows,
            2 = number of columns, 3 = color of a square,
            any other value = digit of a square.
        grid_name: Name used to refer to the grid in the question text.
        grid_array: The 2D grid (indexable as ``grid_array[row][column]``).
        row: Row index used by the row / color / digit questions.
        column: Column index used by the color / digit questions.

    Returns:
        A ``(question_text, ground_truth)`` tuple. ``ground_truth`` is the row
        itself, an integer count, a color name from ``colors``, or the digit
        stored in the square, depending on ``question_type``.
    """
    square = f"{grid_name}[{row}][{column}]"
    if question_type == 0:
        return f"What is {grid_name}[{row}]?", grid_array[row]
    if question_type == 1:
        return f"How many rows does {grid_name} have?", len(grid_array)
    if question_type == 2:
        return f"How many columns does {grid_name} have?", len(grid_array[0])
    if question_type == 3:
        return (
            f"What color is the ARCSolver grid square at {square}?",
            colors[grid_array[row][column]],
        )
    return f"What digit is {square}?", grid_array[row][column]


# --- Generate a random grid ---------------------------------------------------
row_size = random.randint(min_num_rows_and_columns, max_num_rows_and_columns)
column_size = random.randint(min_num_rows_and_columns, max_num_rows_and_columns)
# One digit per palette entry, i.e. values in [0, len(colors)).
random_array = np.random.randint(0, len(colors), size=(row_size, column_size))

num_rows, num_columns = random_array.shape

# Display grid in better visual format
print("\nVisualization of grid:\n")
grid = grid_to_text(random_array)
print(grid)

# --- Pick the grid's name and the square the question may refer to ------------
# 0 selects the test names, 1 the train names; then a random pair and a random
# member (input or output) of that pair.
train_or_test = random.randint(0, 1)
candidate_pairs = test_grid_names if train_or_test == 0 else train_grid_names
grid_name = random.choice(random.choice(candidate_pairs))

random_row = random.randrange(num_rows)
random_column = random.randrange(num_columns)

# --- Assemble the instruction -------------------------------------------------
color_legend = ", ".join(f"{digit} = {color}" for digit, color in enumerate(colors))
instruction = (
    "An ARCSolver grid is a rectangular 2D array of digits from 0 to 9. "
    "An ARCSolver grid's digits represent the colors of each grid square rather than scalar values. "
    f"{color_legend}. "
    f"If ARCSolver grid {grid_name} = {grid_to_literal(random_array)}. "
)

# Append one of the five question types, chosen uniformly at random.
random_question = random.randint(0, 4)
question, ground_truth = build_question(
    random_question, grid_name, random_array, random_row, random_column
)
instruction += question

# Wrap the instruction in the [INST] ... [/INST] markers. The <s> marker is
# written into the prompt by hand, so the tokenizer is asked not to add
# special tokens of its own.
query = instruction
prompt = f"<s>[INST] {query} [/INST]"
print("Query:\n", prompt)

encodeds = tokenizer(prompt, return_tensors="pt", add_special_tokens=False)
model_inputs = encodeds.to("cuda")

# Reuse the end-of-sequence token as the padding token.
tokenizer.pad_token = tokenizer.eos_token

# Sample up to 256 new tokens; EOS doubles as the pad id during generation.
generated_ids = model.generate(
    **model_inputs,
    max_new_tokens=256,
    do_sample=True,
    pad_token_id=tokenizer.eos_token_id,
)

# The decoded text of the first (only) sequence holds the prompt followed by
# the model's completion.
decoded = tokenizer.batch_decode(generated_ids)
result = decoded[0]
print("\nResult:\n", result)

print("\nGround truth:\n", ground_truth)

# Token count of the full decoded text (prompt plus completion), obtained by
# tokenizing that text again.
tokenized_request = tokenizer.tokenize(result)
token_length = len(tokenized_request)

print("\nNumber of tokens: ", token_length)


Visualization of grid:

90459107
00760823

Query:
 <s>[INST] An ARCSolver grid is a rectangular 2D array of digits from 0 to 9. An ARCSolver grid's digits represent the colors of each grid square rather than scalar values. 0 = black, 1 = blue, 2 = red, 3 = green, 4 = yellow, 5 = gray, 6 = magenta, 7 = orange, 8 = cyan, 9 = brown. If ARCSolver grid Train_Output_5 = [[9,0,4,5,9,1,0,7],[0,0,7,6,0,8,2,3]]. What color is is the ARCSolver grid square at Train_Output_5[0][3]? [/INST]

Result:
 <s> [INST] An ARCSolver grid is a rectangular 2D array of digits from 0 to 9. An ARCSolver grid's digits represent the colors of each grid square rather than scalar values. 0 = black, 1 = blue, 2 = red, 3 = green, 4 = yellow, 5 = gray, 6 = magenta, 7 = orange, 8 = cyan, 9 = brown. If ARCSolver grid Train_Output_5 = [[9,0,4,5,9,1,0,7],[0,0,7,6,0,8,2,3]]. What color is is the ARCSolver grid square at Train_Output_5[0][3]? [/INST] The color of the ARCSolver grid square at Train_Output_5[0][3] is magenta.<