### Imports & GPU Selection

In [1]:

import numpy as np
import json
import re
import matplotlib.pyplot as plt
import datetime


import os
os.environ['CUDA_DEVICE_ORDER']='PCI_BUS_ID'
os.environ['CUDA_VISIBLE_DEVICES'] = '4,5' #'0,1,2,3,4,5,6,7'

import torch
torch.cuda.is_available()

True

In [2]:
os.environ["HUGGINGFACEHUB_API_TOKEN"] = "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"

### Load Llama Model

In [3]:
MODEL_NAME = "ICBU-NPU/FashionGPT-70B-V1.1" 
#MODEL_NAME = "TheBloke/Llama-2-70b-Chat-GPTQ"
MODEL_NAME = "NousResearch/Llama-2-7b-chat-hf"
#test: 
#MODEL_NAME = "meta-llama/Llama-2-70b-chat"

In [4]:
#################################### with llama ####################################
import torch
from langchain import HuggingFacePipeline
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig, pipeline

tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, use_fast=True)

model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME, torch_dtype=torch.float16, trust_remote_code=True, device_map="auto"
)
generation_config = GenerationConfig.from_pretrained(MODEL_NAME)
generation_config.max_new_tokens = 1024
generation_config.temperature = 0.0001
generation_config.top_p = 0.9
generation_config.do_sample = True
generation_config.repetition_penalty = 1.15

text_pipeline = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    return_full_text=True,
    generation_config=generation_config,
)

llm = HuggingFacePipeline(pipeline=text_pipeline, model_kwargs={"temperature": 0})




Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]



In [5]:
%%time
# Test LLM with a simple example
result = llm(
    "Explain the difference between ChatGPT and open source LLMs in a couple of lines."
)

CPU times: user 3.16 s, sys: 283 ms, total: 3.44 s
Wall time: 3.44 s


In [6]:
print(result)


ChatGPT is a proprietary language model developed by OpenAI, while open-source LLMs are models that are available for anyone to use and modify under an open-source license. This means that ChatGPT has been trained on a specific dataset and may have biases or limitations that are not present in open-source LLMs, which can be customized and improved upon by the community.


### Helper

In [7]:
colors = [(0, 0, 0),
          (0, 116, 217),
          (255, 65, 54),
          (46, 204, 6),
          (255, 220, 0),
          (170, 170, 170),
          (240, 18, 190),
          (255, 133, 27),
          (127, 219, 255),
          (135, 12, 37)]

def grid_to_img(grid):
  grid = np.int32(grid)
  scale = 10
  img = np.zeros((grid.shape[0] * scale + 1, grid.shape[1] * scale + 1, 3), dtype=np.uint8)
  for r in range(grid.shape[0]):
    for c in range(grid.shape[1]):
      img[r*scale+1:(r+1)*scale, c*scale+1:(c+1)*scale, :] = colors[grid[r, c]]
  new_img = img.copy()
  new_img[0::10, :, :] = np.uint8(np.round((0.7 * np.float32(img[0::10, :, :]) + 0.3 * 255)))
  new_img[:, 0::10, :] = np.uint8(np.round((0.7 * np.float32(img[:, 0::10, :]) + 0.3 * 255)))
  return new_img


In [8]:
# get context and tasks out of json
def get_context(task_json):
    item_delim = ", "
    #text = "train:\n"
    text = ""
    for sample in task_json["train"]:
        text += "train input:\n"
        for row in sample["input"]:
            for i, value in enumerate(row):
                text += str(value)
                if i < len(row) - 1:
                    text += item_delim
            text += "\n"
        text += "train output:\n"
        for row in sample["output"]:
            for i, value in enumerate(row):
                text += str(value)
                if i < len(row) - 1:
                    text += item_delim
            text += "\n"
        text += "End of example.\n"  
    return text


def get_tasks(task_json):
    tasks = []
    solutions = []
    item_delim = ", "
    
    for sample in task_json["test"]:
        #task = "test:\n"
        task = "test input:\n"
        for row in sample["input"]:
            for i, value in enumerate(row):
                task += str(value)
                if i < len(row) - 1:
                    task += item_delim
            task += "\n"
        task += "test output:\n"
        # solution = "output:\n"
        solution = ""
        for row in sample["output"]:
            for i, value in enumerate(row):
                solution += str(value)
                if i < len(row) - 1:
                    solution += item_delim
            solution += "\n"
        tasks.append(task)
        solutions.append(solution)
    return tasks, solutions


In [9]:
# transform text back to json
def string_to_integer_array(input_string):
    try:
        integer_array = []
        # split the input string by "\n"
        input_string = [row for row in input_string.split('\n')]
        # Split the input string by commas and convert each substring to an integer
        for row in input_string:
            integer_array.append([int(num) for num in row.split(',')])
        return integer_array
    except ValueError:
        # Handle the case where some elements are not valid integers
        return None

def extract_lines_with_numbers(input_string, ignore_input= False):
    output_found= False
    
    # Define a regular expression pattern to match lines with arbitrary numbers separated by commas
    pattern = r'\d+(?:,\s*\d+)*'  # This pattern matches one or more digits, possibly separated by commas

    # Split the input_string into lines
    lines = input_string.split('\n')

    # Initialize an empty list to store the matched lines
    matched_lines = []

    # Initialize a flag to determine whether to ignore lines
    ignore_lines = False

    # Iterate through the lines
    for line in lines:
        if ignore_input and ignore_lines:
            # If we're in ignore mode, continue until a line with text occurs
            if len(re.findall(pattern, line)) == 0: # Check if the line contains text (ignoring leading/trailing whitespace)
                ignore_lines = False
            continue

        # Check if the line contains "Input" or "input"
        if ignore_input and ("Input" in line or "input" in line):
            ignore_lines = True
            continue

        # Check if "End of example" is encountered
        if "End of example" in line:
            break

        # Find matches in the current line and add them to the list
        matches = re.findall(pattern, line)
        if len(matches) > 0:
            matched_lines.extend(matches)
            output_found = True
        elif output_found:
            break

    # Join the matched lines into a single string with line breaks
    result_string = '\n'.join(matched_lines)

    return result_string

def get_LLM_result_as_json(tasks, results):
    llm_task_results = []
    for task, result in zip(tasks, results):
        clean_task = extract_lines_with_numbers(task)
        input = string_to_integer_array(clean_task)
        clean_result = extract_lines_with_numbers(result, True)
        output = string_to_integer_array(clean_result) 
        d = {"input": input, "output": output}
        llm_task_results.append(d)
    llm_task_results = dict({
        "test": llm_task_results,
    })
    return llm_task_results

### Main

In [10]:
#load data 
tasks_jsons = []
tasks_names = []
tasks_len = []
task_dir = "../ARC/ARC/data/training"
for task_file in sorted(os.listdir(task_dir)):
  with open(os.path.join(task_dir, task_file)) as fid:
    task_json = json.load(fid)
  tasks_jsons.append(task_json)
  tasks_names.append(task_file)


task_dir = "../ARC/ARC/data/evaluation"
for task_file in sorted(os.listdir(task_dir)):
  with open(os.path.join(task_dir, task_file)) as fid:
    task_json = json.load(fid)
  tasks_jsons.append(task_json)
  tasks_names.append(task_file)


print("Total number of tasks:", len(tasks_jsons))

Total number of tasks: 800


In [11]:
# Get the current date and time
current_datetime = datetime.datetime.now()
# Format the date and time as a string 
directory = "results/"+current_datetime.strftime("%Y-%m-%d_%H-%M-%S")
os.makedirs(directory, exist_ok=True)

# My Approach: 
token_limit = 4096
success = {}
success_log = []
task_counter = 1
promp_oversize_counter = 0
for task_json, task_name in zip(tasks_jsons, tasks_names):
  print(task_counter, "/", len(tasks_jsons))
  # Lazy load: skip evals where we already have results.
  # if task_name in success:
  #   continue

  context = "Do not give explanation.\n"
  context = "I present train examples of input and output pairs. Please return the missing test output.\n"
  context = ""
  # Build context and expected output labels.
  context += get_context(task_json)
  tasks, solutions = get_tasks(task_json)

  if len(tokenizer.encode(context+tasks[0])) > token_limit:
    print(task_name, "Prompt too long.")
    promp_oversize_counter += 1
    continue

  # Run LLM.
  for task in tasks:
    results = []
    try:
      results.append(llm(context+task))
    except Exception as e:
      print(task_name, f"LLM failed. {e}")
      continue

  # Check answers and save success rates.
  success[task_name] = 0
  for result, solution in zip(results, solutions):
    # label_str = tokenizer.decode(label, skip_special_tokens=True)
    is_success = solution.strip() in result
    success[task_name] += is_success / len(solutions)
  success[task_name] = int(success[task_name] > 0.99)  # All test cases need to correct.

  # Debug prints.
  total_success = np.sum(list(success.values()))
  print(task_name, "Success:", success[task_name], "Total:", f"{total_success} / {len(success)}")

  # Save task result in log file, if solved at least one.
  if success[task_name] > 0:
    success_log.append((task_name,total_success/len(success)))
  # save LLM task output as json file
  try:
    LLM_result_json = get_LLM_result_as_json(tasks, results) 
    with open(directory+"/"+task_name+"_LLM_result.json", "w") as json_file:
      json.dump(LLM_result_json, json_file)
  except Exception as e:
    print("Failed to write LLM result as .json file for task "+task_name, f"Error: {e}")
    continue
  # save LLM result as txt file
  try:
    LLM_answer = ""
    for i, result in enumerate(results):
      LLM_answer += f"LLM answer for tasks {i+1}:\n{result}\n"
    with open(directory+"/"+task_name+"_LLM_answer.txt", "w") as text_file:
      text_file.write(LLM_answer)
  except Exception as e:
    print("Failed to write LLM answer as .txt file for task "+task_name, f"Error: {e}")
    continue
  task_counter += 1
print("Done. Success log:")
print("Too long prompts:", promp_oversize_counter)
print(success_log)

1 / 800
007bbfb7.json Success: 0 Total: 0 / 1
2 / 800
00d62c1b.json Prompt too long.
2 / 800
017c7c7b.json Success: 0 Total: 0 / 2
Done. Success log:
Too long prompts: 1
[]


In [None]:
print(context+tasks[0])

In [15]:
llm(context+tasks[0])

'?,?,?\n?,?,?\n?,?,?\n?,?,?\n?,?,?\n?,?,?\nPlease provide the missing test output.'

In [56]:
results[0]

'5, 3, 0, 0, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 0, 3, 0, 0, 3, 4\n5, 0, 0, 0, 0, 0, 0, 0, 0, 4\n5, 0, 3, 0, 0, 0, 0, 0, 0, 4\n5, 0, 0, 3, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 0, 0, 3, 0, 0, 4\n5, 0, 3, 0, 0, 0, 0, 0, 0, 4\n5, 0, 0, 3, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 3, 0, 0, 0, 0, 4\nEnd of example.'

In [13]:
tasks, solutions = get_tasks(tasks_jsons[0])
#new = delete_substring(tasks[0], "test:\ninput:")
#new
tasks[0]

'test:\ninput:\n7, 0, 7\n7, 0, 7\n7, 7, 0\noutput:\n'

In [80]:
tasks

['test input:\n5, 3, 0, 0, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 0, 3, 0, 0, 3, 4\n5, 0, 0, 0, 0, 0, 0, 0, 0, 4\n5, 0, 0, 3, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 0, 0, 3, 0, 0, 4\n5, 0, 0, 3, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 3, 0, 0, 0, 0, 4\n5, 0, 3, 0, 0, 0, 3, 0, 0, 4\n5, 0, 0, 0, 0, 0, 0, 0, 0, 4\ntest output:\n']

In [91]:
task_json["test"]

[{'input': [[5, 3, 0, 0, 0, 0, 0, 0, 0, 4],
   [5, 0, 0, 0, 0, 3, 0, 0, 3, 4],
   [5, 0, 0, 0, 0, 0, 0, 0, 0, 4],
   [5, 0, 0, 3, 0, 0, 0, 0, 0, 4],
   [5, 0, 0, 0, 0, 0, 3, 0, 0, 4],
   [5, 0, 0, 3, 0, 0, 0, 0, 0, 4],
   [5, 0, 0, 0, 0, 0, 0, 0, 0, 4],
   [5, 0, 0, 0, 3, 0, 0, 0, 0, 4],
   [5, 0, 3, 0, 0, 0, 3, 0, 0, 4],
   [5, 0, 0, 0, 0, 0, 0, 0, 0, 4]],
  'output': [[5, 5, 0, 0, 0, 0, 0, 0, 0, 4],
   [5, 0, 0, 0, 0, 4, 0, 0, 4, 4],
   [5, 0, 0, 0, 0, 0, 0, 0, 0, 4],
   [5, 0, 0, 5, 0, 0, 0, 0, 0, 4],
   [5, 0, 0, 0, 0, 0, 4, 0, 0, 4],
   [5, 0, 0, 5, 0, 0, 0, 0, 0, 4],
   [5, 0, 0, 0, 0, 0, 0, 0, 0, 4],
   [5, 0, 0, 0, 5, 0, 0, 0, 0, 4],
   [5, 0, 5, 0, 0, 0, 4, 0, 0, 4],
   [5, 0, 0, 0, 0, 0, 0, 0, 0, 4]]}]

In [83]:
tasks

['test input:\n5, 3, 0, 0, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 0, 3, 0, 0, 3, 4\n5, 0, 0, 0, 0, 0, 0, 0, 0, 4\n5, 0, 0, 3, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 0, 0, 3, 0, 0, 4\n5, 0, 0, 3, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 0, 0, 0, 0, 0, 4\n5, 0, 0, 0, 3, 0, 0, 0, 0, 4\n5, 0, 3, 0, 0, 0, 3, 0, 0, 4\n5, 0, 0, 0, 0, 0, 0, 0, 0, 4\ntest output:\n']

In [94]:
llm_task_results = []
for task, result in zip(tasks, results):
    # ist quatsch für task und solution, da die ja schon als json vorliegen
    # clean_task = delete_substring(task, "test:\ninput:\n")
    # clean_task = delete_substring(clean_task, "\noutput:\n")
    clean_task = extract_lines_with_numbers(task)
    input = string_to_integer_array(clean_task)
    clean_result = extract_lines_with_numbers(result, True)
    output = string_to_integer_array(clean_result) 
    d = {"input": input, "output": output}
    llm_task_results.append(d)
llm_task_results = dict({
    "test": llm_task_results,
})
print(llm_task_results)
      

{'test': [{'input': [[5, 3, 0, 0, 0, 0, 0, 0, 0, 4], [5, 0, 0, 0, 0, 3, 0, 0, 3, 4], [5, 0, 0, 0, 0, 0, 0, 0, 0, 4], [5, 0, 0, 3, 0, 0, 0, 0, 0, 4], [5, 0, 0, 0, 0, 0, 3, 0, 0, 4], [5, 0, 0, 3, 0, 0, 0, 0, 0, 4], [5, 0, 0, 0, 0, 0, 0, 0, 0, 4], [5, 0, 0, 0, 3, 0, 0, 0, 0, 4], [5, 0, 3, 0, 0, 0, 3, 0, 0, 4], [5, 0, 0, 0, 0, 0, 0, 0, 0, 4]], 'output': [[5, 3, 0, 0, 0, 0, 0, 0, 0, 4], [5, 0, 0, 0, 0, 3, 0, 0, 3, 4], [5, 0, 0, 0, 0, 0, 0, 0, 0, 4], [5, 0, 3, 0, 0, 0, 0, 0, 0, 4], [5, 0, 0, 3, 0, 0, 0, 0, 0, 4], [5, 0, 0, 0, 0, 0, 3, 0, 0, 4], [5, 0, 3, 0, 0, 0, 0, 0, 0, 4], [5, 0, 0, 3, 0, 0, 0, 0, 0, 4], [5, 0, 0, 0, 0, 0, 0, 0, 0, 4], [5, 0, 0, 0, 3, 0, 0, 0, 0, 4]]}]}


In [17]:
test_string = "input:\n7, 0, 7\n7, 0, 7\n7, 7, 0\noutput:\n7, 0, 7, 0, 0, 0, 7, 0, 7\n7, 0, 7, 0, 0, 0, 7, 0, 7\n7, 7, 0, 0, 0, 0, 7, 7, 0\n7, 0, 7, 0, 0, 0, 7, 0, 7\n7, 0, 7, 0, 0, 0, 7, 0, 7\n7, 7, 0, 0, 0, 0, 7, 7, 0\n7, 0, 7, 7, 0, 7, 0, 0, 0\n7, 0, 7, 7, 0, 7, 0, 0, 0\n7, 7, 0, 7, 7, 0, 0, 0, 0\n"

In [74]:
print("helle\n"+results[0])

helle
5, 3, 0, 0, 0, 0, 0, 0, 0, 4
5, 0, 0, 0, 0, 3, 0, 0, 3, 4
5, 0, 0, 0, 0, 0, 0, 0, 0, 4
5, 0, 3, 0, 0, 0, 0, 0, 0, 4
5, 0, 0, 3, 0, 0, 0, 0, 0, 4
5, 0, 0, 0, 0, 0, 3, 0, 0, 4
5, 0, 3, 0, 0, 0, 0, 0, 0, 4
5, 0, 0, 3, 0, 0, 0, 0, 0, 4
5, 0, 0, 0, 0, 0, 0, 0, 0, 4
5, 0, 0, 0, 3, 0, 0, 0, 0, 4
End of example.


In [75]:
print(extract_lines_with_numbers(task+results[0]))

5, 3, 0, 0, 0, 0, 0, 0, 0, 4
5, 0, 0, 0, 0, 3, 0, 0, 3, 4
5, 0, 0, 0, 0, 0, 0, 0, 0, 4
5, 0, 3, 0, 0, 0, 0, 0, 0, 4
5, 0, 0, 3, 0, 0, 0, 0, 0, 4
5, 0, 0, 0, 0, 0, 3, 0, 0, 4
5, 0, 3, 0, 0, 0, 0, 0, 0, 4
5, 0, 0, 3, 0, 0, 0, 0, 0, 4
5, 0, 0, 0, 0, 0, 0, 0, 0, 4
5, 0, 0, 0, 3, 0, 0, 0, 0, 4


1, 2, 3
4, 5, 6
7, 8, 9
10, 11, 12, 13


In [26]:
row = "0,0,1,0,0\n"
string_to_integer_list(row)

[0, 0, 1, 0, 0]

In [None]:
dict({
    "input": 
})

In [19]:
task_json


{'train': [{'input': [[0, 0, 0, 0, 0, 0, 5, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 4, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0, 5, 0],
    [0, 5, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 5, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    [5, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 4, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 5, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 5]],
   'output': [[0, 0, 0, 0, 0, 0, 5, 0, 0, 0],
    [0, 0, 2, 2, 2, 0, 0, 0, 0, 0],
    [0, 0, 2, 4, 2, 0, 0, 0, 0, 0],
    [0, 0, 2, 2, 2, 0, 0, 0, 5, 0],
    [0, 5, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 5, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 2, 2, 2, 2, 2, 0],
    [5, 0, 0, 0, 2, 2, 2, 2, 2, 0],
    [0, 0, 0, 0, 2, 2, 4, 2, 2, 0],
    [0, 0, 0, 0, 2, 2, 2, 2, 2, 0],
    [0, 5, 0, 0, 2, 2, 2, 2, 2, 0],
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 5]]},
  {'input': [[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
    [0, 0, 0, 0, 0, 0, 0

In [None]:
# Show problem.
print("TRAIN:")
for i, ex in enumerate(example_json["train"]):
  in_img = grid_to_img(ex["input"])
  out_img = grid_to_img(ex["output"])
  plt.subplot(1, 2, 1); plt.imshow(grid_to_img(ex["input"]))
  plt.subplot(1, 2, 2); plt.imshow(grid_to_img(ex["output"]))
  plt.show()
print("TEST:")
for i, ex in enumerate(example_json["test"]):
  in_img = grid_to_img(ex["input"])
  out_img = grid_to_img(ex["output"])
  plt.subplot(1, 2, 1); plt.imshow(grid_to_img(ex["input"]))
  plt.subplot(1, 2, 2); plt.imshow(grid_to_img(ex["output"]))
  plt.show()