In [1]:
import json
import os

In [7]:
def get_arc_data_dir():
    current_dir = os.getcwd()
    arc_root = os.path.dirname(current_dir)
    data_dir = os.path.join(arc_root, 'data')
    if not os.path.exists(data_dir):
        raise FileNotFoundError(f"Data directory not found at {data_dir}")
    
    return data_dir

In [9]:
def load_and_combine_arc_data(base_dir, dataset_type):
    challenges_file = os.path.join(base_dir, 'original', dataset_type, f'arc-agi_{dataset_type}_challenges.json')
    solutions_file = os.path.join(base_dir, 'original', dataset_type, f'arc-agi_{dataset_type}_solutions.json')
    
    challenges = None
    with open(challenges_file, 'r') as f:
        challenges = json.load(f)
    
    solutions = None
    with open(solutions_file, 'r') as f:
        solutions = json.load(f)
        
    combined_data = {}
    for task_id, challenge_data in challenges.items():
        combined_data[task_id] = {
            'test': challenge_data.get('test', []),
            'train': []
        }
        
        # Combine train data with solutions
        for train_item in challenge_data.get('train', []):
            input_data = train_item['input']
            
            output_data = None
            if task_id in solutions:
                output_data = solutions[task_id]
                
            combined_data[task_id]['train'].append({
                'input': input_data,
                'output': output_data
            })
    
    return combined_data
    

In [10]:
data = load_and_combine_arc_data(get_arc_data_dir(), 'training')

In [11]:
data

{'007bbfb7': {'test': [{'input': [[7, 0, 7], [7, 0, 7], [7, 7, 0]]}],
  'train': [{'input': [[0, 7, 7], [7, 7, 7], [0, 7, 7]],
    'output': [[7, 0, 7, 0, 0, 0, 7, 0, 7],
     [7, 0, 7, 0, 0, 0, 7, 0, 7],
     [7, 7, 0, 0, 0, 0, 7, 7, 0],
     [7, 0, 7, 0, 0, 0, 7, 0, 7],
     [7, 0, 7, 0, 0, 0, 7, 0, 7],
     [7, 7, 0, 0, 0, 0, 7, 7, 0],
     [7, 0, 7, 7, 0, 7, 0, 0, 0],
     [7, 0, 7, 7, 0, 7, 0, 0, 0],
     [7, 7, 0, 7, 7, 0, 0, 0, 0]]},
   {'input': [[4, 0, 4], [0, 0, 0], [0, 4, 0]], 'output': None},
   {'input': [[0, 0, 0], [0, 0, 2], [2, 0, 2]], 'output': None},
   {'input': [[6, 6, 0], [6, 0, 0], [0, 6, 6]], 'output': None},
   {'input': [[2, 2, 2], [0, 0, 0], [0, 2, 2]], 'output': None}]},
 '00d62c1b': {'test': [{'input': [[0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0,
      0],
     [0, 0, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
     [0, 3, 0, 3

In [12]:
solutions

NameError: name 'solutions' is not defined