In [25]:
import os
from openai import OpenAI
from dotenv import load_dotenv
from task import TaskDataset, get_task
from observation_generation_prompting import generate_observations
from observation_verification_prompting import process_and_verify_observations
from observation_classification_prompting import classify_observations
from observation_selection_prompting import select_best_observations
from observation_questioning_prompting import generate_questions
import random

MAX_SEARCH_DEPTH = 100

load_dotenv() 

# Initialize OpenAI client
client = OpenAI()

# Get task using the factory function
task = get_task('arc-agi_evaluation_challenges.json', '212895b5')
print(f"Processing task {task['task_id']}")

Processing task 212895b5


In [2]:
# Generate initial observations
try:
    observations = generate_observations(
        client=client,
        num_observations=256,
        task=task)
    print(f"Generated {len(observations)} initial observations")
except Exception as e:
    print(f"Error generating observations: {e}")
    observations = []

Generated 256 initial observations


In [8]:
# Classify observations into 'yes' and 'no'
try:
    yes_observations, no_observations = classify_observations(
        client=client,
        observations=observations,
        verbose=True
    )
    print(f"Classified {len(yes_observations)} yes observations and {len(no_observations)} no observations")
except Exception as e:
    print(f"Error classifying observations: {e}")

Batch 19 response_content:
```python
{
    "response": {
        "yes_observations": [
            {
                "observation": "**Clustered Color Grouping**: Colors tend to cluster together in the output grid, forming blocks or lines rather than remaining isolated, indicating a tendency for the transformation to promote connectivity among like colors.",
                "reason": "This can be verified by analyzing the distribution of colors in the output grid and checking for clusters or contiguous blocks of the same color."
            },
            {
                "observation": "**Increased Density of Colors**: The output grid has a higher density of colors compared to the input, with more filled cells, which could suggest a transformation rule that favors saturation in specific areas.",
                "reason": "This can be easily verified by counting the number of filled cells in both the input and output grids and comparing the densities."
            },
            {
   

In [9]:
# Randomly shuffle yes_observations and no_observations
random.shuffle(yes_observations)
random.shuffle(no_observations)

print(f"yes_observations: {yes_observations}")
print(f"no_observations: {no_observations}")

yes_observations: [{'observation': '**Increased Density**: Many cells that were originally empty in the input grid are filled with color in the output grid, indicating a denser configuration that transforms sparse data into more visually complex arrangements.', 'reason': 'This can be verified by counting the number of empty cells in the input grid and comparing it to the number of empty cells in the output grid to confirm that there are fewer empty cells.'}, {'observation': '**Color Addition**: The transformation introduces additional colors (yellow and red) not present in the input grid, indicating that the output grid incorporates new elements or rules that allow for color expansion.', 'reason': 'This can be easily verified by comparing the sets of colors in the input and output grids to check for the presence of new colors.'}, {'observation': '**Color Transformation**: New colors emerge in the output grid (like red, yellow, and orange), suggesting that the transformation not only re

In [17]:
yes_observations = [item['observation'] for item in yes_observations]
no_observations = [item['observation'] for item in no_observations]

In [19]:
# Split into 16 roughly equal batches
batch_size = len(yes_observations) // 16
yes_batches = [yes_observations[i*batch_size:(i+1)*batch_size] for i in range(0, 16)]
# Select best from each batch
best_yes_observations = []
for batch in yes_batches:
    selected = select_best_observations(
        client=client,
        observations=batch,
        num_best=1,  # Select 1 from each batch
        easy_to_verify=True,
        verbose=False
    )
    print(f"from yes batch: {batch} selected: {selected}")
    best_yes_observations.extend(selected)


from yes batch: ['**Increased Density**: Many cells that were originally empty in the input grid are filled with color in the output grid, indicating a denser configuration that transforms sparse data into more visually complex arrangements.', '**Color Addition**: The transformation introduces additional colors (yellow and red) not present in the input grid, indicating that the output grid incorporates new elements or rules that allow for color expansion.', '**Color Transformation**: New colors emerge in the output grid (like red, yellow, and orange), suggesting that the transformation not only redistributes existing colors but also introduces new elements, enhancing the visual complexity.'] selected: ['**Color Addition**: The transformation introduces additional colors (yellow and red) not present in the input grid, indicating that the output grid incorporates new elements or rules that allow for color expansion.']
from yes batch: ['**Volume Increase**: The number of colored cells inc

In [21]:
# Do the same for no_observations
batch_size = len(no_observations) // 16
no_batches = [no_observations[i*batch_size:(i+1)*batch_size] for i in range(0, 16)]
best_no_observations = []
for batch in no_batches:
    selected = select_best_observations(
        client=client,
        observations=batch,
        num_best=1,
        easy_to_verify=False,
        verbose=False
    )
    best_no_observations.extend(selected)


In [None]:
from observation_generation_prompting import generate_observations


In [27]:
# Initialize observations and 'no's lists
observations_list = best_yes_observations
nos_list = best_no_observations

# Now, for search_depth in range(1, max_search_depth+1)
for search_depth in range(1, MAX_SEARCH_DEPTH+1):
    print(f"Search Depth: {search_depth}")
    if not nos_list:
        break  # No more 'no' observations to expand upon
    new_observations = []
    new_nos = []
    for no_observation in nos_list:
        # Generate questions for this observation
        questions = generate_questions(
            client=client,
            observation=no_observation,
            num_questions=16
        )
        
        # Create additional context combining the original observation and questions
        questions_text = "\n".join([f"- {q}" for q in questions])
        observations = generate_observations(
            client=client,
            num_observations=16,
            task=task,
            verbose=False,
            additional_context=questions_text,
            original_observation=no_observation
        )
        
        # Classify observations
        yes_obs, no_obs = classify_observations(
            client=client,
            observations=observations,
            verbose=False
        )
        
        # Select best observations
        best_yes_obs = select_best_observations(
            client=client,
            observations=yes_obs,
            num_best=16,
            easy_to_verify=True,
            verbose=False
        )
        best_no_obs = select_best_observations(
            client=client,
            observations=no_obs,
            num_best=16,
            easy_to_verify=False,
            verbose=False
        )
        
        # Add to lists
        observations_list.extend(best_yes_obs)
        new_nos.extend(best_no_obs)
    nos_list = new_nos

# Process and verify observations
verified_observations = process_and_verify_observations(
    client=client,
    observations=observations_list,
    examples=task['train'],
    verbose=False
)

# Print the valid observations with code verifiers
print("\nVerified Observations:")
for observation, is_valid in verified_observations.items():
    print(f"\nObservation: {observation}")
    print(f"Valid: {is_valid}")

Search Depth: 1


TypeError: generate_observations() got an unexpected keyword argument 'original_observation'