In [6]:
import json
import os
import glob
from pathlib import Path
from typing import List, Dict, Any

def check_schema_consistency(jsonl_files: List[str]) -> bool:
    """Check if all JSONL files have identical schema."""
    if not jsonl_files:
        return True
    
    # Read first file to get reference schema
    with open(jsonl_files[0], 'r') as f:
        first_line = f.readline().strip()
        if not first_line:
            return True
        reference_schema = set(json.loads(first_line).keys())
    
    # Check all other files
    for file_path in jsonl_files[1:]:
        with open(file_path, 'r') as f:
            first_line = f.readline().strip()
            if not first_line:
                continue
            current_schema = set(json.loads(first_line).keys())
            if current_schema != reference_schema:
                print(f"Schema mismatch in {file_path}")
                print(f"Expected: {reference_schema}")
                print(f"Found: {current_schema}")
                return False
    
    return True

def flatten_jsonl_files(input_files: List[str], output_file: str) -> int:
    """Flatten multiple JSONL files into a single JSONL file."""
    total_lines = 0
    
    with open(output_file, 'w') as outfile:
        for file_path in input_files:
            print(f"Processing {file_path}...")
            with open(file_path, 'r') as infile:
                for line in infile:
                    line = line.strip()
                    if line:  # Skip empty lines
                        outfile.write(line + '\n')
                        total_lines += 1
    
    return total_lines

# For RAVEN, we need to add a "split" row for train/valid set

In [None]:
# Found: {'question', 'subset_split', 'split', 'correct_answer', 'combined_image_path', 'steps_with_score', 'response', 'id'}
# Expected: {'question', 'subset_split', 'correct_answer', 'combined_image_path', 'steps_with_score', 'response', 'id'}

In [10]:
# /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/center_single
import os

# Directory path
directory = "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine"

# List all files in the directory
try:
    files = os.listdir(directory)
    
    print(f"Files in {directory}:")
    print(f"Total files: {len(files)}")
    print("-" * 50)
    
    for i, filename in enumerate(sorted(files), 1):
        print(f"{i:3d}. {filename}")
        
except FileNotFoundError:
    print(f"Directory not found: {directory}")
except PermissionError:
    print(f"Permission denied accessing: {directory}")
except Exception as e:
    print(f"Error: {e}")

Files in /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine:
Total files: 7
--------------------------------------------------
  1. distribute_nine_train_raven_rollouts_0_1999_streaming.jsonl
  2. distribute_nine_train_raven_rollouts_2000_3999_streaming.jsonl
  3. distribute_nine_train_raven_rollouts_4000_5999_streaming.jsonl
  4. distribute_nine_train_raven_rollouts_6000_7999_streaming.jsonl
  5. distribute_nine_train_raven_rollouts_8000_9999_streaming.jsonl
  6. distribute_nine_validation_raven_rollouts_0_4999_streaming.jsonl
  7. distribute_nine_validation_raven_rollouts_5000_9999_streaming.jsonl


In [3]:
import json
import os
from typing import List, Dict, Any

def add_split_column_to_jsonl_files(file_paths: List[str], split_value: str, output_dir: str = None, backup: bool = True) -> None:
    """
    Add a 'split' column to JSONL files.
    
    Args:
        file_paths: List of JSONL file paths to process
        split_value: The value to assign to the 'split' column for all rows
        output_dir: Directory to save modified files (if None, overwrites original)
        backup: Whether to create backup files before modification (only if output_dir is None)
    """
    
    for file_path in file_paths:
        if not os.path.exists(file_path):
            print(f"Warning: File not found: {file_path}")
            continue
            
        print(f"Processing {file_path}...")
        
        # Determine output file path
        if output_dir:
            # Create output directory if it doesn't exist
            os.makedirs(output_dir, exist_ok=True)
            # Use original filename in output directory
            filename = os.path.basename(file_path)
            output_file_path = os.path.join(output_dir, filename)
        else:
            # Overwrite original file
            output_file_path = file_path
            # Create backup if requested
            if backup:
                backup_path = file_path + ".backup"
                os.rename(file_path, backup_path)
                source_path = backup_path
            else:
                source_path = file_path
        
        # Read, modify, and write
        modified_lines = []
        source_path = file_path if output_dir else source_path
        
        with open(source_path, 'r') as infile:
            for line_num, line in enumerate(infile, 1):
                line = line.strip()
                if line:  # Skip empty lines
                    try:
                        data = json.loads(line)
                        # Add the split column
                        data['split'] = split_value
                        modified_lines.append(json.dumps(data))
                    except json.JSONDecodeError as e:
                        print(f"Error parsing line {line_num} in {file_path}: {e}")
                        continue
        
        # Write modified data to output file
        with open(output_file_path, 'w') as outfile:
            for line in modified_lines:
                outfile.write(line + '\n')
        
        if output_dir:
            print(f"Added 'split' column with value '{split_value}' to {len(modified_lines)} rows in {output_file_path}")
        else:
            print(f"Added 'split' column with value '{split_value}' to {len(modified_lines)} rows in {file_path}")

# # Example usage:
# # Specify your JSONL file paths
# jsonl_files = [
#     "./raw_rollouts/rollouts_only/RAVEN/left_center_single_right_center_single_train/left_center_single_right_center_single_train_raven_rollouts_6859_8000_streaming.jsonl",
#     "./raw_rollouts/rollouts_only/RAVEN/left_center_single_right_center_single_train/left_center_single_right_center_single_train_raven_rollouts_3430_4572_streaming.jsonl",
#     # Add more file paths as needed
# ]

# # Specify the split value
# split_value = "train"  # or "validation", "test", etc.

# # Option 1: Create new files in output directory
# output_directory = "./modified_jsonl_files"
# add_split_column_to_jsonl_files(jsonl_files, split_value, output_dir=output_directory)

# # Option 2: Overwrite original files (with backup)
# # add_split_column_to_jsonl_files(jsonl_files, split_value, backup=True)

In [11]:
# Process different groups of files with different split values
file_groups = {
#     "train": [
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/center_single/center_single_train_raven_rollouts_0_1999_streaming.jsonl",
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/center_single/center_single_train_raven_rollouts_2000_3332_streaming.jsonl", 
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/center_single/center_single_train_raven_rollouts_3333_4665_streaming.jsonl",
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/center_single/center_single_train_raven_rollouts_4666_5998_streaming.jsonl",
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/center_single/center_single_train_raven_rollouts_5999_7331_streaming.jsonl",
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/center_single/center_single_train_raven_rollouts_7332_8664_streaming.jsonl",
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/center_single/center_single_train_raven_rollouts_8665_9995_streaming.jsonl"
# ],
# "train": [
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_four/distribute_four_train_raven_rollouts_0_1999_streaming.jsonl",
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_four/distribute_four_train_raven_rollouts_2000_3999_streaming.jsonl",
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_four/distribute_four_train_raven_rollouts_4000_5999_streaming.jsonl",
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_four/distribute_four_train_raven_rollouts_6000_7999_streaming.jsonl",
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_four/distribute_four_train_raven_rollouts_8000_9999_streaming.jsonl"
# ],
"train": [
    "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine/distribute_nine_train_raven_rollouts_0_1999_streaming.jsonl",
    "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine/distribute_nine_train_raven_rollouts_2000_3999_streaming.jsonl",
    "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine/distribute_nine_train_raven_rollouts_4000_5999_streaming.jsonl",
    "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine/distribute_nine_train_raven_rollouts_6000_7999_streaming.jsonl",
    "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine/distribute_nine_train_raven_rollouts_8000_9999_streaming.jsonl"
],
    # "validation":  [
    # "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/center_single/center_single_validation_raven_rollouts_5000_9999.jsonl",
    # "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/center_single/center_single_validation_raven_rollouts__validation_0_4999.jsonl"
#     "validation":  [
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_four/distribute_four_validation_raven_rollouts_0_4999_streaming.jsonl",
#     "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_four/distribute_four_validation_raven_rollouts_5000_9999_streaming.jsonl"
# ]
    "validation":   [
    "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine/distribute_nine_validation_raven_rollouts_0_4999_streaming.jsonl",
    "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine/distribute_nine_validation_raven_rollouts_5000_9999_streaming.jsonl"
]
}

# Process each group
for split_val, files in file_groups.items():
    print(f"\n=== Processing {split_val} files ===")
    add_split_column_to_jsonl_files(files, split_val, output_dir=f"/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN_new", backup=False)


=== Processing train files ===
Processing /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine/distribute_nine_train_raven_rollouts_0_1999_streaming.jsonl...
Added 'split' column with value 'train' to 4771 rows in /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN_new/distribute_nine_train_raven_rollouts_0_1999_streaming.jsonl
Processing /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine/distribute_nine_train_raven_rollouts_2000_3999_streaming.jsonl...
Added 'split' column with value 'train' to 4770 rows in /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN_new/distribute_nine_train_raven_rollouts_2000_3999_streaming.jsonl
Processing /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/rollouts_only/RAVEN/distribute_nine/distribute_nine_train_raven_rollouts_4000_5999_streaming.jsonl...
Added 'split' column with value 'train' to 4770 rows in /mnt/fast10/brandon/mmr_rollout_data/raw_ro

# now we flatten

In [12]:
splits = ["RAVEN_new"]
# splits = ["AI2D", "CLEVR_10K", "RAVEN"]
output_dir = "/mnt/fast10/brandon/mmr_rollout_data/flattened_rollout_files"

# Process each split
for split in splits:
    print(f"\n=== Processing split: {split} ===")
    
    # Find all JSONL files in the split directory
    split_dir = f"./raw_rollouts/rollouts_only/{split}"
    if split == "RAVEN":
        jsonl_pattern = os.path.join(split_dir, "**", "*.jsonl")
        jsonl_files = glob.glob(jsonl_pattern, recursive=True)
    else:
        jsonl_pattern = os.path.join(split_dir, "*.jsonl")
        jsonl_files = glob.glob(jsonl_pattern)
    
    if not jsonl_files:
        print(f"No JSONL files found in {split_dir}")
        continue
    
    print(f"Found {len(jsonl_files)} JSONL files:")
    for file_path in jsonl_files:
        print(f"  - {file_path}")
    
    # Check schema consistency
    print(f"\nChecking schema consistency...")
    if not check_schema_consistency(jsonl_files):
        print(f"ERROR: Schema mismatch detected in {split}. Skipping flattening.")
        continue
    
    print("Schema check passed!")
 
    # Flatten files
    output_file = f"{output_dir}/{split}_flattened.jsonl"
    print(f"Flattening to {output_file}...")
    
    total_lines = flatten_jsonl_files(jsonl_files, output_file)
    print(f"Successfully flattened {total_lines} lines to {output_file}")

print("\n=== All splits processed ===")


=== Processing split: RAVEN_new ===
Found 54 JSONL files:
  - ./raw_rollouts/rollouts_only/RAVEN_new/distribute_four_validation_raven_rollouts_5000_9999_streaming.jsonl
  - ./raw_rollouts/rollouts_only/RAVEN_new/distribute_four_validation_raven_rollouts_0_4999_streaming.jsonl
  - ./raw_rollouts/rollouts_only/RAVEN_new/distribute_four_train_raven_rollouts_2000_3999_streaming.jsonl
  - ./raw_rollouts/rollouts_only/RAVEN_new/up_center_single_down_center_single_train_raven_rollouts_7201_8000_streaming.jsonl
  - ./raw_rollouts/rollouts_only/RAVEN_new/up_center_single_down_center_single_train_raven_rollouts_5601_6400_streaming.jsonl
  - ./raw_rollouts/rollouts_only/RAVEN_new/center_single_train_raven_rollouts_4666_5998_streaming.jsonl
  - ./raw_rollouts/rollouts_only/RAVEN_new/left_center_single_right_center_single_train_raven_rollouts_6859_8000_streaming.jsonl
  - ./raw_rollouts/rollouts_only/RAVEN_new/in_center_single_out_center_single_train_raven_rollouts_5716_6858_streaming.jsonl
  - ./

In [13]:
# Verify the flattened files
for split in splits:
    output_file = f"{output_dir}/{split}_flattened.jsonl"
    if os.path.exists(output_file):
        with open(output_file, 'r') as f:
            line_count = sum(1 for line in f if line.strip())
        print(f"{split}_flattened.jsonl: {line_count} lines")

RAVEN_new_flattened.jsonl: 286003 lines


## Flatten Verification Query Files

## for typical split where dataset name and split is the same because no splits within the dataset. BUT RAVEN is different (see below)

In [None]:
import os
import glob

# Configuration
dataset_name = "AI2D" # no need for RAVEN_new because that is only for appending the split columns to the rollout files

models = ["gpt-4.1-mini", "gpt-4.1-nano", "o4-mini"]
output_dir = "/mnt/fast10/brandon/mmr_rollout_data/flattened_verification_query_files"

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Process each model
for model in models:
    print(f"\n=== Processing model: {model} ===")
    
    # Find all JSONL files in the verification batches directory
    verification_dir = f"/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/{dataset_name}/verification/verification_pipeline_outputs/{model}/{dataset_name}/verification_batches/"
    
    jsonl_pattern = os.path.join(verification_dir, "*.jsonl")
    jsonl_files = glob.glob(jsonl_pattern)
    
    if not jsonl_files:
        print(f"No JSONL files found in {verification_dir}")
        continue
    
    print(f"Found {len(jsonl_files)} JSONL files:")
    for file_path in jsonl_files:
        print(f"  - {file_path}")
    
    # Check schema consistency
    print(f"\nChecking schema consistency...")
    if not check_schema_consistency(jsonl_files):
        print(f"ERROR: Schema mismatch detected in {model}. Skipping flattening.")
        continue
    
    print("Schema check passed!")
 
    # Flatten files
    output_file = f"{output_dir}/{dataset_name}_{model}_verification_query_flattened.jsonl"
    print(f"Flattening to {output_file}...")
    
    total_lines = flatten_jsonl_files(jsonl_files, output_file)
    print(f"Successfully flattened {total_lines} lines to {output_file}")

print("\n=== All models processed ===")


=== Processing model: gpt-4.1-mini ===
No JSONL files found in /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-mini/RAVEN/verification_batches/

=== Processing model: gpt-4.1-nano ===
No JSONL files found in /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-nano/RAVEN/verification_batches/

=== Processing model: o4-mini ===
No JSONL files found in /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/o4-mini/RAVEN/verification_batches/

=== All models processed ===


In [17]:
import os
import glob

# Configuration
dataset_name = "RAVEN" # no need for RAVEN_new because that is only for appending the split columns to the rollout files
# special case for RAVEN, which has a split within the dataset
RAVEN_dataset_patterns = ["center_single", "distribute_four", "distribute_nine", "in_center_single_out_center_single_train", "in_distribute_four_out_center_single_train", "left_center_single_right_center_single_train", "up_center_single_down_center_single_train" ]

# CLEVR splits
# CLEVR_dataset_patterns = ["CLEVR_first_5k", "CLEVR_second_5k"]

models = ["gpt-4.1-mini", "gpt-4.1-nano", "o4-mini"]
output_dir = "/mnt/fast10/brandon/mmr_rollout_data/flattened_verification_query_files"

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Process each model
for model in models:
    for dataset_pattern in RAVEN_dataset_patterns: # edit accordingly
        print(f"\n=== Processing model: {model} and dataset pattern: {dataset_pattern} ===")
        
        # Find all JSONL files in the verification batches directory
        verification_dir = f"/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/{dataset_name}/verification/verification_pipeline_outputs/{model}/{dataset_pattern}/verification_batches/"
        
        jsonl_pattern = os.path.join(verification_dir, "*.jsonl")
        jsonl_files = glob.glob(jsonl_pattern)
        
        if not jsonl_files:
            print(f"No JSONL files found in {verification_dir}")
            continue
        
        print(f"Found {len(jsonl_files)} JSONL files:")
        for file_path in jsonl_files:
            print(f"  - {file_path}")
        
        # Check schema consistency
        print(f"\nChecking schema consistency...")
        if not check_schema_consistency(jsonl_files):
            print(f"ERROR: Schema mismatch detected in {model}. Skipping flattening.")
            continue
        
        print("Schema check passed!")
    
        # Flatten files
        output_file = f"{output_dir}/{dataset_name}_{model}_verification_query_flattened.jsonl"
        print(f"Flattening to {output_file}...")
        
        total_lines = flatten_jsonl_files(jsonl_files, output_file)
        print(f"Successfully flattened {total_lines} lines to {output_file}")

print("\n=== All models processed ===")


=== Processing model: gpt-4.1-mini and dataset pattern: center_single ===
Found 12 JSONL files:
  - /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-mini/center_single/verification_batches/batch_0012.jsonl
  - /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-mini/center_single/verification_batches/batch_0005.jsonl
  - /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-mini/center_single/verification_batches/batch_0008.jsonl
  - /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-mini/center_single/verification_batches/batch_0010.jsonl
  - /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-mini/center_single/verification_batches/

# Flatten verification JSON (result) files

In [18]:
def check_schema_consistency_json(json_files: List[str]) -> bool:
    """Check if all JSON files have identical schema."""
    if not json_files:
        return True
    
    # Read first file to get reference schema
    with open(json_files[0], 'r') as f:
        data = json.load(f)
        if isinstance(data, list) and len(data) > 0:
            reference_schema = set(data[0].keys())
        # elif isinstance(data, dict):
        #     reference_schema = set(data.keys())
        else:
            raise ValueError(f"Invalid data type in {json_files[0]}")
    
    # Check all other files
    for file_path in json_files[1:]:
        with open(file_path, 'r') as f:
            data = json.load(f)
            if isinstance(data, list) and len(data) > 0:
                current_schema = set(data[0].keys())
            # elif isinstance(data, dict):
            #     current_schema = set(data.keys())
            else:
                raise ValueError(f"Invalid data type in {file_path}")
            if current_schema != reference_schema:
                print(f"Schema mismatch in {file_path}")
                print(f"Expected: {reference_schema}")
                print(f"Found: {current_schema}")
                raise ValueError(f"Schema mismatch in {file_path}")
    
    return True

def flatten_json_files(input_files: List[str], output_file: str) -> int:
    """Flatten multiple JSON files into a single JSON file."""
    all_data = []
    
    for file_path in input_files:
        print(f"Processing {file_path}...")
        with open(file_path, 'r') as f:
            data = json.load(f)
            if isinstance(data, list):
                all_data.extend(data)
            else:
                all_data.append(data)
    
    with open(output_file, 'w') as f:
        json.dump(all_data, f, indent=2)
    
    return len(all_data)

## Like for query files above, two cases. Firstly for flat single structure. Then for double structure (below)

In [None]:
import os
import glob

# Configuration
dataset_name = "AI2D"
models = ["gpt-4.1-mini", "gpt-4.1-nano", "o4-mini"]
output_dir = "/mnt/fast10/brandon/mmr_rollout_data/flattened_verification_result_files"

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Process each model
for model in models:
    print(f"\n=== Processing model: {model} ===")
    
    # Find all JSONL files in the verification batches directory
    verification_dir = f"/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/{dataset_name}/verification/verification_pipeline_outputs/{model}/{dataset_name}"
    
    jsonl_pattern = os.path.join(verification_dir, "batch_*_verification_results.json")
    jsonl_files = glob.glob(jsonl_pattern)
    
    if not jsonl_files:
        print(f"No JSONL files found in {verification_dir}")
        continue
    
    print(f"Found {len(jsonl_files)} JSONL files:")
    for file_path in jsonl_files:
        print(f"  - {file_path}")
    
    # Check schema consistency
    print(f"\nChecking schema consistency...")
    if not check_schema_consistency_json(jsonl_files):
        print(f"ERROR: Schema mismatch detected in {model}. Skipping flattening.")
        continue
    
    print("Schema check passed!")
 
    # Flatten files
    output_file = f"{output_dir}/{dataset_name}_{model}_verification_result_flattened.json"
    print(f"Flattening to {output_file}...")
    
    total_lines = flatten_json_files(jsonl_files, output_file)
    print(f"Successfully flattened {total_lines} lines to {output_file}")

print("\n=== All models processed ===")

## Double Structure

In [21]:
import os
import glob

# Configuration
dataset_name = "RAVEN"
models = ["gpt-4.1-mini", "gpt-4.1-nano", "o4-mini"]

# CLEVR_dataset_patterns = ["CLEVR_first_5k", "CLEVR_second_5k"]
# special case for RAVEN, which has a split within the dataset
RAVEN_dataset_patterns = ["center_single", "distribute_four", "distribute_nine", "in_center_single_out_center_single_train", "in_distribute_four_out_center_single_train", "left_center_single_right_center_single_train", "up_center_single_down_center_single_train" ]

output_dir = "/mnt/fast10/brandon/mmr_rollout_data/flattened_verification_result_files"

# Create output directory if it doesn't exist
os.makedirs(output_dir, exist_ok=True)

# Process each model
for model in models:
    for dataset_pattern in RAVEN_dataset_patterns: # edit accordingly
        print(f"\n=== Processing model: {model} ===")
        
        # Find all JSON files in the verification batches directory
        verification_dir = f"/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/{dataset_name}/verification/verification_pipeline_outputs/{model}/{dataset_pattern}"
        
        jsonl_pattern = os.path.join(verification_dir, "batch_*_verification_results.json")
        jsonl_files = glob.glob(jsonl_pattern)
        
        if not jsonl_files:
            print(f"No JSONL files found in {verification_dir}")
            continue
        
        print(f"Found {len(jsonl_files)} JSONL files:")
        for file_path in jsonl_files:
            print(f"  - {file_path}")
        
        # Check schema consistency
        print(f"\nChecking schema consistency...")
        if not check_schema_consistency_json(jsonl_files):
            print(f"ERROR: Schema mismatch detected in {model}. Skipping flattening.")
            continue
        
        print("Schema check passed!")
    
        # Flatten files
        output_file = f"{output_dir}/{dataset_name}_{model}_verification_result_flattened.json"
        print(f"Flattening to {output_file}...")
        
        total_lines = flatten_json_files(jsonl_files, output_file)
        print(f"Successfully flattened {total_lines} lines to {output_file}")

print("\n=== All models processed ===")


=== Processing model: gpt-4.1-mini ===
Found 12 JSONL files:
  - /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-mini/center_single/batch_0005_verification_results.json
  - /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-mini/center_single/batch_0003_verification_results.json
  - /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-mini/center_single/batch_0008_verification_results.json
  - /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-mini/center_single/batch_0004_verification_results.json
  - /mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/RAVEN/verification/verification_pipeline_outputs/gpt-4.1-mini/center_single/batch_0007_verification_results.json
  - /mnt/fast10/brandon

# Explore Files

In [15]:
# load first row of JSONL file from this path with keys

import json

# Load first row of the JSONL file
file_path = "/mnt/fast10/brandon/mmr_rollout_data/raw_rollouts/soft_estimation/AI2D/verification/verification_pipeline_outputs/o4-mini/AI2D/verification_batches/batch_0001.jsonl"

with open(file_path, 'r') as f:
    first_line = f.readline().strip()
    if first_line:
        first_row = json.loads(first_line)
        print("Keys in first row:")
        for key in first_row.keys():
            print(f"  - {key}")
        print("First row values:")
        for key, value in first_row.items():
            if key != "body":
                print(f"  {key}: {value}")
            else:
                print(f"  {key}: {type(value)}")
                print(f"  {key}: {value.keys()}")
                
                print(f"  Messages in Body Key: {value['messages'][0]['content'][0]["text"]}")
        print(first_row["custom_id"])
        print(f"\nTotal keys: {len(first_row.keys())}")
    else:
        print("File is empty")

# Count rows in JSONL file (excluding header)
# with open(file_path, 'r') as f:
#     # Skip first line (header) and count remaining lines
#     lines = f.readlines()
#     if lines:
#         # Remove header and count non-empty lines
#         data_lines = [line for line in lines if line.strip()]
#         row_count = len(data_lines)
#         print(f"Number of data rows (excluding header): {row_count}")
#     else:
#         print("File is empty")


Keys in first row:
  - custom_id
  - method
  - url
  - body
First row values:
  custom_id: 19ce6132-6781-4c96-adf5-f6f5da514761
  method: POST
  url: /chat/completions
  body: <class 'dict'>
  body: dict_keys(['model', 'messages', 'max_completion_tokens'])
  Messages in Body Key: I will provide a visual reasoning problem along with a solution. They will be formatted as follows, where m and n need not be equal: 

```
[Visual Reasoning Problem]

 <visual_reasoning_problem>
 ...(visual reasoning problem)... 
</visual_reasoning_problem> 

[Solution]

<solution>
[Visual Elements]
<step_1>
...(Step 1 of step-by-step visual elements perception)...
</step_1>
<step_2>
...(Step 2 of step-by-step visual elements perception)...
</step_2>
...
<step_m>
...(Step m of step-by-step visual elements perception)...
</step_m>

[Reasoning]
<step_1>
...(Step 1 of step-by-step reasoning)...
</step_1>
<step_2>
...(Step 2 of step-by-step reasoning)...
</step_2>
...
<step_n>
...(Step n of step-by-step reasoning

In [None]:
import json

# Load first item from the JSON file (does not work for JSONL)
file_path = "/mnt/fast10/brandon/mmr_rollout_data/flattened_verification_result_files/AI2D_o4-mini_verification_flattened.json"
# empty verification values means likely the verification returned an error
with open(file_path, 'r') as f:
    data = json.load(f)
    
    if isinstance(data, list) and len(data) > 0:
        first_item = data[0]
        print("Keys in first item:")
        for key in first_item.keys():
            print(f"  - {key}")
        print(f"\nTotal keys: {len(first_item.keys())}")
    elif isinstance(data, dict):
        print("Keys in JSON object:")
        for key in data.keys():
            print(f"  - {key}")
        print(f"\nTotal keys: {len(data.keys())}")
    else:
        print("File is empty or not a list/dict")

JSONDecodeError: Extra data: line 2 column 1 (char 92363)

In [None]:
# Find and print the item with the specific custom_id
target_custom_id = "203288af-ef39-41da-bff6-5184ad83b951"

with open(file_path, 'r') as f:
    data = json.load(f)
    
    if isinstance(data, list):
        # Search through the list for the target custom_id
        target_item = None
        for item in data:
            if item.get("custom_id") == target_custom_id:
                target_item = item
                break
        
        if target_item:
            print(f"Found item with custom_id: {target_custom_id}")
            print("Item contents:")
            print(json.dumps(target_item, indent=2))
        else:
            print(f"No item found with custom_id: {target_custom_id}")
    else:
        print("Data is not a list")

Found item with custom_id: 203288af-ef39-41da-bff6-5184ad83b951
Item contents:
{
  "custom_id": "203288af-ef39-41da-bff6-5184ad83b951",
  "verification_response": ""
}


In [2]:
# load first row of JSONL file from this path with keys

import json

# Load first row of the JSONL file
file_path = "/mnt/fast10/brandon/mmr_rollout_data/flattened_rollout_files/AI2D_flattened.jsonl"

with open(file_path, 'r') as f:
    first_line = f.readline().strip()
    if first_line:
        first_row = json.loads(first_line)
        print("Keys in first row:")
        for key in first_row.keys():
            print(f"  - {key}")
        print("First row values:")
        for key, value in first_row.items():
            print(f"  {key}: {value}")
        # print(first_row["conversations"])
        # print(first_row[""])
        print(f"\nTotal keys: {len(first_row.keys())}")
    else:
        print("File is empty")

Keys in first row:
  - image
  - question
  - answer
  - uid
  - image_path
  - response
  - steps_with_score
First row values:
  image: MMPR-v1.2-prompts/correctness_images/ai2d_train_12k_en_20240410_extracted/images/480.png
  question: You are an advanced visual reasoning AI specialized in analyzing images for the Visual Question Answering (VQA) task. Your objective is to examine images containing various objects, scenes, geometric shapes, diagram elements, and potentially text or numbers, and reason about processes or changes, and answer questions about their attributes, relationships, and spatial arrangements.

I will provide you with:

1. An image containing science diagrams
2. A question about the contents of the image

Here is the question you need to answer:

<question>
What would happen if the fox population decreased?
more grouse
more pippit
less groupse
less owl
Please answer the question based on the options mentioned before.
</question>

Please follow these steps to comple