In [10]:
import json
from copy import deepcopy

# Input JSONL locations, relative to the current working directory.
# The first is the existing "fixed" data set that gets extended below.
fixed_sql_bird_file = (
    '../data/multi-agents/fixed/gpt-4o-mini-fixed-bird_with_evidence_train.jsonl'
)
# One validator output file per checked aspect: select, condition, join.
validator_select_file = (
    '../data/multi-agents/validator/gpt-4o-mini-validator_select_bird_with_evidence_train.jsonl'
)
validator_condition_file = (
    '../data/multi-agents/validator/gpt-4o-mini-validator_condition_bird_with_evidence_train.jsonl'
)
validator_join_file = (
    '../data/multi-agents/validator/gpt-4o-mini-validator_join_bird_with_evidence_train.jsonl'
)

# Function to load JSONL files
def load_jsonl(file_path):
    """Load a JSON Lines file into a list of parsed records.

    Every non-blank line is parsed with ``json.loads``. Blank or
    whitespace-only lines (for example a stray empty line at the end of the
    file) are skipped instead of raising ``json.JSONDecodeError``.

    Args:
        file_path: Path of the UTF-8 encoded JSONL file to read.

    Returns:
        A list holding one parsed JSON value per non-blank line, in file
        order. An empty file yields an empty list.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        # line.strip() is empty for "\n" / "  \n", which json.loads rejects.
        return [json.loads(line) for line in file if line.strip()]

# Read the four JSONL inputs into memory, one list of records per file.
# The loads run in the same order as the path definitions above.
fixed_sql_bird_data = load_jsonl(file_path=fixed_sql_bird_file)
validator_select_data = load_jsonl(file_path=validator_select_file)
validator_condition_data = load_jsonl(file_path=validator_condition_file)
validator_join_data = load_jsonl(file_path=validator_join_file)

# Process and add valid samples.
# The three validator lists are walked in lockstep: the i-th record of each
# list is treated as describing the same sample.
# NOTE(review): zip() stops at the shortest list without any error, so this
# assumes the three files are row-aligned and equally long -- confirm.
for sample_select, sample_condition, sample_join in zip(validator_select_data, validator_condition_data, validator_join_data):

    # Extract correctness feedback; a missing key yields None (treated as
    # "not correct" by the truthiness test below).
    # NOTE(review): any truthy value passes, including a non-empty string
    # such as "False" -- presumably the field is a real boolean; verify.
    select_correct = sample_select.get('feedback_conclude')
    condition_correct = sample_condition.get('feedback_conclude')
    join_correct = sample_join.get('feedback_conclude')

    # If all are correct, add a new sample to fixed_sql_bird_data
    if select_correct and condition_correct and join_correct:
        # NOTE(review): "validator_select" stores the whole select record,
        # while the other two store only their 'validator_*' sub-field
        # (KeyError if that field is absent). Kept as-is to preserve the
        # output schema -- confirm this asymmetry is intended.
        new_sample = {
            "validator_select": sample_select,
            "validator_condition": sample_condition['validator_condition'],
            "validator_join": sample_join['validator_join'],
            # One-element list holding the string "None": placeholder
            # meaning no fixed SQL was needed for this sample.
            "fixed_sql": ["None"]
        }
        fixed_sql_bird_data.append(new_sample)

# Write the extended record list back out as JSONL: one JSON document per
# line, with non-ASCII characters kept verbatim (ensure_ascii=False).
output_file = '../data/multi-agents/fixed/gpt-4o-mini-validator-fixer-bird_with_evidence_train.jsonl'
with open(output_file, mode='w', encoding='utf-8') as sink:
    for record in fixed_sql_bird_data:
        # print() appends the trailing newline that terminates each record.
        print(json.dumps(record, ensure_ascii=False), file=sink)

print(f"Updated fixed SQL data saved to {output_file}")

Updated fixed SQL data saved to ../data/multi-agents/fixed/gpt-4o-mini-validator-fixer-bird_with_evidence_train.jsonl
