In [1]:
import csv
import json
import re

def is_valid_json(json_str):
    """Return whether *json_str* can be decoded by ``json.loads``.

    Args:
        json_str: Candidate JSON text (``str``, ``bytes`` or ``bytearray``).

    Returns:
        ``True`` when decoding succeeds. ``False`` when the text is malformed
        or when the argument is not a type ``json.loads`` accepts (for
        example ``None``).
    """
    try:
        json.loads(json_str)
    except (TypeError, ValueError):
        # ValueError covers json.JSONDecodeError (its subclass) as well as
        # UnicodeDecodeError for undecodable bytes; TypeError is what
        # json.loads raises for non-text input such as None.
        return False
    return True

def parse_json(json_str):
    """Extract the choice and justification fields from a JSON object string.

    Args:
        json_str: Text holding a single JSON object. Leading and trailing
            whitespace (including newlines) is ignored.

    Returns:
        A list of seven values in this order: '1st choice', '2nd choice',
        '3rd choice', then the 'Justification for 1st choice' entries for
        (EI), (NS), (TF) and (PJ). A missing key yields ``''``.
        Returns ``None`` (after printing a diagnostic) when the text is not
        valid JSON or does not decode to a JSON object.
    """
    field_names = (
        '1st choice',
        '2nd choice',
        '3rd choice',
        'Justification for 1st choice (EI)',
        'Justification for 1st choice (NS)',
        'Justification for 1st choice (TF)',
        'Justification for 1st choice (PJ)',
    )

    # Remove any leading/trailing whitespace and newlines
    json_str = json_str.strip()

    try:
        data = json.loads(json_str)
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON: {e}")
        print(f"Problematic JSON: {json_str}")
        return None

    if not isinstance(data, dict):
        # Valid JSON that is not an object (list, string, number, ...) has
        # no .get(); report it like any other unusable input instead of
        # letting an AttributeError escape.
        print(f"Error parsing JSON: expected an object, got {type(data).__name__}")
        print(f"Problematic JSON: {json_str}")
        return None

    return [data.get(name, '') for name in field_names]

# Load the raw model output as a single string.
with open('20240818_claude_output_chunk_2nd.txt', 'r', encoding='utf-8') as infile:
    content = infile.read()

# Collect every brace-delimited span that has no braces inside it.
# NOTE: because the character class excludes '{' and '}', nested objects and
# string values containing braces are not captured as a whole object.
json_object_pattern = re.compile(r'\{[^{}]*\}', re.DOTALL)
potential_json_objects = json_object_pattern.findall(content)

# Keep only the candidates that decode. The set number is the candidate's
# 1-based position among ALL matches, so rejected candidates leave gaps.
parsed_data = []
for set_number, candidate in enumerate(potential_json_objects, start=1):
    if not is_valid_json(candidate):
        continue
    fields = parse_json(candidate)
    if not fields:
        continue
    parsed_data.append([f"Set {set_number}", *fields])

# Destination file and header row.
csv_filename = "20240818_claude_output_chunk_2nd.csv"
columns = [
    'set #',
    '1st choice',
    '2nd choice',
    '3rd choice',
    'Justification for 1st choice (EI)',
    'Justification for 1st choice (NS)',
    'Justification for 1st choice (TF)',
    'Justification for 1st choice (PJ)',
]

# Emit the header followed by one row per parsed object.
with open(csv_filename, mode='w', newline='', encoding='utf-8') as outfile:
    writer = csv.writer(outfile)
    writer.writerows([columns, *parsed_data])

print(f"Data has been written to {csv_filename}")
print(f"Total entries processed: {len(parsed_data)}")


Data has been written to 20240818_claude_output_chunk_2nd.csv
Total entries processed: 186
