# In [3]:
import json
import csv

# Load the corpus from 'ECHR_Corpus.json' (decoded as UTF-8) into `data`.
# `data` is later passed to create_csv_files, which iterates it and reads the
# 'name', 'clauses', 'arguments' and 'text' keys of every entry.
with open('ECHR_Corpus.json', 'r', encoding='utf-8') as file:
    data = json.load(file)

def _argument_rows(argument, clause_text_map):
    """Yield the ``[text, label]`` CSV rows for one argument, then a blank row.

    Premise and conclusion ids that have no entry in *clause_text_map* are
    skipped. The trailing ``['', '']`` row separates consecutive arguments.
    """
    # `or ()` also covers an explicit null/None premises value.
    for premise_id in argument.get('premises') or ():
        if premise_id in clause_text_map:
            yield [clause_text_map[premise_id], 'premise']

    # The conclusion is a single clause id, not a list.
    conclusion_id = argument.get('conclusion', '')
    if conclusion_id and conclusion_id in clause_text_map:
        yield [clause_text_map[conclusion_id], 'conclusion']

    yield ['', '']


def create_csv_files(data):
    """Write one ``<case name>.csv`` file per case, listing its argument clauses.

    For every case, the clause texts are sliced out of the case's full text
    using each clause's ``start``/``end`` offsets. Each argument then
    contributes one row per premise (label ``'premise'``), one row for its
    conclusion (label ``'conclusion'``) and a blank separator row. Clause ids
    that do not appear in the case's ``clauses`` list are skipped silently.

    Files are opened relative to the current working directory in ``'w'``
    mode, so an existing file with the same name is overwritten.

    Args:
        data: Iterable of case dicts. Each case must provide ``'name'``,
            ``'clauses'`` (dicts with ``'_id'``, ``'start'``, ``'end'``),
            ``'arguments'`` (dicts with optional ``'premises'`` and
            ``'conclusion'``) and ``'text'``.

    Raises:
        KeyError: If a case or clause lacks one of the required keys.
        OSError: If a CSV file cannot be opened for writing.
    """
    suffix = '.txt'
    for case in data:
        case_name = case['name']
        # Strip only a trailing '.txt'; str.replace would also delete '.txt'
        # occurring in the middle of the name and could make names collide.
        if case_name.endswith(suffix):
            case_name = case_name[:-len(suffix)]
        clauses = case['clauses']
        arguments = case['arguments']
        text = case['text']

        # Map every clause id to the slice of the case text it covers.
        clause_text_map = {
            clause['_id']: text[clause['start']:clause['end']]
            for clause in clauses
        }

        # newline='' lets the csv module control line endings itself.
        with open(f'{case_name}.csv', 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.writer(csvfile)
            writer.writerow(['text', 'label'])  # Header
            for argument in arguments:
                writer.writerows(_argument_rows(argument, clause_text_map))

# Run the export: create_csv_files writes one CSV file per case in `data`,
# opening each file relative to the current working directory.
create_csv_files(data)
