In [2]:
import json
import os
from pathlib import Path

def build_unified_json():
    """
    Build unified JSON files by combining data from multiple sources:
    - fallos_json: Main ruling content
    - articulos_estructurados: Citations data  
    - summaries: Core summaries
    """
    
    # Define paths
    base_path = Path("datasets")
    fallos_path = base_path / "fallos_json"
    articulos_path = base_path / "articulos_estructurados"
    summaries_path = base_path / "summaries"
    output_path = base_path / "unified_json"
    
    # Create output directory
    output_path.mkdir(exist_ok=True)
    
    # Process each month folder
    for month_folder in fallos_path.iterdir():
        if not month_folder.is_dir():
            continue
            
        mes = month_folder.name
        print(f"Processing month: {mes}")
        
        # Create month output directory
        month_output_path = output_path / mes
        month_output_path.mkdir(exist_ok=True)
        
        # Process each ruling in the month
        for fallo_file in month_folder.glob("*.json"):
            id_fallo = fallo_file.stem  # filename without extension
            
            try:
                # Load main ruling data
                with open(fallo_file, 'r', encoding='utf-8') as f:
                    fallo_data = json.load(f)
                
                # Handle case where fallo_data is a list instead of dict
                if isinstance(fallo_data, list):
                    print(f"  ! Warning: {id_fallo} contains a list, taking first element")
                    fallo_data = fallo_data[0] if fallo_data else {}
                
                # Load citations data
                articulos_file = articulos_path / mes / f"{id_fallo}.json"
                articulos_citados = {}
                if articulos_file.exists():
                    with open(articulos_file, 'r', encoding='utf-8') as f:
                        articulos_data = json.load(f)
                        # Handle list case for articulos too
                        if isinstance(articulos_data, list):
                            articulos_data = articulos_data[0] if articulos_data else {}
                        articulos_citados = articulos_data.get("RULING_CITATIONS", {})
                
                # Load summary data
                summary_file = summaries_path / mes / f"{id_fallo}.json"
                idea_central = ""
                if summary_file.exists():
                    with open(summary_file, 'r', encoding='utf-8') as f:
                        summary_data = json.load(f)
                        # Handle list case for summaries too
                        if isinstance(summary_data, list):
                            summary_data = summary_data[0] if summary_data else {}
                        idea_central = summary_data.get("CORE_SUMMARY", "")
                
                # Build unified structure
                unified_json = {
                    "METADATOS": {
                        "ID_FALLO": id_fallo,
                        "MES": mes,
                        "ARTICULOS_CITADOS": articulos_citados
                    },
                    "MATERIA_PRELIMINAR": fallo_data.get("INFORMACION", {}).get("MATERIA_PRELIMINAR", ""),
                    "IDEA_CENTRAL": idea_central,
                    "CONTENIDO": fallo_data.get("CONTENIDO", {})
                }
                
                # Save unified JSON
                output_file = month_output_path / f"{id_fallo}.json"
                with open(output_file, 'w', encoding='utf-8') as f:
                    json.dump(unified_json, f, ensure_ascii=False, indent=2)
                
                print(f"  ✓ Created unified JSON for {id_fallo}")
                
            except Exception as e:
                print(f"  ✗ Error processing {id_fallo}: {str(e)}")
    
    print("Unified JSON generation completed!")

# Run the function
build_unified_json()

Processing month: 03
  ✓ Created unified JSON for 8985
  ✓ Created unified JSON for 8946
  ✓ Created unified JSON for 9029
  ✓ Created unified JSON for 9025
  ✓ Created unified JSON for 9024
  ✓ Created unified JSON for 9032
  ✓ Created unified JSON for 8947
  ✓ Created unified JSON for 8984
  ✓ Created unified JSON for 9019
  ✓ Created unified JSON for 9035
  ✓ Created unified JSON for 9023
  ✓ Created unified JSON for 8887
  ✓ Created unified JSON for 8940
  ✓ Created unified JSON for 8852
  ✓ Created unified JSON for 8916
  ✓ Created unified JSON for 8613
  ✓ Created unified JSON for 8903
  ✓ Created unified JSON for 8939
  ✓ Created unified JSON for 8997
  ✓ Created unified JSON for 8996
  ✓ Created unified JSON for 9020
  ✓ Created unified JSON for 8959
  ✓ Created unified JSON for 8840
  ✓ Created unified JSON for 9031
  ✓ Created unified JSON for 8973
  ✓ Created unified JSON for 8932
  ✓ Created unified JSON for 8949
  ✓ Created unified JSON for 9030
  ✓ Created unified JSON fo