In [1]:
import json
import os
from pathlib import Path

def relabel_json_files(directory="granular"):
    """Rename and reorder the top-level keys of every ``*.json`` file in a directory.

    Each file is rewritten in place so that it contains exactly the keys in
    ``desired_order`` (in that order). The value for each key is taken from the
    file's old key name if present, otherwise from the new key name (so files
    that were already relabeled keep their data when the function is run
    again), otherwise it is initialized to an empty list and a warning is
    printed. Top-level keys that are not part of the mapping are not carried
    over.

    Args:
        directory: Directory containing the JSON files. Defaults to
            ``"granular"``, relative to the current working directory.

    Returns:
        ``(processed_count, error_count)``, or ``None`` if ``directory`` does
        not exist. Files that cannot be read, parsed, or whose top level is not
        a JSON object are counted as errors and left untouched.
    """
    # Old key -> new key.
    key_mapping = {
        "Employment": "Employment",
        "Housing": "HousingInstability",
        "Food": "FoodInsecurity",
        "Financial": "FinancialStrain",
        "Transportation": "Transportation",
        "Childcare": "Childcare",
        "SubstanceAbuse": "SubstanceAbuse",
        "Safety": "Safety",
        "Permanency": "Permanency"
    }

    # Order in which the new keys are written to each file.
    desired_order = [
        "Employment",
        "HousingInstability",
        "FoodInsecurity",
        "FinancialStrain",
        "Transportation",
        "Childcare",
        "Permanency",
        "SubstanceAbuse",
        "Safety"
    ]

    # Invert the mapping once instead of scanning it for every key of every file.
    old_key_for = {new: old for old, new in key_mapping.items()}

    granular_dir = Path(directory)

    if not granular_dir.exists():
        print(f"Directory {granular_dir} does not exist!")
        return

    json_files = list(granular_dir.glob("*.json"))
    total_files = len(json_files)

    print(f"Found {total_files} JSON files to process...")

    processed_count = 0
    error_count = 0

    for json_file in json_files:
        try:
            with open(json_file, 'r', encoding='utf-8') as f:
                data = json.load(f)

            # A list/string/number at top level has no keys to relabel; treating
            # it as "all keys missing" would replace the file with empty lists.
            if not isinstance(data, dict):
                raise ValueError(
                    f"expected a JSON object at top level, got {type(data).__name__}"
                )

            new_data = {}
            for new_key in desired_order:
                old_key = old_key_for[new_key]
                if old_key in data:
                    new_data[new_key] = data[old_key]
                elif new_key in data:
                    # File was already relabeled: keep its data instead of
                    # overwriting it with an empty list on a second run.
                    new_data[new_key] = data[new_key]
                else:
                    new_data[new_key] = []
                    print(
                        f"Warning: {json_file.name} missing key '{old_key}' "
                        f"(or '{new_key}'), initialized as empty list"
                    )

            # Write to a sibling temp file and rename it over the original so a
            # failure mid-write cannot leave a truncated JSON file behind. The
            # ".tmp" suffix keeps it out of the "*.json" glob above.
            tmp_file = json_file.with_name(json_file.name + ".tmp")
            try:
                with open(tmp_file, 'w', encoding='utf-8') as f:
                    json.dump(new_data, f)
                tmp_file.replace(json_file)
            except BaseException:
                if tmp_file.exists():
                    tmp_file.unlink()
                raise

            processed_count += 1

            # Print progress every 100 files
            if processed_count % 100 == 0:
                print(f"Processed {processed_count}/{total_files} files...")

        except Exception as e:
            print(f"Error processing {json_file.name}: {str(e)}")
            error_count += 1

    print("\n=== RELABELING COMPLETE ===")
    print(f"Total files: {total_files}")
    print(f"Successfully processed: {processed_count}")
    print(f"Errors: {error_count}")

    return processed_count, error_count

# Run the relabeling. NOTE: this rewrites the JSON files under granular/ in
# place. As the last expression in the cell, the returned
# (processed_count, error_count) tuple is displayed as the cell output.
relabel_json_files()


Found 3302 JSON files to process...
Processed 100/3302 files...
Processed 200/3302 files...
Processed 300/3302 files...
Processed 400/3302 files...
Processed 500/3302 files...
Processed 600/3302 files...
Processed 700/3302 files...
Processed 800/3302 files...
Processed 900/3302 files...
Processed 1000/3302 files...
Processed 1100/3302 files...
Processed 1200/3302 files...
Processed 1300/3302 files...
Processed 1400/3302 files...
Processed 1500/3302 files...
Processed 1600/3302 files...
Processed 1700/3302 files...
Processed 1800/3302 files...
Processed 1900/3302 files...
Processed 2000/3302 files...
Processed 2100/3302 files...
Processed 2200/3302 files...
Processed 2300/3302 files...
Processed 2400/3302 files...
Processed 2500/3302 files...
Processed 2600/3302 files...
Processed 2700/3302 files...
Processed 2800/3302 files...
Processed 2900/3302 files...
Processed 3000/3302 files...
Processed 3100/3302 files...
Processed 3200/3302 files...
Processed 3300/3302 files...

=== RELABELING 

(3302, 0)

In [3]:
def reserialize_json_file(path):
    """Round-trip one JSON file through ``json.load`` / ``json.dump`` in place.

    The file is parsed and then rewritten with ``json.dump``'s default
    formatting. The read handle is closed before the file is reopened for
    writing, so the file is never truncated while it is still open for reading.

    Args:
        path: Path of the JSON file to rewrite.

    Returns:
        The parsed JSON data that was written back.
    """
    with open(path, 'r', encoding='utf-8') as src:
        data = json.load(src)
    with open(path, 'w', encoding='utf-8') as dst:
        json.dump(data, dst)
    return data


granular_dir = Path("granular")

# Re-serialize every JSON file in the directory.
json_files = list(granular_dir.glob("*.json"))
total_files = len(json_files)
for json_file in json_files:
    data = reserialize_json_file(json_file)