In [76]:
import json
import csv
import os
import pandas as pd
from pathlib import Path

In [77]:
def process_csv(file_path, output_json_path):
    """Convert one agreements CSV into a JSON file of signatory records.

    The CSV is reduced to a fixed set of agreement/actor columns, the
    " (dd/mm/yyyy)" suffix is removed from agreement names, the ``Dat``
    column is normalised to ``dd/mm/yyyy`` text, columns are renamed, and two
    constant columns (``Type`` and ``target_type``) are appended before the
    frame is written out with ``orient='records'``.

    Args:
        file_path: CSV file to read. It must contain every column named in
            ``wanted_columns``; pandas raises ``KeyError`` otherwise.
        output_json_path: Where the JSON file is written.

    Returns:
        None. The result is the file on disk plus a "Saved ..." line (and an
        optional warning about missing dates) on stdout.
    """
    wanted_columns = [
        'AgtId', 'agt_name', 'PPName', 'Dat', 'stage_process', 'agt_type',
        'year', 'signatory_type', 'actor_name', 'actor_type', 'PAX_Hyperlink',
    ]
    agreements = pd.read_csv(file_path)[wanted_columns].copy()

    # Strip the " (dd/mm/yyyy)" date suffix embedded in the agreement name.
    agreements['agt_name'] = agreements['agt_name'].str.replace(
        r' \(\d{2}/\d{2}/\d{4}\)', '', regex=True
    )

    # Blank and placeholder strings count as missing dates, not parse failures.
    placeholders = ['', 'nan', 'NaN', 'NA', 'N/A']
    date_text = agreements['Dat'].astype(str).str.strip()
    date_text = date_text.replace({token: pd.NA for token in placeholders})

    # First pass: strict dd/mm/yyyy.
    dates = pd.to_datetime(date_text, format='%d/%m/%Y', errors='coerce', dayfirst=True)

    # Second pass: values that are present but failed the strict format are
    # re-parsed with pandas' own format inference (covers ISO-style dates).
    retry = date_text.notna() & dates.isna()
    if retry.any():
        dates.loc[retry] = pd.to_datetime(date_text.loc[retry], errors='coerce', dayfirst=False)

    # Back to text; dates that are still NaT come out as missing values.
    agreements['Dat'] = dates.dt.strftime('%d/%m/%Y')

    missing_dates = agreements['Dat'].isna().sum()
    if missing_dates > 0:
        print(f"Warning: {missing_dates} rows could not be parsed and remain NaT.")

    column_names = {
        # 'agt_description' is not among wanted_columns, so this entry is a no-op.
        'agt_description': 'description',
        'agt_name': 'Agt',
        'stage_process': 'stage_label',
        'Dat': 'date',
    }
    records = agreements.rename(columns=column_names).assign(
        Type='Peace Agreement',
        target_type='signatory',
    )

    records.to_json(output_json_path, orient='records', indent=4)
    print(f"Saved {output_json_path}")

# Load the list of countries to convert. Each entry provides a 'country'
# name (used to build the CSV file name) and an 'abbr' (used for the JSON
# file name). JSON text is UTF-8 by specification, so decode it explicitly
# rather than relying on the platform's default encoding.
with open('country_code.json', encoding='utf-8') as f:
    country_codes = json.load(f)

output_folder = Path('output')
output_folder.mkdir(exist_ok=True)

for entry in country_codes:
    country = entry['country']
    abbr = entry['abbr']
    fname = f"all_agts_with_{country.replace(' ', '_')}.csv"
    # Prefer a CSV in the working directory; otherwise look in 'intermediary/'.
    path_local = Path(fname)
    path_alt = Path('intermediary') / fname
    input_path = path_local if path_local.exists() else path_alt
    if not input_path.exists():
        # Neither location has the file: report it and move on to the next country.
        print(f"Skip {country}: {input_path} not found")
        continue
    output_json_path = output_folder / f"{abbr}_agt.json"
    process_csv(input_path, output_json_path)


Saved output/RUS_agt.json
Saved output/UKG_agt.json
Saved output/CHN_agt.json
Saved output/UN_agt.json
Saved output/QAT_agt.json


In [78]:
def csv_to_json(country_codes, output_folder="output"):
    """Convert each country's actor CSV into a JSON records file.

    For every entry, the file ``actors_with_<country>.csv`` (spaces in the
    country name replaced by underscores) is looked up first in the working
    directory and then under ``intermediary/``. A file that is found is
    written to ``<output_folder>/<abbr>_actor.json``; a missing one is
    reported on stdout and skipped.

    Args:
        country_codes: Iterable of mappings with a 'country' and an 'abbr' key.
        output_folder: Destination directory. It is created if needed,
            including any missing parent directories.

    Returns:
        None. Progress is printed, one line per country.
    """
    output_folder = Path(output_folder)
    # parents=True lets callers pass a nested destination such as
    # "results/json" without having to create the parent directory first.
    output_folder.mkdir(parents=True, exist_ok=True)

    for entry in country_codes:
        country = entry['country']
        abbr = entry['abbr']
        fname = f"actors_with_{country.replace(' ', '_')}.csv"
        # Prefer a CSV in the working directory; otherwise look in 'intermediary/'.
        path_local = Path(fname)
        path_alt = Path('intermediary') / fname
        input_path = path_local if path_local.exists() else path_alt
        if not input_path.exists():
            print(f"Skip {country}: {input_path} not found")
            continue
        df = pd.read_csv(input_path)
        json_file_path = output_folder / f"{abbr}_actor.json"
        df.to_json(json_file_path, orient='records', indent=4)
        print(f"Saved {json_file_path}")

# Reload the country list so this cell can run on its own. JSON text is
# UTF-8 by specification, so decode it explicitly rather than relying on the
# platform's default encoding.
with open('country_code.json', encoding='utf-8') as f:
    country_codes = json.load(f)

# Write one <abbr>_actor.json per country into the default 'output' folder.
csv_to_json(country_codes)


Saved output/RUS_actor.json
Saved output/UKG_actor.json
Saved output/CHN_actor.json
Saved output/UN_actor.json
Saved output/QAT_actor.json
