In [1]:
import json
from pathlib import Path
import re

# Root folder that is scanned below: each sub-folder is treated as a committee,
# and each sub-folder of a committee as one meeting.
BASE_DIR = Path("../data/council_documents/")
# JSONL file (one JSON record per line) that existing records are loaded from
# and that the merged records are written back to.
OUTPUT_FILE = Path("../data/events/meetings_metadata.jsonl")
# Create the output directory (and any missing parents) up front so the final
# write cannot fail on a missing folder; a no-op when it already exists.
OUTPUT_FILE.parent.mkdir(parents=True, exist_ok=True)

def slugify(text):
    """Return *text* as a lowercase, underscore-separated identifier.

    After lowercasing, every run of characters other than ``a-z`` and ``0-9``
    is collapsed into a single underscore, and underscores at either end of
    the result are removed.
    """
    lowered = text.lower()
    collapsed = re.sub(r"[^a-z0-9]+", "_", lowered)
    return collapsed.strip("_")

# In-memory index of meeting records, keyed by each record's "meeting_id".
meetings_metadata = {}

# Load existing metadata if file exists, so records from earlier runs are kept
# and merged with whatever the folder walk finds.
if OUTPUT_FILE.exists():
    # Explicit UTF-8 instead of the platform's locale default, so the file is
    # read the same way on every machine.
    with open(OUTPUT_FILE, encoding="utf-8") as f:
        for line in f:
            # Skip blank lines (e.g. left behind by a hand edit); passing an
            # empty string to json.loads would raise and abort the whole run.
            if not line.strip():
                continue
            record = json.loads(line)
            meetings_metadata[record["meeting_id"]] = record

# Walk the committee/date folders: BASE_DIR/<committee>/<meeting>/.
# Path.iterdir() yields entries in arbitrary order, so both levels are sorted
# to keep the order of newly added records (and therefore the output file)
# reproducible from run to run.
for committee_folder in sorted(BASE_DIR.iterdir()):
    if not committee_folder.is_dir():
        continue

    committee_name = committee_folder.name
    committee_id = f"kent_cc__{slugify(committee_name)}"

    for meeting_folder in sorted(committee_folder.iterdir()):
        if not meeting_folder.is_dir():
            continue

        # The meeting folder's name is used verbatim as the meeting date
        # (presumably a date string -- it is not validated here).
        date_str = meeting_folder.name
        summary_file = meeting_folder / "summary.txt"
        meeting_id = f"{date_str}_{committee_id}"

        # Load or create metadata. A record already loaded from the output
        # file is reused as-is, so any extra fields stored on it survive.
        record = meetings_metadata.get(meeting_id)
        if record is None:
            record = {
                "meeting_id": meeting_id,
                "committee_id": committee_id,
                "meeting_date": date_str,
                "folder_path": str(meeting_folder),
            }

        # Inject summary if available (overwrites any previously stored one).
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        if summary_file.exists():
            record["summary"] = summary_file.read_text(encoding="utf-8").strip()

        meetings_metadata[meeting_id] = record

# Save to JSONL (one JSON object per line).
# Serialize every record BEFORE opening the file: open(..., "w") truncates the
# file immediately, so a json.dumps failure part-way through would otherwise
# leave a partially written file and lose the records loaded from it.
serialized_records = [json.dumps(rec) + "\n" for rec in meetings_metadata.values()]

# json.dumps escapes non-ASCII characters by default, so the content is plain
# ASCII; the explicit encoding just removes the dependence on the locale.
with open(OUTPUT_FILE, "w", encoding="utf-8") as f:
    f.writelines(serialized_records)

print(f"✅ Extracted and saved {len(meetings_metadata)} meeting records to {OUTPUT_FILE}")

✅ Extracted and saved 29 meeting records to ../data/events/meetings_metadata.jsonl
