In [1]:
# Ingestory System


In [10]:
# === Google News Local feed (readable version) ===
# Works in Colab/Jupyter. Just change PLACE.

!pip -q install feedparser

import requests, feedparser

# Change this to your city/county
PLACE = "New York NY"

# Short header string = polite, but simple
HEADERS = {"User-Agent": "CivicPulse/0.1"}

def build_feed_url(place):
    """Return the Google News local RSS feed URL for a place."""
    base = "https://news.google.com/rss/local/section/geo/"
    tail = "?hl=en-US&gl=US&ceid=US:en"
    return base + requests.utils.quote(place) + tail

def fetch_feed(url):
    """Download and parse the RSS feed into a Python object."""
    r = requests.get(url, headers=HEADERS, timeout=20)
    r.raise_for_status()
    return feedparser.parse(r.text)

# Build URL and fetch results
feed_url = build_feed_url(PLACE)
feed = fetch_feed(feed_url)

print(f"Local feed URL:\n{feed_url}\n")
print(f"Found {len(feed.entries)} stories:\n")

# Print titles + metadata
for i, story in enumerate(feed.entries, 1):
    # print(story)
    print(story.source.title)
    print(f"{i:02d}. {story.title}")
    print(f"    {story.published}")
    print(f"    {story.link}\n")
    print(story)



Local feed URL:
https://news.google.com/rss/local/section/geo/New%20York%20NY?hl=en-US&gl=US&ceid=US:en

Found 75 stories:

Gothamist
01. Broadway musicians and actors are mulling strikes. It could spell trouble for NYC theater. - Gothamist
    Mon, 13 Oct 2025 15:32:00 GMT
    https://news.google.com/rss/articles/CBMitgFBVV95cUxONlRjR0RZX2NPTWl1dFZvUzY0cjFib0RKUTc4ZzFKMDV6Z0x5WkQ3d2hla24yRDlaUFMyeldYTnFVakRZZUVnZms5TlpaUHk0NzdDYWhTTVhwckxiSHBxYVM1MFN6NVV1aWJrUVN2dDdZcTM3ZE5JM0xPVXR4YW85Zl9UN3pxTkVOWnhacnhoNkZPWTE3Mnk1aEVjeXgyNE84YXd1YWVHa2dVYTZfZHh3R3A1SGs4UQ?oc=5

{'title': 'Broadway musicians and actors are mulling strikes. It could spell trouble for NYC theater. - Gothamist', 'title_detail': {'type': 'text/plain', 'language': None, 'base': '', 'value': 'Broadway musicians and actors are mulling strikes. It could spell trouble for NYC theater. - Gothamist'}, 'links': [{'rel': 'alternate', 'type': 'text/html', 'href': 'https://news.google.com/rss/articles/CBMitgFBVV95cUxONlRjR0RZX2NP

In [15]:
feed.entries

[{'title': 'What to know about vaccines in NY as students go back to school - Gothamist',
  'title_detail': {'type': 'text/plain',
   'language': None,
   'base': '',
   'value': 'What to know about vaccines in NY as students go back to school - Gothamist'},
  'links': [{'rel': 'alternate',
    'type': 'text/html',
    'href': 'https://news.google.com/rss/articles/CBMilAFBVV95cUxNbDhIUzNfZ01BcGVQd3BQR01kTTQtUm9HOFljcXRla3JDV2JGRENPNXRXVVQyT2RWZ3c0RGx6NXRRM25rcnB4Umh2NkZ6QmZtamdJRWpkaE9UeDdKc3E5Rnp5V2NyYnVHTW54bVRYSG1BTE1EcE14TkxmZU5jT29lMUNVVTNaX2dpcXBJZUx4UEg4cEJF?oc=5'}],
  'link': 'https://news.google.com/rss/articles/CBMilAFBVV95cUxNbDhIUzNfZ01BcGVQd3BQR01kTTQtUm9HOFljcXRla3JDV2JGRENPNXRXVVQyT2RWZ3c0RGx6NXRRM25rcnB4Umh2NkZ6QmZtamdJRWpkaE9UeDdKc3E5Rnp5V2NyYnVHTW54bVRYSG1BTE1EcE14TkxmZU5jT29lMUNVVTNaX2dpcXBJZUx4UEg4cEJF?oc=5',
  'id': 'CBMilAFBVV95cUxNbDhIUzNfZ01BcGVQd3BQR01kTTQtUm9HOFljcXRla3JDV2JGRENPNXRXVVQyT2RWZ3c0RGx6NXRRM25rcnB4Umh2NkZ6QmZtamdJRWpkaE9UeDdKc3E5Rnp5V2NyYnVHTW54bV

In [None]:
# === Label Google News Local feed titles with CIN taxonomy ===
# Assumes `feed`, `PLACE`, and `feed_url` already exist from your previous cell.

!pip -q install openai pandas

import os, json, time
import pandas as pd
import requests
from urllib.parse import urlparse
from openai import OpenAI

# --- 0) OpenAI setup ---
# client = OpenAI()
client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
# client = OpenAI(api_key="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")

MODEL = "gpt-4o-mini"   # fast & affordable
BATCH_SIZE = 25         # label many titles per API call
SLEEP_BETWEEN = 0.2     # short pause between calls

# --- 1) Build a DataFrame from the feed ---
rows = []
for e in feed.entries:
    title = getattr(e, "title", "").strip()
    link = getattr(e, "link", "")
    published = getattr(e, "published", "") or getattr(e, "updated", "")
    source = ""
    if hasattr(e, "source") and isinstance(e.source, dict):
        source = e.source.get("title") or ""
    rows.append({
        "place": PLACE,
        "title": title,
        "link": link,
        "published": published,
        "source": source,
        "domain": urlparse(link).netloc,
        "feed_url": feed_url,
    })

df = pd.DataFrame(rows)
print(f"DataFrame built: {len(df)} rows")
display(df.head(5))

# --- 2) Your taxonomy + instructions (7 + other) ---
CIN_LABELS = [
    "emergencies_risks",       # 1) Emergencies and risks
    "health_welfare",          # 2) Health and welfare
    "education",               # 3) Education
    "transportation",          # 4) Transportation
    "economic_opportunities",  # 5) Economic opportunities
    "environment",             # 6) Environment
    "civic_information",       # 7) Civic information
    "political_information",   # 8) Political information
    "nonlocal",                # 9) Non-local/national/international news
    "other"                    # spillover
]

FEW_SHOTS = [
  # 1) emergencies_risks
  {"title": "Subway stabbing at Midtown station; suspect sought", "label": "emergencies_risks"},
  {"title": "Water main break triggers boil advisory downtown",    "label": "emergencies_risks"},

  # 2) health_welfare
  {"title": "City issues heat advisory; cooling centers open",     "label": "health_welfare"},
  {"title": "County clinic adds free vaccination hours Saturday",  "label": "health_welfare"},

  # 3) education
  {"title": "School calendar: holidays and parent-teacher nights", "label": "education"},
  {"title": "Teachers union and district reach tentative contract", "label": "education"},

  # 4) transportation
  {"title": "Bridge lane closure causes detour on Route 2",        "label": "transportation"},
  {"title": "Transit authority installs 80 EV chargers at hub",    "label": "transportation"},

  # 5) economic_opportunities
  {"title": "City launches small-business training grants",        "label": "economic_opportunities"},
  {"title": "Job fair to feature apprenticeships and CDL roles",   "label": "economic_opportunities"},

  # 6) environment
  {"title": "Air quality alert due to wildfire smoke",             "label": "environment"},
  {"title": "River restoration project opens new trail access",    "label": "environment"},

  # 7) civic_information
  {"title": "City Council passes $4.1B budget for sanitation",     "label": "civic_information"},
  {"title": "Judge blocks city plan to relocate migrant families", "label": "civic_information"},

  # 8) political_information
  {"title": "Redistricting map could boost one party",             "label": "political_information"},
  {"title": "Mayoral candidate launches campaign rally",           "label": "political_information"},

  # 9) nonlocal 
  {"title": "Putin Finds a Growing Embrace on the Global Stage", "label": "nonlocal"},
  {"title": "Fed should be independent but has made mistakes", "label": "nonlocal"},
  {"title": "Crime Crackdown in D.C. Shows Trump Administration's Policy", "label": "nonlocal"},
  {"title": "Xi, Putin and Modi Try to Signal Unity at China Summit", "label": "nonlocal"},
  {"title": "Russia Suspected of Jamming GPS for E.U. Leader's Plane", "label": "nonlocal"},
  {"title": "Supreme Court to hear case on federal immigration policy", "label": "nonlocal"},


  # other
  {"title": "Museum hosts free night for city workers",            "label": "other"},
  {"title": "Former official reveals Parkinson’s diagnosis",       "label": "other"},
]

def build_fewshot_block(fewshots):
    lines = [f'Headline: "{ex["title"]}"\nLabel: {ex["label"]}' for ex in fewshots]
    return "Examples:\n\n" + "\n\n".join(lines)

FEW_SHOT_TEXT = build_fewshot_block(FEW_SHOTS)

SYSTEM_INSTRUCTIONS = f"""
You are a careful classifier for LOCAL news headlines for {PLACE}. Choose exactly ONE label from:
{', '.join(CIN_LABELS)}.

Definitions (map each headline to the single best-fitting domain):
1) Emergencies and risks — immediate/long-term threats: crime, accidents, severe weather, outages, disasters.
2) Health and welfare — hospitals, clinics, disease/outbreaks, public advisories, group-specific health information.
3) Education — schools, teachers, students, universities, closures, policies, parent/child educational choices.
4) Transportation — roads, traffic, transit systems, costs, schedules, detours, rail/subway/ferry, charging.
5) Economic opportunities — jobs, job training, small-business assistance, affordability/cost-of-living.
6) Environment — air/water quality, environmental hazards, climate impacts, restoration/recreation/parks.
7) Civic information — government services, council/courts, ordinances, budgets, agency notices, civic associations.
8) Political information — candidates, campaigns, elections, parties, partisan conflict, public policy debates.
9) Nonlocal — national/international news, federal policy, foreign affairs, stories not directly relevant to local residents.
10) Other — everything else (sports, arts, entertainment, celebrity/individual health updates, human interest).

Tie-break priority if multiple seem plausible:
nonlocal > emergencies_risks > civic_information > political_information > transportation > health_welfare > education > economic_opportunities > environment > other.

IMPORTANT: Be aggressive about labeling as "nonlocal". If a story is primarily about national politics, international affairs, or federal policy (even if mentioned by local outlets), label it "nonlocal".

Return ONLY a JSON array. For each item:
{{"id": <int>, "category": <one of labels>, "confidence": <0..1>, "reason": "<=20 words>"}}

{FEW_SHOT_TEXT}
"""
# --- 3) Batch classify titles (cheaper/faster than per-row calls) ---
def classify_batch(titles):
    items = [{"id": i+1, "title": t} for i, t in enumerate(titles)]
    user_msg = (
        "Classify each headline below. Respond ONLY with a JSON array of objects "
        "with keys: id, category, confidence, reason.\n\n"
        + json.dumps(items, ensure_ascii=False)
    )

    try:
        # CORRECT OpenAI API call
        resp = client.chat.completions.create(
            model=MODEL,
            temperature=0,
            response_format={"type": "json_object"},
            messages=[
                {"role": "system", "content": SYSTEM_INSTRUCTIONS},
                {"role": "user", "content": user_msg},
            ],
        )
        raw = resp.choices[0].message.content
        print(f"Raw API response: {raw}")  # Debug output
        
        data = json.loads(raw)
        
        # Handle different response formats
        if not isinstance(data, list):
            if "results" in data:
                data = data["results"]
            elif "items" in data:
                data = data["items"]
            else:
                data = [data] if isinstance(data, dict) else []
        
        # sanitize categories
        for r in data:
            cat = (r.get("category") or "other").strip().lower()
            r["category"] = cat if cat in CIN_LABELS else "other"
            r["confidence"] = float(r.get("confidence", 0))
            r["reason"] = str(r.get("reason", ""))[:140]
        return data
        
    except Exception as e:
        print(f"Error in API call: {e}")
        # fallback: everything "other"
        return [{"id": i+1, "category":"other", "confidence":0.2, "reason":"fallback"} for i in range(len(titles))]

# --- 4) Run batching over the DataFrame and merge results ---
labels = []
indices = []
for start in range(0, len(df), BATCH_SIZE):
    sub = df.iloc[start:start+BATCH_SIZE]
    results = classify_batch(sub["title"].fillna("").tolist())
    # align results by order
    for idx, r in zip(sub.index, results):
        labels.append({"cin_label": r["category"], "cin_confidence": r["confidence"], "cin_reason": r["reason"]})
        indices.append(idx)
    time.sleep(SLEEP_BETWEEN)

lab_df = pd.DataFrame(labels, index=indices)
labeled = df.join(lab_df)

# --- 5) Quick looks + save ---
labeled["published"] = pd.to_datetime(labeled["published"], errors="coerce", utc=True)
labeled = labeled.sort_values("published", ascending=False)

display(labeled[["published","title","source","cin_label","cin_confidence","cin_reason"]].head(15))
print("\nLabel distribution:\n", labeled["cin_label"].value_counts())


out_path = f"local_news_labeled_{PLACE.replace(' ','_')}.csv"
labeled.to_csv(out_path, index=False)
print("\nSaved ->", out_path)

# Filter out nonlocal content for summary generation
original_count = len(labeled)
labeled = labeled[labeled["cin_label"] != "nonlocal"].reset_index(drop=True)
filtered_count = len(labeled)
nonlocal_removed = original_count - filtered_count

print(f"\nFiltered out {nonlocal_removed} nonlocal stories")
print(f"Keeping {filtered_count} local stories for civic digest")
print("\nFiltered label distribution:\n", labeled["cin_label"].value_counts())


DataFrame built: 75 rows


Unnamed: 0,place,title,link,published,source,domain,feed_url
0,New York NY,What to know about vaccines in NY as students ...,https://news.google.com/rss/articles/CBMilAFBV...,"Mon, 01 Sep 2025 12:00:00 GMT",Gothamist,news.google.com,https://news.google.com/rss/local/section/geo/...
1,New York NY,Seniors get free on-demand rides as New York e...,https://news.google.com/rss/articles/CBMiqgFBV...,"Mon, 01 Sep 2025 14:48:28 GMT",News 12,news.google.com,https://news.google.com/rss/local/section/geo/...
2,New York NY,5 Places to Celebrate Oktoberfest in NYC - Pla...,https://news.google.com/rss/articles/CBMieEFVX...,"Mon, 01 Sep 2025 13:00:00 GMT",Playbill,news.google.com,https://news.google.com/rss/local/section/geo/...
3,New York NY,"Xi, Putin and Modi Try to Signal Unity at Chin...",https://news.google.com/rss/articles/CBMiygRBV...,"Mon, 01 Sep 2025 09:04:04 GMT",The New York Times,news.google.com,https://news.google.com/rss/local/section/geo/...
4,New York NY,India Was the Economic Alternative to China. T...,https://news.google.com/rss/articles/CBMizgRBV...,"Mon, 01 Sep 2025 04:00:09 GMT",The New York Times,news.google.com,https://news.google.com/rss/local/section/geo/...


Raw API response: {
  "results": [
    {"id": 1, "category": "education", "confidence": 0.9, "reason": "Discusses vaccines as students return to school."},
    {"id": 2, "category": "transportation", "confidence": 0.8, "reason": "Focuses on transportation services for seniors."},
    {"id": 3, "category": "other", "confidence": 0.7, "reason": "Cultural event coverage, not fitting other categories."},
    {"id": 4, "category": "political_information", "confidence": 0.8, "reason": "Discusses international leaders and their unity."},
    {"id": 5, "category": "economic_opportunities", "confidence": 0.7, "reason": "Analyzes economic alternatives in a political context."},
    {"id": 6, "category": "emergencies_risks", "confidence": 0.9, "reason": "Reports on a significant earthquake and casualties."},
    {"id": 7, "category": "political_information", "confidence": 0.8, "reason": "Focuses on global political discussions."},
    {"id": 8, "category": "political_information", "confidence": 0

Unnamed: 0,published,title,source,cin_label,cin_confidence,cin_reason
58,2025-09-01 16:28:16+00:00,Brooklyn’s Michelin-starred taqueria is coming...,Time Out Worldwide,other,0.6,Restaurant opening is entertainment-related.
55,2025-09-01 16:28:16+00:00,The team behind Semma is finally opening their...,Time Out Worldwide,other,0.6,Opening of a restaurant is entertainment-related.
37,2025-09-01 15:39:47+00:00,A last view of a Tribeca mural - Tribeca Citizen,Tribeca Citizen,other,0.5,Artistic commentary on a mural.
19,2025-09-01 15:15:50+00:00,Brazen robber walks off with entire cash regis...,Audacy,emergencies_risks,0.9,Reports on a crime spree involving robbery.
38,2025-09-01 15:04:28+00:00,Comptroller Lander’s Office Hits $15 Million M...,NYC.gov,economic_opportunities,0.8,Securing back wages for workers.
1,2025-09-01 14:48:28+00:00,Seniors get free on-demand rides as New York e...,News 12,transportation,0.8,Focuses on transportation services for seniors.
7,2025-09-01 14:07:04+00:00,Putin says NATO expansion must be addressed fo...,Reuters,political_information,0.8,Addresses NATO and Ukraine peace discussions.
35,2025-09-01 13:59:00+00:00,Off-Broadway's Heathers Will Tour the U.S. - P...,Playbill,other,0.6,Entertainment news about a theater tour.
20,2025-09-01 13:55:00+00:00,Here’s why N.Y. Gov. Hochul has lit up these l...,SILive.com,civic_information,0.7,Explains the significance of landmarks lit for...
21,2025-09-01 13:15:45+00:00,Retired NYC workers gear up for rally at City ...,amNewYork,civic_information,0.8,Discusses a rally for healthcare protection.



Label distribution:
 cin_label
other                     28
emergencies_risks         11
political_information      8
economic_opportunities     7
civic_information          7
environment                5
transportation             4
health_welfare             3
education                  2
Name: count, dtype: int64

Saved -> local_news_labeled_New_York_NY.csv


In [None]:
## ROLES NEEDED

# BIG PICTURE/OVERALL OVERSIGHT
# OVERALL PRODUCT DELIVERY / UX DESIGN
# ENGINEERING: (MAIN PRIORITY)
# DATA - AWS INFRA DEVELOPMENT PIPELINE
# DATA - SUMMARIZATION QUALITY 
# DATA - INPUT SOURCES TRADEOFFS

In [None]:
from collections import defaultdict
from datetime import timezone
import math, json

# --- 0) Pretty names + ordering for sections ---
CIN_ORDER = [
    "emergencies_risks",
    "civic_information",
    "political_information",
    "transportation",
    "health_welfare",
    "education",
    "economic_opportunities",
    "environment",
    "other",
]
CIN_PRETTY = {
    "emergencies_risks": "Emergencies & Risks",
    "civic_information": "Civic Information",
    "political_information": "Political Information",
    "transportation": "Transportation",
    "health_welfare": "Health & Welfare",
    "education": "Education",
    "economic_opportunities": "Economic Opportunities",
    "environment": "Environment",
    "other": "Other",
}

## NAIVE SCORING MECHANISM = AFTER WE INGEST THE RAW DATA, HOW DO WE ANALYZE/SCORE IT? 

# --- 1) Scoring + selection helper reused for each section ---
def importance_score(row, now=None):
    now = now or pd.Timestamp.now(tz=timezone.utc)
    pub = row["published"]
    hours = 72 if pd.isna(pub) else max(0.0, (now - pub).total_seconds()/3600.0)
    recency = math.exp(-math.log(2) * hours / 24.0)  # 24h half-life
    conf = float(row.get("cin_confidence", 0.5))
    # small boost for high-confidence items
    return 0.65 * recency + 0.35 * conf

def select_top_in_cat(df_cat, k=4):
    if df_cat.empty:
        return df_cat
    d = df_cat.copy()
    d["score"] = d.apply(importance_score, axis=1)
    return d.sort_values(["score","published"], ascending=[False, False]).head(k)

def format_items_for_context(df_cat):
    lines = []
    for _, r in df_cat.iterrows():
        ts = r["published"].strftime("%Y-%m-%d %H:%M UTC") if not pd.isna(r["published"]) else "unknown"
        src = r["source"] or r["domain"]
        lines.append(f"- {r['title']} (source: {src}, {ts})\n  Link: {r['link']}")
    return "\n".join(lines)

# --- 2) Build per-section contexts ---
per_section = {}

for cat in CIN_ORDER:
    cat_df = labeled[labeled["cin_label"] == cat]
    top_cat = select_top_in_cat(cat_df, k=4)  # adjust k per your preference
    if not top_cat.empty:
        per_section[cat] = format_items_for_context(top_cat)


# --- Examples as variables for consistent section formatting ---
SECTION_EXAMPLES = {
    "emergencies_risks": {
        "topline": "Recent incidents include a street takeover in Secaucus and a subway station stabbing, highlighting ongoing safety concerns.",
        "bullets": [
            "A chaotic street takeover in Secaucus led to police vehicles being surrounded. [Read](link)",
            "Former mayor hospitalized after car crash on Manhattan bridge. [Read](link)",
            "Three men injured in East Village shooting outside residential building. [Read](link)"
        ]
    },
    "civic_information": {
        "topline": "City Council passed budget measures while courts addressed key legal challenges affecting local governance.",
        "bullets": [
            "Judge temporarily blocks removal of Guatemalan children amid legal challenges. [Read](link)",
            "City Council approves $4.1B sanitation budget for improved waste management. [Read](link)"
        ]
    },
    "transportation": {
        "topline": "Transit disruptions continue with light rail cancellations and bridge lane closures affecting commuters.",
        "bullets": [
            "NJ Transit canceled nearly 100 light rail trains due to signal issues. [Read](link)",
            "Manhattan Bridge lane closure causes delays through Thursday morning. [Read](link)"
        ]
    }
}

def build_examples_text(examples_dict):
    """Convert examples dict to formatted text for prompts"""
    lines = []
    for category, content in examples_dict.items():
        pretty_name = CIN_PRETTY.get(category, category.title())
        lines.append(f"**{pretty_name} Example:**")
        lines.append(content["topline"])
        lines.append("")
        for bullet in content["bullets"]:
            lines.append(f"- {bullet}")
        lines.append("")
    return "\n".join(lines)

EXAMPLES_TEXT = build_examples_text(SECTION_EXAMPLES)

# Updated SECTION_SYSTEM with examples
SECTION_SYSTEM = f"""
You are an editor summarizing local news for a single domain.
Summarize only items provided. Be concise, factual, non-duplicative.

FORMAT REQUIREMENTS:
- Start with a one-sentence topline (≤ 25 words)
- Then 2–4 bullets. Each bullet: 1 sentence (≤ 22 words), include [Read](URL) link
- Include timestamps only if timing is critical
- Follow the exact format shown in examples below

{EXAMPLES_TEXT}

Output Markdown following this exact format. Do not invent facts. Use provided URLs only.
"""

def summarize_section(cat_key, context):
    pretty = CIN_PRETTY.get(cat_key, cat_key.title())
    user_msg = f"""Domain: {pretty}
Items:
{context}

Write Markdown for this domain only. Do not invent facts. Include links as [Read](URL)."""
    resp = client.chat.completions.create(
        model=MODEL,
        temperature=0.2,
        messages=[
            {"role": "system", "content": SECTION_SYSTEM},
            {"role": "user", "content": user_msg},
        ],
    )
    return f"### {pretty}\n\n" + resp.choices[0].message.content.strip()

section_markdowns = []
sections_map = {}

for cat in CIN_ORDER:
    ctx = per_section.get(cat)
    if ctx:
        md_block = summarize_section(cat, ctx)
        section_markdowns.append(md_block)

        # NEW: also save a structured version
        sections_map[cat] = {
            "title": CIN_PRETTY.get(cat, cat.title()),
            "summary_md": md_block,
            "items": ctx.splitlines()  # keep minimal; replace later with a parser if you want
        }

# --- 4) (Optional) Global top line from all included items ---
all_ctx = "\n".join(per_section[c] for c in CIN_ORDER if c in per_section)
TOPLINE_SYSTEM = "Write a single 1–2 sentence 'Top Line' (<= 50 words) summarizing the most important cross-domain updates. Use only the provided items."
topline_resp = client.chat.completions.create(
    model=MODEL,
    temperature=0.2,
    messages=[
        {"role": "system", "content": TOPLINE_SYSTEM},
        {"role": "user", "content": all_ctx},
    ],
)
topline = topline_resp.choices[0].message.content.strip()

# --- 5) Stitch final Markdown and save ---
final_md = topline + "\n\n" + "\n\n".join(section_markdowns)
print(final_md[:1500])

md_path = f"civicpulse_sections_{PLACE.replace(' ','_')}.md"
with open(md_path, "w", encoding="utf-8") as f:
    f.write(final_md)
print("Saved ->", md_path)


**Top Line:** In recent news, an NYPD officer was struck in a hit-and-run in Brooklyn, while a fire in Williamsburg injured eight people; additionally, a judge temporarily blocked the removal of Guatemalan children amid rising immigration arrests in NYC.

### Emergencies & Risks

Recent incidents highlight ongoing safety concerns in Brooklyn and Secaucus, with multiple injuries reported.

- An NYPD officer was struck by a hit-and-run driver in Brooklyn, prompting an investigation. [Read](https://news.google.com/rss/articles/CBMigwFBVV95cUxPdWk3SnhPbmtkbzl4dWk3TUxOdEJLSkpEMkUxeWFvbTBOdWhjeXVWRGhUb0tGUnRiMlZlOU95VVMtZG51YWhZQkVaVEJKd1dZMnRXd2dmVUR1WWtFdHJ6bnpKWnJKbHRYQ3VwRkxaT19lMGJfeVhZMVVvY3lRczJBYnF0SdIBiAFBVV95cUxPQ3FXU0VCX25sZjI0YWw0c3V6YjZmeDJVVGZ0UmJxUWM5ZHV1cVBja2l3d3ZUZGN3NFNESENRNHczN2I3emlWYWR4QzgtTnZqeEdNQnJLWVFlRmY2c0MwLVdwcVdkYW90VlNtamNJRUswODJzWkwxOGkyQUkzVGJEdV9qNzZrbHct?oc=5)
- Eight people were injured in a fire in Williamsburg, with emergency services responding promp

In [None]:
from datetime import datetime, timezone
final_json = {
    "place": PLACE,
    "generated_at_utc": datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ"),
    "topline_md": topline,                 # topline included here
    "order": [k for k in CIN_ORDER if k in sections_map],
    "sections": sections_map                # {"civic_information": {...}, ...}
}
json_path = f"civicpulse_digest_{PLACE.replace(' ','_')}.json"
with open(json_path, "w", encoding="utf-8") as f:
    json.dump(final_json, f, ensure_ascii=False, indent=2)
print("Saved ->", json_path)

Saved -> civicpulse_digest_New_York_NY.json


In [10]:
final_json

{'place': 'New York NY',
 'generated_at_utc': '2025-09-01T06:55:05Z',
 'topline_md': '**Top Line:** In recent news, an NYPD officer was struck in a hit-and-run in Brooklyn, while a fire in Williamsburg injured eight people; additionally, a judge temporarily blocked the removal of Guatemalan children amid rising immigration arrests in NYC.',
 'order': ['emergencies_risks',
  'civic_information',
  'political_information',
  'transportation',
  'health_welfare',
  'education',
  'economic_opportunities',
  'environment',
  'other'],
 'sections': {'emergencies_risks': {'title': 'Emergencies & Risks',
   'summary_md': '### Emergencies & Risks\n\nRecent incidents highlight ongoing safety concerns in Brooklyn and Secaucus, with multiple injuries reported.\n\n- An NYPD officer was struck by a hit-and-run driver in Brooklyn, prompting an investigation. [Read](https://news.google.com/rss/articles/CBMigwFBVV95cUxPdWk3SnhPbmtkbzl4dWk3TUxOdEJLSkpEMkUxeWFvbTBOdWhjeXVWRGhUb0tGUnRiMlZlOU95VVMtZG51YWh

In [66]:
final_json

{'place': 'New York NY',
 'generated_at_utc': '2025-09-01T05:53:06Z',
 'topline_md': '**Top Line:** In recent news, Secaucus police responded to a chaotic street takeover, while former NYC mayor Rudy Giuliani was hospitalized after a car crash; additionally, a judge temporarily blocked the removal of Guatemalan children amid rising immigration arrests and deportations in NYC.',
 'sections': [{'key': 'emergencies_risks',
   'title': 'Emergencies & Risks',
   'markdown': "### Emergencies & Risks\n\n- A chaotic street takeover in Secaucus led to police vehicles being surrounded during a 'riotous' incident. [Read](https://news.google.com/rss/articles/CBMisAFBVV95cUxQS0tKX3A4Tm9JTmE1VXBIeHQ4VEVMZWpYek01M2tMQkp2Y205NVVrcFZQd0JiZ2lRc0xmV1JkSUtWV20tampleDJsaDN5c2NxRmRYVU5DQUZNVDZybHZueGd0ZXlUel9BeHViY1hCbWRQYWpGLXpxUjNnSjd1LWhSVDhhdjIwNGZEOEpYdV9YMmNMVUtvVGV1WGJsSzdlTlMtQUtyb28xa0x1MngxZU12cg?oc=5)\n\n- Former New York mayor Rudy Giuliani was hospitalized following a car crash. [Read](https://

In [59]:
section_markdowns

['### Emergencies & Risks\n\n- A street takeover in Secaucus led to police vehicles being surrounded during a chaotic scene. [Read](https://news.google.com/rss/articles/CBMisAFBVV95cUxQS0tKX3A4Tm9JTmE1VXBIeHQ4VEVMZWpYek01M2tMQkp2Y205NVVrcFZQd0JiZ2lRc0xmV1JkSUtWV20tampleDJsaDN5c2NxRmRYVU5DQUZNVDZybHZueGd0ZXlUel9BeHViY1hCbWRQYWpGLXpxUjNnSjd1LWhSVDhhdjIwNGZEOEpYdV9YMmNMVUtvVGV1WGJsSzdlTlMtQUtyb28xa0x1MngxZU12cg?oc=5)\n\n- Former New York mayor Rudy Giuliani was hospitalized following a car crash. [Read](https://news.google.com/rss/articles/CBMiqAFBVV95cUxQV29BajBrVkdNYWdSQm5DSlRRalE2TDRqRjR0OTFQSERRdThJRzVGT3MyYjhFZnlWUmhJd3JTU3diTlNHWjVPdlpGLTFOczNuVkF2T05yQVlUdENsMHhsdGNpVU1ycnFYenljanBhSU5wRWhzX3N1Nll2WVlmN3lTTDc0R3FCR3dkU091X3NCNUl5UFlybnRjd1Q0OTl2SVQxU1JBNHYwY0w?oc=5)\n\n- Three men were injured in a shooting outside a building in East Village, Manhattan. [Read](https://news.google.com/rss/articles/CBMiogFBVV95cUxQYThTdzYycnFGWGJMX1VPSm5KMHpHUGNzS3lGMVpxU0tzZVhwNXBfUk9iVXh1c19Na2JnMU