# IVFC Daily Operations Report
**Station 187 - Ingomar Volunteer Fire Company**

Automated daily report covering rolling daily, weekly, and monthly incident statistics, weather risk, traffic risk, and a duty-crew narrative generated by Claude AI.

In [None]:
# Run this cell once to install dependencies (or include in your Dockerfile)
import subprocess, sys

# pip distribution names to make available.
packages = [
    "requests",
    "python-docx",
    "anthropic",
    "sendgrid",
    "beautifulsoup4",
    "lxml",
    "pytz",
    "pandas",
    "google-cloud-storage",
]

# Import name for every distribution whose module name differs from its pip
# name. Deriving the module name from the pip name (e.g. "python-docx" ->
# "python_docx") never imports successfully, which made these packages look
# missing and reinstalled them on every run.
_IMPORT_NAMES = {
    "python-docx":          "docx",
    "beautifulsoup4":       "bs4",
    "google-cloud-storage": "google.cloud.storage",
}

for pkg in packages:
    dist_name = pkg.split("==")[0]
    module_name = _IMPORT_NAMES.get(dist_name, dist_name.replace("-", "_"))
    try:
        __import__(module_name)
    except ImportError:
        print(f"Installing {pkg}...")
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg, "-q"])

print("All dependencies ready.")


## Section 1: Imports and Configuration

In [None]:
import os, re, io, json, base64, pytz, requests, warnings
import pandas as pd
from datetime import datetime, timedelta
from docx import Document
from docx.shared import Pt, RGBColor, Inches, Cm
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.oxml.ns import qn
from docx.oxml import OxmlElement
import anthropic

# Suppress every warning category for the rest of the run.
warnings.filterwarnings("ignore")

# All report windows are anchored to the current time in US Eastern.
EASTERN = pytz.timezone("America/New_York")
NOW_ET = datetime.now(EASTERN)
TODAY = NOW_ET.date()

# Rolling look-back windows: 1, 7 and 30 days before NOW_ET.
DAILY_START, WEEKLY_START, MONTHLY_START = (
    NOW_ET - timedelta(days=span) for span in (1, 7, 30)
)

# Hours (24h clock) that bound the "daytime" shift.
DAYTIME_START_HOUR, DAYTIME_END_HOUR = 6, 18

print(f"Report generated: {NOW_ET.strftime('%Y-%m-%d %H:%M %Z')}")
print(f"Daily window:     {DAILY_START.strftime('%Y-%m-%d %H:%M')} to now")
print(f"Weekly window:    {WEEKLY_START.strftime('%Y-%m-%d %H:%M')} to now")
print(f"Monthly window:   {MONTHLY_START.strftime('%Y-%m-%d %H:%M')} to now")


## Section 2: Credentials and Settings

In [None]:
# Secrets (password and API keys) are read from the environment only and fall
# back to an empty string. A hard-coded default exposes the credential to
# anyone who can read this file, and a non-empty placeholder default makes the
# 'set'/'MISSING' report printed after configuration always say 'set'.
# NOTE: the password and TomTom key that were previously committed here should
# be rotated, since they remain in version-control history.
FIRSTDUE_EMAIL    = os.environ.get("FIRSTDUE_EMAIL",    "peterethanmuller@gmail.com")
FIRSTDUE_PASSWORD = os.environ.get("FIRSTDUE_PASSWORD", "")
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY", "")
SENDGRID_API_KEY  = os.environ.get("SENDGRID_API_KEY",  "")
TOMTOM_API_KEY    = os.environ.get("TOMTOM_API_KEY",    "")
GCS_BUCKET_NAME   = os.environ.get("GCS_BUCKET_NAME",   "ivfc-reports-187")

# Report distribution; EMAIL_TO is a comma-separated list of addresses.
EMAIL_FROM = os.environ.get("EMAIL_FROM", "pmuller@andrew.cmu.edu")
EMAIL_TO   = os.environ.get("EMAIL_TO",   "pmuller@andrew.cmu.edu,dbaity@andrew.cmu.edu,afrocha@andrew.cmu.edu,yanxuand@andrew.cmu.edu")
EMAIL_CC   = os.environ.get("EMAIL_CC",   "pmuller@andrew.cmu.edu")

# Report category -> dispatch-type keywords, in matching priority order.
# The final entry has no keywords of its own.
_CATEGORY_KEYWORDS = {
    "Commercial AFA": [
        "FIRE ALARM - COMMERCIAL",
        "FIRE ALARM - HIGH LIFE HAZARD",
        "FIRE ALARM - HIGH RISE",
        "FIRE ALARM - MULTI FAMILY",
    ],
    "Residential AFA": ["FIRE ALARM - RESIDENTIAL"],
    "Vehicle Crash w/ Injuries": ["CRASH - INJURIES", "ACCIDENT WITH INJURIES"],
    "Crash w/ Entrapment": ["CRASH - TRAPPED", "CRASH - PINNED"],
    "Crash - Other/Unknown": [
        "CRASH - UNKNOWN",
        "CRASH - HAZARDS",
        "CRASH - HIGH MECHANISM",
        "CRASH - INVOLVING A STRUCTURE",
    ],
    "Natural Gas Odor/Leak": ["NATURAL GAS"],
    "Structure Fire": [
        "RESIDENTIAL BLDG FIRE",
        "COMMERCIAL BLDG FIRE",
        "POSSIBLE RES BLDG FIRE",
        "POSSIBLE COMM BLDG FIRE",
        "FIRE - HIGH LIFE HAZARD",
    ],
    "Odor Investigation": [
        "ODOR INVESTIGATION",
        "SMOKE INVESTIGATION",
        "CARBON MONOXIDE - RESIDENTIAL",
        "CARBON MONOXIDE - COMMERCIAL",
    ],
    "Vehicle Fire": ["VEHICLE FIRE", "VEHICLE FIRE - HIGHWAY"],
    "Brush/Grass/Woods Fire": ["BRUSH FIRE", "GRASS FIRE", "WOODS FIRE", "MULCH FIRE"],
    "Electrical Hazard": ["ELECTRICAL HAZARD", "ELECTRICAL FIRE"],
    "Water Emergency": ["WATER EMERGENCY", "FLOOD/WATER CONDITION"],
    "Hazmat": ["HAZMAT", "FUEL SPILL - HIGHWAY", "FUEL SPILL"],
    "Medical Emergency": ["MEDICAL EMERGENCY", "EMS ASSIST"],
    "Mutual Aid Given": ["MUTUAL AID GIVEN", "MUTUAL AID - GIVEN"],
    "Mutual Aid Received": ["MUTUAL AID RECEIVED", "MUTUAL AID - RECEIVED"],
    "Service Call": [
        "SERVICE CALL",
        "ASSIST - POLICE",
        "ASSIST - OTHER AGENCY",
        "WIRES DOWN",
        "TREE DOWN",
        "PUBLIC ASSIST",
    ],
    "Cancelled/No Activity": [
        "CANCELLED EN ROUTE",
        "CANCELLED ON SCENE",
        "NO ACTIVITY FOUND",
        "UNFOUNDED",
    ],
    "Other / Uncategorized": [],
}

# Exposed as an ordered list of (category name, keyword list) tuples.
INCIDENT_CATEGORIES = list(_CATEGORY_KEYWORDS.items())

# Report whether each API key holds a non-empty value (the key itself is never printed).
_anthropic_state = "set" if ANTHROPIC_API_KEY else "MISSING"
_tomtom_state    = "set" if TOMTOM_API_KEY else "MISSING"
_sendgrid_state  = "set" if SENDGRID_API_KEY else "MISSING"

print("Configuration loaded.")
print(f"  Anthropic key: {_anthropic_state}")
print(f"  TomTom key:    {_tomtom_state}")
print(f"  SendGrid key:  {_sendgrid_state}")


## Section 3: First Due API (Authentication)
> The token request is currently commented out - the report uses the CSV fallback. Uncomment it when API access is granted.

In [None]:
def firstdue_get_token(email: str, password: str) -> str:
    """Request a First Due API access token.

    POSTs the credentials as JSON to the auth endpoint (30 s timeout), calls
    ``raise_for_status()`` on the response, and returns the ``access_token``
    field of the JSON reply.
    """
    credentials = {"email": email, "password": password}
    response = requests.post(
        "https://api.firstdue.com/fd-api/v1/auth/token",
        json=credentials,
        timeout=30,
    )
    response.raise_for_status()
    payload = response.json()
    return payload["access_token"]

# API auth is disabled: the two lines below show how the token and headers are
# obtained; until they are re-enabled, both are left empty.
# FIRSTDUE_TOKEN = firstdue_get_token(FIRSTDUE_EMAIL, FIRSTDUE_PASSWORD)
# FIRSTDUE_HEADERS = {"Authorization": f"Bearer {FIRSTDUE_TOKEN}"}
FIRSTDUE_TOKEN, FIRSTDUE_HEADERS = "", {}
print("First Due auth skipped - using CSV data source.")


## Section 4: First Due API (Fetch Dispatches)
> The fetch call is commented out - the report uses the CSV fallback.

In [None]:
def _next_page_url(link_header: str):
    """Return the URL marked rel="next" in an HTTP Link header, or None."""
    for part in link_header.split(","):
        part = part.strip()
        if 'rel="next"' in part:
            match = re.search(r"<([^>]+)>", part)
            if match:
                return match.group(1)
    return None


def fetch_all_dispatches(since_dt, headers: dict) -> list:
    """Fetch every dispatch since ``since_dt``, following Link-header pagination.

    Args:
        since_dt: timezone-aware datetime; converted to UTC and sent as the
            ``since`` query parameter in ``%Y-%m-%dT%H:%M:%SZ`` form.
        headers: HTTP headers to send with each request.

    Returns:
        A flat list of the items from all pages. A page may be either a bare
        JSON list or a JSON object holding the items under ``"data"``.

    Raises:
        Whatever ``raise_for_status()`` raises for an HTTP error response.
    """
    url = "https://api.firstdue.com/fd-api/v1/dispatches"
    since_str = since_dt.astimezone(pytz.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
    params = {"since": since_str}
    all_items = []
    while url:
        resp = requests.get(url, headers=headers, params=params, timeout=30)
        resp.raise_for_status()
        data = resp.json()
        # Check for a bare list first: lists have no .get(), so calling
        # data.get(...) on one raises AttributeError before any fallback applies.
        if isinstance(data, list):
            all_items.extend(data)
        else:
            all_items.extend(data.get("data") or [])
        url = _next_page_url(resp.headers.get("Link", ""))
        # Only the first request carries query params; later URLs come from the Link header.
        params = {}
    return all_items

# if FIRSTDUE_TOKEN:
#     raw = fetch_all_dispatches(MONTHLY_START, FIRSTDUE_HEADERS)
#     print(f"Fetched {len(raw)} dispatches from First Due API.")
print("First Due fetch skipped - using CSV data source.")


## Section 5: Rolling Stats Functions

In [None]:
def filter_window(df: pd.DataFrame, start: datetime) -> pd.DataFrame:
    """Return a copy of the rows whose ``timestamp`` is at or after ``start``.

    ``start`` has its tzinfo dropped (clock fields unchanged) before the
    comparison, so it is compared against the column as a naive datetime.
    """
    cutoff = start.replace(tzinfo=None)
    in_window = df["timestamp"] >= cutoff
    return df[in_window].copy()

def categorize(incident_type: str) -> str:
    """Map a raw incident type to a report category name.

    The input is stringified, stripped and upper-cased, then checked against
    each category's keywords in ``INCIDENT_CATEGORIES`` order; the first
    category with a keyword contained in the text wins. Anything unmatched is
    "Other / Uncategorized".
    """
    fallback = "Other / Uncategorized"
    normalized = str(incident_type).strip().upper()
    matches = (
        name
        for name, keywords in INCIDENT_CATEGORIES
        if name != fallback and any(kw.upper() in normalized for kw in keywords)
    )
    return next(matches, fallback)

def is_daytime(ts) -> bool:
    """Return True when ``ts.hour`` is in [DAYTIME_START_HOUR, DAYTIME_END_HOUR)."""
    hour = ts.hour
    return hour >= DAYTIME_START_HOUR and hour < DAYTIME_END_HOUR

def build_incident_table(df: pd.DataFrame) -> pd.DataFrame:
    """Tabulate incidents per category, split into daytime and evening/night.

    Returns one row per category that has at least one incident (in
    ``INCIDENT_CATEGORIES`` order) followed by a "TOTAL" row, with columns
    Category / Daytime / Evening/Night / Total. The TOTAL row is always
    present, even for an empty input.
    """
    category_names = [entry[0] for entry in INCIDENT_CATEGORIES]
    # category -> [daytime count, evening/night count]
    tally = {name: [0, 0] for name in category_names}

    for _, incident in df.iterrows():
        label = incident.get("category", "Other / Uncategorized")
        shift = 0 if is_daytime(incident["timestamp"]) else 1
        tally.setdefault(label, [0, 0])[shift] += 1

    # Totals cover every counted row, including any label outside the known categories.
    day_total = sum(pair[0] for pair in tally.values())
    eve_total = sum(pair[1] for pair in tally.values())

    records = [
        {
            "Category":      name,
            "Daytime":       tally[name][0],
            "Evening/Night": tally[name][1],
            "Total":         tally[name][0] + tally[name][1],
        }
        for name in category_names
        if tally[name][0] > 0 or tally[name][1] > 0
    ]
    records.append({
        "Category":      "TOTAL",
        "Daytime":       day_total,
        "Evening/Night": eve_total,
        "Total":         day_total + eve_total,
    })
    return pd.DataFrame(records)

def summary_stats(df: pd.DataFrame, label: str) -> dict:
    """Summarise a window of incidents.

    Returns a dict with the window ``label``, the ``total`` row count, the
    ``daytime`` / ``evening`` split according to ``is_daytime``, and
    ``top_type`` (the most frequent category, or "N/A" when there is none).
    """
    if df.empty:
        return {"label": label, "total": 0, "daytime": 0, "evening": 0, "top_type": "N/A"}

    daytime_mask = df["timestamp"].apply(is_daytime)
    category_counts = df["category"].value_counts()
    top_type = "N/A" if category_counts.empty else category_counts.index[0]

    return {
        "label":    label,
        "total":    len(df),
        "daytime":  len(df[daytime_mask]),
        "evening":  len(df[~daytime_mask]),
        "top_type": top_type,
    }

print("Rolling stats functions defined.")


## Section 3b: Download CSV from First Due SizeUp (Playwright)
> Logs into sizeup.firstduesizeup.com and clicks the export button automatically.
> Result is uploaded to GCS and used as the data source in Section 4b.

In [None]:
import os, tempfile, subprocess, sys, json as _json

# Directory used for the scraper's credentials file, script, screenshot and download.
DOWNLOAD_DIR = tempfile.gettempdir()

# ── upload_to_gcs defined here (also used later in Section 11 for the report)
def upload_to_gcs(local_path: str, bucket_name: str, folder: str = "reports") -> str:
    """Upload ``local_path`` to ``gs://<bucket_name>/<folder>/<basename>``.

    Best-effort: any exception (including a missing google-cloud-storage
    package) is printed and swallowed. Returns the ``gs://`` URI on success
    and an empty string on failure.
    """
    try:
        from google.cloud import storage
        target_bucket = storage.Client().bucket(bucket_name)
        object_name = f"{folder}/{os.path.basename(local_path)}"
        target_bucket.blob(object_name).upload_from_filename(local_path)
        gcs_uri = f"gs://{bucket_name}/{object_name}"
        print(f"Uploaded to GCS: {gcs_uri}")
        return gcs_uri
    except Exception as e:
        print(f"GCS upload skipped: {e}")
        return ""

# Write credentials to a temp file for the scraper subprocess (avoids f-string
# quoting issues). The file holds a plaintext password inside the shared temp
# directory, so it is created with owner-only permissions (0600) instead of
# the default mode, which other local users could read.
_creds_path = os.path.join(DOWNLOAD_DIR, "sizeup_creds.json")
_creds_fd = os.open(_creds_path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
with os.fdopen(_creds_fd, "w") as _f:
    # The mode given to os.open applies only when the file is newly created;
    # tighten a leftover file from an earlier run before writing the secret.
    os.chmod(_creds_path, 0o600)
    _json.dump({
        "email":    FIRSTDUE_EMAIL,
        "password": FIRSTDUE_PASSWORD,
        "dl_dir":   DOWNLOAD_DIR,
    }, _f)

# ── Scraper script written to disk ──────────────────────────────────────────
# _scraper_src is the full source of a standalone Playwright script. It is
# written to _scraper and run in a child process with the credentials JSON
# path as its only argument (it reads sys.argv[1]).
#
# Contract with the parent process, as implemented in the script below:
#   * stdout line "SCREENSHOT:<path>  [<label>]" after each debug screenshot
#   * stdout line "DOWNLOADED:<path>" once the CSV has been saved
#   * exit code 1 on hard errors (no email field, no password field, no
#     download button); exit code 2 when the login never leaves the auth page
#
# The literal is a raw string so that backslash sequences such as \n reach the
# generated script unchanged.
_scraper = os.path.join(DOWNLOAD_DIR, "sizeup_scraper.py")
_scraper_src = r"""
import os, sys, json
from playwright.sync_api import sync_playwright, TimeoutError as PWTimeout

with open(sys.argv[1]) as f:
    cfg = json.load(f)

email    = cfg["email"]
password = cfg["password"]
dl_dir   = cfg["dl_dir"]
shot     = os.path.join(dl_dir, "sizeup_debug.png")
DISPATCH = "https://sizeup.firstduesizeup.com/dispatch"
AUTH_URL = "https://sizeup.firstduesizeup.com/auth/signin-v2"

def dump_inputs(page, label):
    els = page.locator("input").all()
    print(f"[{label}] {len(els)} input(s) on page:", flush=True)
    for el in els:
        t  = el.get_attribute("type") or "text"
        n  = el.get_attribute("name") or ""
        i  = el.get_attribute("id")   or ""
        ph = el.get_attribute("placeholder") or ""
        vis = el.is_visible()
        print(f"  type={t!r} name={n!r} id={i!r} ph={ph!r} visible={vis}", flush=True)

def snap(page, shot, label):
    page.screenshot(path=shot)
    print(f"SCREENSHOT:{shot}  [{label}]", flush=True)

with sync_playwright() as p:
    browser = p.chromium.launch(
        headless=True,
        args=["--no-sandbox", "--disable-dev-shm-usage", "--disable-gpu"]
    )
    ctx  = browser.new_context(accept_downloads=True)
    page = ctx.new_page()
    page.set_default_timeout(20000)

    # ── 1. Navigate ──────────────────────────────────────────────────────
    print(f"Navigating to {DISPATCH}", flush=True)
    try:
        page.goto(DISPATCH, wait_until="domcontentloaded", timeout=60000)
    except PWTimeout:
        pass
    page.wait_for_timeout(3000)
    print(f"URL after nav: {page.url}", flush=True)
    snap(page, shot, "after-nav")
    dump_inputs(page, "after-nav")

    # ── 2. Detect login page ─────────────────────────────────────────────
    on_auth = any(x in page.url for x in ("signin", "auth", "login"))
    if not on_auth:
        for pattern in ("**signin**", "**auth**", "**login**"):
            try:
                page.wait_for_url(pattern, timeout=3000)
                on_auth = True
                break
            except PWTimeout:
                pass
    if not on_auth:
        try:
            page.wait_for_selector("input:not([type='hidden'])", timeout=4000)
            on_auth = True
            print("Login form detected via input field (no URL change)", flush=True)
        except PWTimeout:
            pass

    if on_auth:
        print(f"On auth page: {page.url}", flush=True)

        # ── Step 1: fill email ────────────────────────────────────────
        EMAIL_SELS = [
            "input[type='email']",
            "input[name='email']",
            "input[id='email']",
            "input[name='username']",
            "input[autocomplete='email']",
            "input[placeholder*='mail' i]",
            "input[placeholder*='user' i]",
        ]
        email_el = None
        for sel in EMAIL_SELS:
            loc = page.locator(sel)
            if loc.count() > 0 and loc.first.is_visible():
                email_el = loc.first
                print(f"Email field matched selector: {sel}", flush=True)
                break
        if email_el is None:
            for el in page.locator("input").all():
                t = el.get_attribute("type") or "text"
                if t not in ("hidden", "checkbox", "radio", "submit", "button") and el.is_visible():
                    email_el = el
                    print(f"Email field fallback: type={t!r}", flush=True)
                    break
        if email_el is None:
            dump_inputs(page, "no-email-found")
            snap(page, shot, "no-email-found")
            print("ERROR: could not find email field", flush=True)
            sys.exit(1)

        email_el.click()
        email_el.fill(email)
        print(f"Email filled.", flush=True)

        BTN_SELS = [
            "button[type='submit']",
            "button:has-text('Continue')",
            "button:has-text('Next')",
            "button:has-text('Sign in')",
            "button:has-text('Log in')",
            "input[type='submit']",
        ]
        clicked = False
        for sel in BTN_SELS:
            loc = page.locator(sel)
            if loc.count() > 0 and loc.first.is_visible():
                print(f"Clicking continue button: {sel}", flush=True)
                loc.first.click()
                clicked = True
                break
        if not clicked:
            print("WARNING: no Continue button — pressing Enter", flush=True)
            email_el.press("Enter")

        # ── Step 2: wait for password field ──────────────────────────
        print("Waiting for password field...", flush=True)
        try:
            page.wait_for_selector("input[type='password']:visible", timeout=10000)
            print("Password field appeared.", flush=True)
        except PWTimeout:
            snap(page, shot, "no-password-field")
            dump_inputs(page, "no-password-field")
            print("ERROR: password field did not appear", flush=True)
            sys.exit(1)

        page.wait_for_timeout(500)
        snap(page, shot, "password-step")

        pw_el = page.locator("input[type='password']:visible").first
        pw_el.click()
        pw_el.fill(password)
        print("Password filled.", flush=True)

        # ── Submit ────────────────────────────────────────────────────
        # Use the button whose text is specifically 'Sign in' or 'Log in'
        # to avoid accidentally re-clicking 'Continue'
        SUBMIT_SELS = [
            "button:has-text('Sign in')",
            "button:has-text('Log in')",
            "button:has-text('Submit')",
            "button[type='submit']",
            "input[type='submit']",
        ]
        clicked2 = False
        for sel in SUBMIT_SELS:
            loc = page.locator(sel)
            if loc.count() > 0 and loc.first.is_visible():
                print(f"Submit clicking: {sel}", flush=True)
                loc.first.click()
                clicked2 = True
                break
        if not clicked2:
            print("WARNING: no submit button — pressing Enter on password", flush=True)
            pw_el.press("Enter")

        # ── Verify login succeeded (URL must leave auth page) ─────────
        print("Verifying login success (waiting for URL to leave auth page)...", flush=True)
        try:
            page.wait_for_function(
                "() => !window.location.href.includes('signin') && "
                "       !window.location.href.includes('/auth/')",
                timeout=20000
            )
            print(f"Login confirmed! URL: {page.url}", flush=True)
        except PWTimeout:
            snap(page, shot, "login-failed")
            dump_inputs(page, "login-failed")
            print(
                f"WARNING: Login did not complete — URL still {page.url}.\n"
                "This may be caused by bot/datacenter IP detection on Cloud Run.\n"
                "The report will fall back to the most recent CSV in GCS.",
                flush=True
            )
            ctx.close()
            browser.close()
            sys.exit(2)   # exit code 2 = soft fail (fallback to GCS)

        page.wait_for_timeout(3000)
        snap(page, shot, "post-login")

    else:
        print("Already authenticated — no login needed.", flush=True)
        snap(page, shot, "already-authed")

    # ── 3. Download CSV ──────────────────────────────────────────────────
    print("Waiting for download button...", flush=True)
    DL_BTN = "[data-testid='button_download_csv']"
    try:
        page.wait_for_selector(DL_BTN, timeout=30000)
        print("Download button found!", flush=True)
    except PWTimeout as e:
        snap(page, shot, "no-download-btn")
        try:
            testids = page.evaluate(
                "() => [...document.querySelectorAll('[data-testid]')]"
                ".map(el => el.getAttribute('data-testid')).slice(0, 40)"
            )
            print(f"data-testid elements on page: {testids}", flush=True)
        except Exception:
            pass
        print(f"ERROR waiting for download button: {e}", flush=True)
        sys.exit(1)

    snap(page, shot, "ready-to-download")
    with page.expect_download(timeout=60000) as dl_info:
        page.click(DL_BTN)
    download = dl_info.value
    out = os.path.join(dl_dir, download.suggested_filename or "D90.csv")
    download.save_as(out)
    print(f"DOWNLOADED:{out}", flush=True)

    ctx.close()
    browser.close()
"""

# The scraper source contains non-ASCII characters (box-drawing rules and em
# dashes). Python reads source files as UTF-8 by default, so write the file as
# UTF-8 explicitly rather than in the platform's default encoding.
with open(_scraper, "w", encoding="utf-8") as _f:
    _f.write(_scraper_src)

# ── Run ─────────────────────────────────────────────────────────────────────
# Runs the scraper in a child process, echoes its output, and reads the
# "DOWNLOADED:" / "SCREENSHOT:" marker lines from stdout. Every failure mode,
# including a timeout, leaves scraped_csv as None so the later CSV-loading
# cell falls back to GCS. The credentials file is always removed afterwards.
scraped_csv     = None
screenshot_path = None


def _scraper_output_text(stream) -> str:
    """Return captured subprocess output as text ('' when nothing was captured)."""
    if stream is None:
        return ""
    if isinstance(stream, bytes):
        # TimeoutExpired may carry bytes even when text=True was requested.
        return stream.decode("utf-8", errors="replace")
    return stream


try:
    try:
        res = subprocess.run(
            [sys.executable, _scraper, _creds_path],
            capture_output=True, text=True, timeout=180
        )
        _scraper_stdout = res.stdout
        _scraper_stderr = res.stderr
        exit_code = res.returncode
    except subprocess.TimeoutExpired as _timeout:
        # A hung browser must not abort the whole report: treat the timeout
        # as a failed scrape and keep whatever output was captured.
        _scraper_stdout = _scraper_output_text(_timeout.stdout)
        _scraper_stderr = _scraper_output_text(_timeout.stderr)
        exit_code = None

    print("=== STDOUT ===")
    print(_scraper_stdout)
    if _scraper_stderr:
        print("=== STDERR (last 2000) ===")
        print(_scraper_stderr[-2000:])

    # The last marker of each kind wins.
    for line in _scraper_stdout.splitlines():
        if line.startswith("DOWNLOADED:"):
            scraped_csv = line.split("DOWNLOADED:", 1)[1].strip()
        if line.startswith("SCREENSHOT:"):
            screenshot_path = line.split("SCREENSHOT:", 1)[1].split("[")[0].strip()

    if screenshot_path and os.path.exists(screenshot_path):
        # Showing the screenshot is a debugging aid only; skip it when IPython
        # is not available instead of failing the run.
        try:
            from IPython.display import Image, display
            display(Image(filename=screenshot_path))
        except ImportError:
            print(f"Debug screenshot saved: {screenshot_path}")

    if scraped_csv and os.path.exists(scraped_csv):
        print(f"\nCSV ready: {scraped_csv} ({os.path.getsize(scraped_csv):,} bytes)")
        upload_to_gcs(scraped_csv, GCS_BUCKET_NAME, folder="data")
        GCS_CSV_PATH = "data/" + os.path.basename(scraped_csv)
    else:
        if exit_code is None:
            print("\nScraper timed out after 180s — will use GCS fallback.")
        elif exit_code == 2:
            print("\nScraper soft-failed (likely Cloud Run IP block) — will use GCS fallback.")
        else:
            print(f"\nScraper failed (exit {exit_code}) — will use GCS fallback.")
        scraped_csv = None
finally:
    # Never leave the plaintext credentials on disk.
    if os.path.exists(_creds_path):
        os.remove(_creds_path)

## Section 4b: Load from CSV (Active Data Source)

In [None]:
# Default GCS object path for the incident CSV; the GCS_CSV_PATH environment
# variable overrides it.
# NOTE(review): this assignment comes after the scraper cell in the file, so
# it replaces any GCS_CSV_PATH value that cell set — confirm which should win.
GCS_CSV_PATH   = os.environ.get("GCS_CSV_PATH", "data/D90.csv")
LOCAL_CSV_PATH = "D90.csv"

def download_csv_from_gcs(bucket_name: str, blob_name: str, local_dest: str) -> str:
    """Download ``gs://<bucket_name>/<blob_name>`` to ``local_dest``.

    Best-effort: any exception (including a missing google-cloud-storage
    package) is printed and swallowed. Returns ``local_dest`` on success and
    ``None`` on failure.
    """
    try:
        from google.cloud import storage
        source_blob = storage.Client().bucket(bucket_name).blob(blob_name)
        source_blob.download_to_filename(local_dest)
        print(f"Downloaded gs://{bucket_name}/{blob_name} -> {local_dest}")
        return local_dest
    except Exception as e:
        print(f"GCS download failed: {e}")
        return None

def load_from_csv(csv_path: str, local_tz: str = "America/New_York") -> pd.DataFrame:
    """Load the incident CSV and normalise it for the rolling-window stats.

    The first column whose name contains "DATE" or "TIME" is parsed into a
    naive ``timestamp`` column; rows that fail to parse are dropped.
    ``incident_type`` and ``address`` come from the first columns whose names
    contain "TYPE" / "ADDRESS" (with fallbacks when absent), and ``category``
    is derived with ``categorize``.

    Args:
        csv_path: path of the CSV file to read.
        local_tz: time zone that timezone-aware timestamps are converted to
            before their tzinfo is dropped. The report's windows and day/night
            split are computed on US Eastern clock time, so simply discarding
            the offset of an aware timestamp would shift it by the UTC offset.
            Naive timestamps are left untouched.

    Returns:
        The DataFrame with the ``timestamp``, ``incident_type``, ``address``
        and ``category`` columns added.

    Raises:
        ValueError: if no column name contains "DATE" or "TIME".
    """
    df = pd.read_csv(csv_path)
    df.columns = [c.strip() for c in df.columns]
    date_col = next((c for c in df.columns if "DATE" in c.upper() or "TIME" in c.upper()), None)
    if date_col is None:
        raise ValueError(f"No date column found. Columns: {list(df.columns)}")
    df["timestamp"] = pd.to_datetime(df[date_col], errors="coerce")
    # Copy so the column assignments below write to an independent frame
    # rather than to a slice of the unfiltered one.
    df = df.dropna(subset=["timestamp"]).copy()

    def _to_local_naive(value):
        """Convert an aware timestamp to local clock time, then drop tzinfo."""
        ts = pd.Timestamp(value)
        if ts.tzinfo is None:
            return ts
        return ts.tz_convert(local_tz).tz_localize(None)

    df["timestamp"] = df["timestamp"].apply(_to_local_naive)
    type_col = next((c for c in df.columns if "TYPE" in c.upper()), None)
    addr_col = next((c for c in df.columns if "ADDRESS" in c.upper()), None)
    df["incident_type"] = df[type_col].fillna("Unknown") if type_col else "Unknown"
    df["address"]       = df[addr_col].fillna("")        if addr_col else ""
    df["category"]      = df["incident_type"].apply(categorize)
    print(f"Loaded {len(df)} incidents from {csv_path}")
    print(f"Date range: {df['timestamp'].min()} to {df['timestamp'].max()}")
    cat_counts = df["category"].value_counts()
    print("Category breakdown:")
    for cat, cnt in cat_counts.items():
        print(f"  {cat}: {cnt}")
    return df

# ── Pick the best available CSV source (priority order) ──────────────────
# 1. Freshly scraped from SizeUp this run (scraped_csv set by Section 3b)
# 2. Download latest from GCS  (uploaded by previous scraper runs)
# 3. Local fallback D90.csv

import tempfile as _tmpfile, os as _os

csv_to_load = None

# globals().get tolerates the scraper cell not having been run at all.
_scraped = globals().get("scraped_csv")
if _scraped and _os.path.exists(_scraped):
    # 1. Scraped this run
    csv_to_load = _scraped
    print(f"Using freshly scraped CSV: {csv_to_load}")
else:
    # 2. Try GCS
    _gcs_dest = _os.path.join(_tmpfile.gettempdir(), "gcs_D90.csv")
    _dl = download_csv_from_gcs(GCS_BUCKET_NAME, GCS_CSV_PATH, _gcs_dest)
    if _dl:
        csv_to_load = _dl
        print(f"Using GCS CSV: {GCS_CSV_PATH}")
    else:
        # 3. Local fallback
        csv_to_load = LOCAL_CSV_PATH
        print(f"Falling back to local: {csv_to_load}")

print(f"Loading from: {csv_to_load}")
df_all = load_from_csv(csv_to_load)

# Slice the full data set into the three rolling windows, then derive the
# per-category table and the summary dict for each window.
_window_starts = (DAILY_START, WEEKLY_START, MONTHLY_START)
df_daily, df_weekly, df_monthly = [filter_window(df_all, _start) for _start in _window_starts]

table_daily, table_weekly, table_monthly = [
    build_incident_table(_window) for _window in (df_daily, df_weekly, df_monthly)
]

stats_daily   = summary_stats(df_daily,   "Last 24 Hours")
stats_weekly  = summary_stats(df_weekly,  "Last 7 Days")
stats_monthly = summary_stats(df_monthly, "Last 30 Days")

print(f"\nWindow counts:")
print(f"  Daily:   {stats_daily['total']} incidents")
print(f"  Weekly:  {stats_weekly['total']} incidents")
print(f"  Monthly: {stats_monthly['total']} incidents")

## Section 5b: Incident Histogram (10-Day Buckets)

In [None]:
import matplotlib
matplotlib.use("Agg")
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np

def plot_incident_histogram(df, title="Station 187 — Incidents by 10-Day Period"):
    """Draw a stacked bar chart of incidents per 10-day period and save it as a PNG.

    Incidents are bucketed into consecutive 10-day bins starting at the day of
    the earliest ``timestamp``. The 12 most frequent categories get their own
    stack segment; every other category is folded into
    "Other / Uncategorized". Periods with no incidents are kept as empty bars
    so that every bar lines up with its tick label.

    Args:
        df: DataFrame with ``timestamp`` and ``category`` columns.
        title: chart title.

    Returns:
        Path of the saved PNG in the temp directory, or ``None`` when ``df``
        is empty.
    """
    if df.empty:
        print("No data to plot.")
        return None

    df = df.copy()
    latest = df["timestamp"].max()
    min_date = df["timestamp"].min().floor("D")
    max_date = latest.ceil("D")

    # Build 10-day bin edges
    bin_edges = []
    cursor = min_date
    while cursor <= max_date:
        bin_edges.append(cursor)
        cursor += pd.Timedelta(days=10)
    # Bins are closed on the left only, so the final edge must lie strictly
    # after the latest timestamp; otherwise an incident falling exactly on the
    # last edge would be dropped (and a single midnight timestamp would leave
    # just one edge, i.e. no bin at all).
    if bin_edges[-1] <= latest:
        bin_edges.append(cursor)

    bin_labels = [edge.strftime("%b %d") for edge in bin_edges[:-1]]
    if len(set(bin_labels)) != len(bin_labels):
        # Data spanning more than a year repeats month/day labels, which
        # pd.cut rejects; add the year to keep the labels unique.
        bin_labels = [edge.strftime("%b %d, %Y") for edge in bin_edges[:-1]]
    df["bucket"] = pd.cut(df["timestamp"], bins=bin_edges, labels=bin_labels, right=False)
    df = df.dropna(subset=["bucket"])
    # Plain strings keep the groupby/reindex below independent of categorical handling.
    df["bucket"] = df["bucket"].astype(str)

    # Top categories by volume
    top_cats = df["category"].value_counts().head(12).index.tolist()
    df["cat_display"] = df["category"].apply(lambda x: x if x in top_cats else "Other / Uncategorized")

    all_cats = top_cats.copy()
    if "Other / Uncategorized" not in all_cats and (df["cat_display"] == "Other / Uncategorized").any():
        all_cats.append("Other / Uncategorized")

    # Reindex on every bin label so periods without incidents still get an
    # (empty) bar. Labelling only the observed buckets with the full label
    # list left the number of bars and the number of tick labels out of step.
    pivot = (
        df.groupby(["bucket", "cat_display"])
        .size()
        .unstack(fill_value=0)
        .reindex(index=bin_labels, columns=all_cats, fill_value=0)
    )

    # Color palette
    palette = [
        "#C0392B", "#2980B9", "#27AE60", "#F39C12", "#8E44AD",
        "#16A085", "#D35400", "#2C3E50", "#1ABC9C", "#E74C3C",
        "#3498DB", "#F1C40F", "#7F8C8D", "#E67E22"
    ]
    colors = palette[:len(pivot.columns)]

    fig, ax = plt.subplots(figsize=(14, 6))
    pivot.plot(
        kind="bar", stacked=True, ax=ax,
        color=colors, width=0.78, edgecolor="white", linewidth=0.4
    )

    ax.set_title(title, fontsize=14, fontweight="bold", pad=14)
    ax.set_xlabel("10-Day Period (Start Date)", fontsize=11)
    ax.set_ylabel("Number of Incidents", fontsize=11)
    ax.set_xticklabels(list(pivot.index), rotation=40, ha="right", fontsize=9)
    ax.legend(
        title="Incident Category", bbox_to_anchor=(1.01, 1),
        loc="upper left", fontsize=8, title_fontsize=9
    )
    ax.grid(axis="y", alpha=0.25, linestyle="--")
    ax.spines["top"].set_visible(False)
    ax.spines["right"].set_visible(False)
    ax.set_facecolor("#F9F9F9")
    fig.patch.set_facecolor("white")

    plt.tight_layout()

    chart_path = os.path.join(tempfile.gettempdir(), "incident_histogram.png")
    plt.savefig(chart_path, dpi=150, bbox_inches="tight", facecolor="white")
    plt.show()
    # Release the figure so repeated runs do not accumulate open figures.
    plt.close(fig)
    print(f"Chart saved: {chart_path}")
    return chart_path

# Chart the full loaded data set (not just one rolling window); chart_path is
# None when there is no data to plot.
chart_path = plot_incident_histogram(df_all)

## Section 6: Word Document Builder

In [None]:
def set_cell_bg(cell, hex_color: str):
    """Shade a table cell by appending a ``w:shd`` element to its properties.

    ``hex_color`` is written as the ``w:fill`` attribute (e.g. "1F3864").
    """
    shading = OxmlElement("w:shd")
    for attr, value in (("w:val", "clear"), ("w:color", "auto"), ("w:fill", hex_color)):
        shading.set(qn(attr), value)
    cell._tc.get_or_add_tcPr().append(shading)

def set_cell_border(cell, **kwargs):
    """Append a ``w:tcBorders`` element that styles all six cell edges alike.

    Optional keyword arguments ``val``, ``sz``, ``space`` and ``color``
    override the defaults ("single", "4", "0", "000000").
    """
    edge_attrs = {
        "w:val":   kwargs.get("val",   "single"),
        "w:sz":    kwargs.get("sz",    "4"),
        "w:space": kwargs.get("space", "0"),
        "w:color": kwargs.get("color", "000000"),
    }
    borders = OxmlElement("w:tcBorders")
    for edge in ("top", "left", "bottom", "right", "insideH", "insideV"):
        edge_el = OxmlElement(f"w:{edge}")
        for attr, value in edge_attrs.items():
            edge_el.set(qn(attr), value)
        borders.append(edge_el)
    cell._tc.get_or_add_tcPr().append(borders)

def add_incident_table_to_doc(doc, df_table: pd.DataFrame, title: str):
    """Add a heading and a formatted incident table to the document.

    ``df_table`` is expected to have the columns Category / Daytime /
    Evening/Night / Total. When it holds no category rows, a
    "No incidents in this window." paragraph is written instead of a table.

    Args:
        doc: the python-docx document to append to.
        df_table: per-category counts, optionally ending in a "TOTAL" row.
        title: text of the level-2 heading placed above the table.
    """
    doc.add_heading(title, level=2)
    # A table whose only row is "TOTAL" carries no incidents. Checking just
    # for an empty frame never triggers when the totals row is always present.
    has_incidents = (
        not df_table.empty
        and (df_table["Category"].astype(str).str.upper() != "TOTAL").any()
    )
    if not has_incidents:
        doc.add_paragraph("No incidents in this window.")
        return
    cols = ["Category", "Daytime", "Evening/Night", "Total"]
    table = doc.add_table(rows=1, cols=len(cols))
    table.style = "Table Grid"

    # Header row: bold white 9pt text, centred, on a dark blue background.
    hdr_cells = table.rows[0].cells
    for hdr_cell, col in zip(hdr_cells, cols):
        hdr_cell.text = col
        hdr_para = hdr_cell.paragraphs[0]
        hdr_para.runs[0].bold = True
        hdr_para.alignment = WD_ALIGN_PARAGRAPH.CENTER
        set_cell_bg(hdr_cell, "1F3864")
        for run in hdr_para.runs:
            run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)
            run.font.size = Pt(9)
        set_cell_border(hdr_cell)

    # Body rows: first column left-aligned, counts centred; the TOTAL row is
    # bold on a light blue background.
    for _, row in df_table.iterrows():
        is_total = str(row["Category"]).upper() == "TOTAL"
        cells = table.add_row().cells
        for j, (cell, col) in enumerate(zip(cells, cols)):
            cell.text = str(row[col])
            para = cell.paragraphs[0]
            para.alignment = WD_ALIGN_PARAGRAPH.CENTER if j > 0 else WD_ALIGN_PARAGRAPH.LEFT
            if is_total:
                set_cell_bg(cell, "D9E1F2")
            for run in para.runs:
                if is_total:
                    run.bold = True
                run.font.size = Pt(9)
            set_cell_border(cell)

def add_mutual_aid_section(doc, df: pd.DataFrame):
    """Add a "Mutual Aid Summary" heading followed by given/received counts.

    Each count is the number of rows in ``df`` whose ``category`` equals
    "Mutual Aid Given" or "Mutual Aid Received"; each gets its own paragraph
    with a bold label.
    """
    doc.add_heading("Mutual Aid Summary", level=2)
    given_count    = len(df[df["category"] == "Mutual Aid Given"])
    received_count = len(df[df["category"] == "Mutual Aid Received"])
    summary_lines = (
        ("Mutual Aid Given:    ", given_count),
        ("Mutual Aid Received: ", received_count),
    )
    for label, count in summary_lines:
        para = doc.add_paragraph()
        label_run = para.add_run(label)
        label_run.bold = True
        para.add_run(str(count))

def add_definitions_section(doc):
    """Add a "Definitions" heading and a bulleted glossary of report terms.

    The daytime and evening/night hour ranges are rendered from
    ``DAYTIME_START_HOUR`` and ``DAYTIME_END_HOUR``.
    """
    doc.add_heading("Definitions", level=2)
    day_start = f"{DAYTIME_START_HOUR:02d}:00"
    day_end   = f"{DAYTIME_END_HOUR:02d}:00"
    glossary = {
        "Daytime":         f"{day_start} - {day_end}",
        "Evening/Night":   f"{day_end} - {day_start}",
        "AFA":             "Automatic Fire Alarm",
        "Commercial AFA":  "Fire alarm activation in a commercial occupancy",
        "Residential AFA": "Fire alarm activation in a residential occupancy",
    }
    for term, meaning in glossary.items():
        bullet = doc.add_paragraph(style="List Bullet")
        term_run = bullet.add_run(f"{term}: ")
        term_run.bold = True
        bullet.add_run(meaning)

def build_word_document(
    table_daily, table_weekly, table_monthly,
    df_daily, df_weekly, df_monthly,
    weather_data: dict, traffic_data: dict,
    narrative: str,
    output_path: str,
    chart_path: str = None,
):
    """Assemble the daily operations report and save it as a .docx file.

    Sections are written in this order: title block, risk assessment table,
    duty-crew narrative, the three incident summary tables, mutual aid summary
    (computed from ``df_monthly``), definitions, and - when ``chart_path`` names
    an existing file - the incident histogram.

    Args:
        table_daily, table_weekly, table_monthly: Summary tables handed to
            ``add_incident_table_to_doc`` for the 24-hour, 7-day and 30-day windows.
        df_daily, df_weekly, df_monthly: Incident frames; only ``df_monthly``
            is read here (by the mutual aid section).
        weather_data, traffic_data: Dicts whose ``risk_score`` and ``summary``
            entries populate the risk table ("N/A" / "" when absent).
        narrative:   Text placed under "Duty Crew Risk Narrative".
        output_path: Destination path for the saved document.
        chart_path:  Optional image path for the histogram section.

    Returns:
        ``output_path``, after the document has been saved.
    """
    doc = Document()
    base_style = doc.styles["Normal"]
    base_style.font.name = "Calibri"
    base_style.font.size = Pt(11)

    # Title block: two centered headings, then a centered generation timestamp.
    for heading_text, heading_level in (
        ("Ingomar Volunteer Fire Company", 1),
        ("Station 187 - Daily Operations Report", 2),
    ):
        banner = doc.add_heading(heading_text, level=heading_level)
        banner.alignment = WD_ALIGN_PARAGRAPH.CENTER
    stamp = doc.add_paragraph(NOW_ET.strftime("%A, %B %d, %Y  |  Generated at %H:%M %Z"))
    stamp.alignment = WD_ALIGN_PARAGRAPH.CENTER
    doc.add_paragraph()

    # Risk assessment table: dark header row with white bold text.
    doc.add_heading("Risk Assessment", level=2)
    risk_table = doc.add_table(rows=1, cols=3)
    risk_table.style = "Table Grid"
    header_labels = ["Metric", "Score (1-10)", "Details"]
    for header_cell, label in zip(risk_table.rows[0].cells, header_labels):
        header_cell.text = label
        header_cell.paragraphs[0].runs[0].bold = True
        set_cell_bg(header_cell, "1F3864")
        for run in header_cell.paragraphs[0].runs:
            run.font.color.rgb = RGBColor(0xFF, 0xFF, 0xFF)

    # One bordered row per risk source.
    risk_rows = [
        (metric, source.get("risk_score", "N/A"), source.get("summary", ""))
        for metric, source in (
            ("Weather Risk", weather_data),
            ("Traffic Risk", traffic_data),
        )
    ]
    for values in risk_rows:
        row_cells = risk_table.add_row().cells
        for position, value in enumerate(values):
            target = row_cells[position]
            target.text = str(value)
            set_cell_border(target)

    doc.add_paragraph()

    # Duty crew narrative
    doc.add_heading("Duty Crew Risk Narrative", level=2)
    doc.add_paragraph(narrative)
    doc.add_paragraph()

    # Incident tables, each followed by a blank spacer paragraph.
    for summary_table, caption in (
        (table_daily,   "Last 24 Hours - Incident Summary"),
        (table_weekly,  "Last 7 Days - Incident Summary"),
        (table_monthly, "Last 30 Days - Incident Summary"),
    ):
        add_incident_table_to_doc(doc, summary_table, caption)
        doc.add_paragraph()

    # Mutual aid (30-day frame)
    add_mutual_aid_section(doc, df_monthly)
    doc.add_paragraph()

    # Definitions
    add_definitions_section(doc)

    # Incident histogram chart - only when a chart file was actually produced.
    has_chart = bool(chart_path) and os.path.exists(chart_path)
    if has_chart:
        doc.add_heading("Incident Distribution (10-Day Buckets)", level=2)
        doc.add_picture(chart_path, width=Inches(6.5))
        # The picture lands in the document's last paragraph; center it.
        doc.paragraphs[-1].alignment = WD_ALIGN_PARAGRAPH.CENTER
        doc.add_paragraph()

    doc.save(output_path)
    print(f"Word document saved: {output_path}")
    return output_path

# Cell-completion marker: reached only if every builder definition above ran without error.
print("Word document builder functions defined.")


## Section 7: Weather Risk Score (NWS)

In [None]:
# HTTP headers sent with the api.weather.gov request in get_pittsburgh_weather();
# the User-Agent identifies this script and gives a contact address.
NWS_HEADERS = {"User-Agent": "IVFC-DailyReport/1.0 (contact@ivfc187.org)"}

def _nws_quantity(props: dict, field: str) -> tuple:
    """Return ``(value, unit_code)`` for one quantitative field of an NWS observation.

    A missing or null field yields ``(None, "")`` so callers can branch on ``None``.
    """
    entry = props.get(field) or {}
    return entry.get("value"), str(entry.get("unitCode") or "")


def _score_weather_observation(props: dict) -> dict:
    """Turn the ``properties`` object of an NWS observation into a 1-10 risk score.

    Scoring starts at 1 and adds points for temperature extremes, wind speed,
    precipitation (measured, or implied by the text description) and low
    visibility; the result is capped at 10.
    """
    temp_c, _ = _nws_quantity(props, "temperature")
    wind_raw, wind_unit = _nws_quantity(props, "windSpeed")
    precip_raw, precip_unit = _nws_quantity(props, "precipitationLastHour")
    vis_m, _ = _nws_quantity(props, "visibility")
    conditions = str(props.get("textDescription") or "").lower()

    temp_f = (temp_c * 9 / 5 + 32) if temp_c is not None else None

    # api.weather.gov reports windSpeed in km/h ("wmoUnit:km_h-1"); older
    # payloads used m/s ("unit:m_s-1"). Convert according to the declared unit
    # instead of assuming m/s, which overstated the wind roughly 3.6x.
    if wind_raw is None:
        wind_mph = 0
    elif wind_unit.endswith("m_s-1"):
        wind_mph = wind_raw * 2.237
    else:
        wind_mph = wind_raw * 0.621371

    # precipitationLastHour is metric (mm, or metres in older payloads); the
    # summary labels the value in inches, so convert before formatting.
    if not precip_raw:
        precip = 0
    elif precip_unit.endswith(":m"):
        precip = precip_raw * 39.3701
    else:
        precip = precip_raw / 25.4

    # A missing visibility reading is treated as clear (10 miles).
    vis_mi = (vis_m / 1609.34) if vis_m is not None else 10

    score = 1
    if temp_f is not None:
        if temp_f <= 20 or temp_f >= 100:
            score += 3
        elif temp_f <= 32 or temp_f >= 90:
            score += 2
        elif temp_f <= 40 or temp_f >= 85:
            score += 1
    if wind_mph >= 40:
        score += 3
    elif wind_mph >= 25:
        score += 2
    elif wind_mph >= 15:
        score += 1
    if precip > 0:
        score += 2
    elif any(w in conditions for w in ["rain", "snow", "storm", "thunder", "sleet", "ice"]):
        # No measured precipitation: fall back to the text description.
        score += 2
    if vis_mi < 0.25:
        score += 2
    elif vis_mi < 1:
        score += 1

    score = min(score, 10)
    summary = (
        f"Temp: {temp_f:.0f}F, Wind: {wind_mph:.0f} mph, "
        f"Precip: {precip:.2f} in, Vis: {vis_mi:.1f} mi"
        if temp_f is not None else conditions
    )
    return {"risk_score": score, "summary": summary, "raw": props}


def get_pittsburgh_weather() -> dict:
    """Fetch the latest KPIT observation from api.weather.gov and score it.

    Returns:
        A dict with ``risk_score`` (int, 1-10), ``summary`` (str) and ``raw``
        (the observation's ``properties`` dict). Any failure - network error,
        non-2xx HTTP status, unparseable or empty payload - yields the neutral
        fallback ``{"risk_score": 5, "summary": "Weather data unavailable: ...",
        "raw": {}}`` rather than raising, so the report can still be produced.
    """
    try:
        obs_url = "https://api.weather.gov/stations/KPIT/observations/latest"
        resp = requests.get(obs_url, headers=NWS_HEADERS, timeout=15)
        # Without this, an error body would be scored as "no hazards" (score 1).
        resp.raise_for_status()
        props = resp.json().get("properties") or {}
        if not props:
            raise ValueError("observation contained no properties")
        return _score_weather_observation(props)
    except Exception as e:
        # Deliberate best-effort boundary: report a mid-scale score on any failure.
        return {"risk_score": 5, "summary": f"Weather data unavailable: {e}", "raw": {}}

# Fetch once at cell execution; weather_data is reused by the narrative,
# the Word document, the email body and the execution summary.
weather_data = get_pittsburgh_weather()
print(f"Weather risk score: {weather_data['risk_score']}/10")
print(f"Summary: {weather_data['summary']}")


## Section 8: Traffic Risk Score (TomTom)

In [None]:
# Latitude/longitude bounds of the area queried for traffic incidents
# (read by get_pittsburgh_traffic when building the bbox request parameter).
PGH_BBOX = {
    "min_lat": 40.30, "max_lat": 40.65,
    "min_lon": -80.20, "max_lon": -79.75,
}

def _tomtom_bbox_param(bbox: dict) -> str:
    """Format a bounds dict as TomTom's ``bbox`` value: minLon,minLat,maxLon,maxLat."""
    return f"{bbox['min_lon']},{bbox['min_lat']},{bbox['max_lon']},{bbox['max_lat']}"


def _rush_hour_tier(hour: int) -> int:
    """Classify an hour of day: 2 = peak rush, 1 = shoulder, 0 = off-peak."""
    if 7 <= hour <= 9 or 16 <= hour <= 18:
        return 2
    if 6 <= hour <= 10 or 15 <= hour <= 19:
        return 1
    return 0


def _score_traffic_incidents(incidents: list, hour: int) -> dict:
    """Score a list of TomTom incidents (1-10) from volume, severity and time of day."""
    total = len(incidents)
    # A null or missing magnitudeOfDelay counts as 0 instead of raising TypeError.
    serious = sum(
        1 for incident in incidents
        if ((incident.get("properties") or {}).get("magnitudeOfDelay") or 0) >= 3
    )

    score = 1
    if total >= 20:
        score += 3
    elif total >= 10:
        score += 2
    elif total >= 5:
        score += 1
    if serious >= 5:
        score += 3
    elif serious >= 2:
        score += 2
    elif serious >= 1:
        score += 1
    score += _rush_hour_tier(hour)  # +2 peak, +1 shoulder, +0 off-peak
    score = min(score, 10)

    summary = f"{total} incidents ({serious} serious) in Pittsburgh metro"
    return {"risk_score": score, "summary": summary, "incidents": incidents}


def get_pittsburgh_traffic() -> dict:
    """Query TomTom incident details for PGH_BBOX and derive a 1-10 traffic risk score.

    Returns:
        A dict with ``risk_score`` (int), ``summary`` (str) and ``incidents``
        (list). Degraded modes never raise:
          * no API key configured      -> score 5, "TomTom API key not set"
          * non-200 response           -> time-of-day estimate (7 / 5 / 2)
          * exception during the call  -> score 4, "Traffic data error: ..."
    """
    if not TOMTOM_API_KEY:
        return {"risk_score": 5, "summary": "TomTom API key not set", "incidents": []}
    try:
        # TomTom expects longitude first; a lat-first bbox does not describe
        # the intended area and the request is rejected.
        bbox_str = _tomtom_bbox_param(PGH_BBOX)
        url = (
            f"https://api.tomtom.com/traffic/services/5/incidentDetails"
            f"?key={TOMTOM_API_KEY}&bbox={bbox_str}&fields={{incidents{{type,geometry{{type}},"
            f"properties{{iconCategory,magnitudeOfDelay,startTime,endTime,length,delay,"
            f"roadNumbers,timeValidity}}}}}}&language=en-GB&t=1111&categoryFilter=0,1,2,3,4,5,6,7,8,9,10,11,14"
        )
        resp = requests.get(url, timeout=15)
        if resp.status_code == 200:
            incidents = resp.json().get("incidents") or []
            return _score_traffic_incidents(incidents, NOW_ET.hour)

        # API answered but not successfully: estimate from time of day alone.
        estimates = {
            2: (7, "Peak rush hour - elevated traffic risk (API unavailable)"),
            1: (5, "Shoulder rush hour - moderate traffic risk (API unavailable)"),
            0: (2, "Off-peak hours - low traffic risk (API unavailable)"),
        }
        score, summary = estimates[_rush_hour_tier(NOW_ET.hour)]
        return {"risk_score": score, "summary": summary, "incidents": []}
    except Exception as e:
        # The exception text can embed the request URL, which carries the API
        # key; this summary is written into the report and the email, so the
        # key is redacted before it leaves this function.
        detail = str(e).replace(str(TOMTOM_API_KEY), "[redacted]")
        return {"risk_score": 4, "summary": f"Traffic data error: {detail}", "incidents": []}

# Fetch once at cell execution; traffic_data is reused by the narrative,
# the Word document, the email body and the execution summary.
traffic_data = get_pittsburgh_traffic()
print(f"Traffic risk score: {traffic_data['risk_score']}/10")
print(f"Summary: {traffic_data['summary']}")


## Section 9: Claude AI Risk Narrative

In [None]:
def generate_risk_narrative(weather: dict, traffic: dict, df_recent: pd.DataFrame) -> str:
    """Ask Claude for a short duty-crew risk narrative, with a plain-text fallback.

    Args:
        weather:   Dict with ``risk_score`` and ``summary`` entries.
        traffic:   Dict with ``risk_score`` and ``summary`` entries.
        df_recent: Recent incidents; when non-empty it must have a ``category``
                   column, whose five most frequent values are put in the prompt.

    Returns:
        The model's narrative text. If the client cannot be created or the
        request fails for any reason, a one-line summary of the risk scores and
        the most frequent incident category is returned instead of raising.
    """
    # Count categories once; both the prompt and the fallback text use the result.
    if df_recent.empty:
        category_counts = None
        recent_types = "No incidents in the last 24 hours"
    else:
        category_counts = df_recent["category"].value_counts()
        recent_types = ", ".join(
            f"{cat} ({cnt})" for cat, cnt in category_counts.head(5).items()
        )

    prompt = f"""You are a fire department operations analyst for Ingomar Volunteer Fire Company, Station 187,
serving the McCandless area of Allegheny County, Pennsylvania.

Current conditions:
- Weather risk score: {weather.get('risk_score', 'N/A')}/10
- Weather summary: {weather.get('summary', 'N/A')}
- Traffic risk score: {traffic.get('risk_score', 'N/A')}/10
- Traffic summary: {traffic.get('summary', 'N/A')}
- Recent 24-hour incident types: {recent_types}

Write a concise duty-crew risk narrative (3-4 sentences) for today operations briefing.
Include: the highest current risk factor, what incident types the crew should be prepared for,
and one specific mitigation recommendation. Use professional fire service language."""

    try:
        # Client construction lives inside the try so that a missing package,
        # missing key or client-side configuration error degrades to the
        # fallback text instead of aborting the whole report run.
        client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
        message = client.messages.create(
            model="claude-haiku-4-5-20251001",
            max_tokens=300,
            messages=[{"role": "user", "content": prompt}]
        )
        return message.content[0].text
    except Exception as e:
        primary_type = category_counts.index[0] if category_counts is not None else "N/A"
        return (
            f"Risk narrative unavailable ({e}). "
            f"Weather risk: {weather.get('risk_score')}/10. "
            f"Traffic risk: {traffic.get('risk_score')}/10. "
            f"Primary incident type: {primary_type}."
        )

# Build the narrative from the weather/traffic results and the df_daily
# incident frame, then echo it so the cell output shows what goes in the report.
narrative = generate_risk_narrative(weather_data, traffic_data, df_daily)
print("Risk narrative generated.")
print()
print(narrative)


## Section 10: Assemble and Save Report

In [None]:
import os, tempfile

# Timestamped file name (YYYYMMDD_HHMM from NOW_ET), written to the system temp directory.
report_filename = f"IVFC_DailyReport_{NOW_ET.strftime('%Y%m%d_%H%M')}.docx"
report_path = os.path.join(tempfile.gettempdir(), report_filename)

build_word_document(
    table_daily   = table_daily,
    table_weekly  = table_weekly,
    table_monthly = table_monthly,
    df_daily      = df_daily,
    df_weekly     = df_weekly,
    df_monthly    = df_monthly,
    weather_data  = weather_data,
    traffic_data  = traffic_data,
    narrative     = narrative,
    output_path   = report_path,
    # chart_path may not exist if no earlier cell defined it; pass None in that case.
    chart_path    = chart_path if "chart_path" in dir() else None,
)

print(f"Report assembled: {report_filename}")


## Section 11: Upload to Google Cloud Storage

In [None]:
# upload_to_gcs is defined in Section 3b (before the scraper) so it's available early.
# Calling it here to upload the final Word report.
# The result is kept in gcs_uri: the email body and the execution summary both
# test it for truthiness and include it when set.
gcs_uri = upload_to_gcs(report_path, GCS_BUCKET_NAME)

## Section 12: Email Report via SendGrid

In [None]:
def send_report_email(
        report_path: str,
        report_filename: str,
        gcs_uri: str,
        weather: dict,
        traffic: dict,
) -> bool:
    """Email the report as an attachment: SendGrid Web API first, SMTP relay as fallback.

    Args:
        report_path:     Path of the .docx file to attach.
        report_filename: File name shown on the attachment.
        gcs_uri:         Storage location added to the body when truthy.
        weather:         Dict with ``risk_score`` / ``summary`` for the body text.
        traffic:         Dict with ``risk_score`` / ``summary`` for the body text.

    Returns:
        True if either transport accepted the message; False if EMAIL_TO holds
        no addresses or both transports failed. Never raises for send failures.
    """
    # Recipients, subject and body are built BEFORE the first try block: the
    # SMTP fallback needs them even when the SendGrid path fails on its very
    # first statement (e.g. the sendgrid package is not installed).
    to_list = [addr.strip() for addr in (EMAIL_TO or "").split(",") if addr.strip()]
    if not to_list:
        print("Email not sent: EMAIL_TO contains no recipient addresses.")
        return False

    subject = f"IVFC Station 187 Daily Report - {NOW_ET.strftime('%B %d, %Y')}"
    body = (
        f"IVFC Station 187 - Daily Operations Report\n\n"
        f"Report Date: {NOW_ET.strftime('%A, %B %d, %Y')}\n"
        f"Generated:   {NOW_ET.strftime('%H:%M %Z')}\n\n"
        f"Weather Risk Score: {weather.get('risk_score', 'N/A')}/10 - {weather.get('summary', '')}\n"
        f"Traffic Risk Score: {traffic.get('risk_score', 'N/A')}/10 - {traffic.get('summary', '')}\n"
    )
    if gcs_uri:
        body += f"\nGCS Location: {gcs_uri}\n"

    # --- Primary transport: SendGrid Web API --------------------------------
    try:
        import sendgrid
        from sendgrid.helpers.mail import (
            Mail, Attachment, FileContent, FileName,
            FileType, Disposition, To,
        )
        with open(report_path, "rb") as f:
            file_data = base64.b64encode(f.read()).decode()
        message = Mail(
            from_email = EMAIL_FROM,
            to_emails  = [To(addr) for addr in to_list],
            subject    = subject,
            plain_text_content = body,
        )
        message.attachment = Attachment(
            FileContent(file_data),
            FileName(report_filename),
            FileType("application/vnd.openxmlformats-officedocument.wordprocessingml.document"),
            Disposition("attachment"),
        )
        sg = sendgrid.SendGridAPIClient(api_key=SENDGRID_API_KEY)
        response = sg.send(message)
        print(f"Email sent. Status: {response.status_code}")
        return True
    except Exception as e:
        print(f"SendGrid failed ({e}), trying SMTP fallback...")

    # --- Fallback transport: SendGrid SMTP relay (stdlib only) --------------
    try:
        import smtplib
        from email.mime.multipart import MIMEMultipart
        from email.mime.base import MIMEBase
        from email.mime.text import MIMEText
        from email import encoders
        msg = MIMEMultipart()
        msg["From"]    = EMAIL_FROM
        msg["To"]      = ", ".join(to_list)
        msg["Subject"] = subject
        msg.attach(MIMEText(body, "plain"))
        with open(report_path, "rb") as f:
            part = MIMEBase("application", "octet-stream")
            part.set_payload(f.read())
        encoders.encode_base64(part)
        part.add_header("Content-Disposition", f'attachment; filename="{report_filename}"')
        msg.attach(part)
        # Timeout so an unreachable relay cannot hang the report run indefinitely.
        with smtplib.SMTP("smtp.sendgrid.net", 587, timeout=30) as server:
            server.starttls()
            server.login("apikey", SENDGRID_API_KEY)
            # Use the cleaned recipient list, not the raw comma-split string.
            server.sendmail(EMAIL_FROM, to_list, msg.as_string())
        print("Email sent via SMTP fallback.")
        return True
    except Exception as e2:
        print(f"Email failed entirely: {e2}")
        return False

# Send the assembled report; the boolean result is shown in the execution summary.
email_sent = send_report_email(
    report_path      = report_path,
    report_filename  = report_filename,
    gcs_uri          = gcs_uri,
    weather          = weather_data,
    traffic          = traffic_data,
)


## Section 13: Execution Summary

In [None]:
# Final run summary: one banner-framed block listing the report timestamp,
# incident totals per window, both risk scores and the delivery outcome.
# Blank entries are the spacer lines between groups.
print("\n".join([
    "=" * 60,
    "  IVFC DAILY OPERATIONS REPORT - EXECUTION SUMMARY",
    "=" * 60,
    f"  Report date:     {NOW_ET.strftime('%A, %B %d, %Y')}",
    f"  Generated at:    {NOW_ET.strftime('%H:%M %Z')}",
    "",
    f"  Incidents (24h): {stats_daily['total']}",
    f"  Incidents (7d):  {stats_weekly['total']}",
    f"  Incidents (30d): {stats_monthly['total']}",
    "",
    f"  Weather risk:    {weather_data.get('risk_score', 'N/A')}/10",
    f"  Traffic risk:    {traffic_data.get('risk_score', 'N/A')}/10",
    "",
    f"  Report file:     {report_filename}",
    f"  GCS location:    {gcs_uri or 'Not uploaded'}",
    f"  Email sent:      {'Yes' if email_sent else 'No'}",
    "=" * 60,
]))
