# annual analytics 2025-2026

This notebook reads the CSV packs generated by `analytics-range` and `report-range`.

Expected folders:
- data/reports/analytics_2025-01-01_2026-01-20/
- data/reports/range_2025-01-01_2026-01-20/
- data/figures/2025-01-01_2026-01-20/ (optional)


In [None]:
from pathlib import Path
import pandas as pd

# Reporting window (ISO dates); both bounds are embedded in the folder names.
start, end = "2025-01-01", "2026-01-20"

# Every CSV pack lives in a sub-folder of data/reports named after the window.
reports_dir = Path("data/reports")
analytics_dir = reports_dir.joinpath(f"analytics_{start}_{end}")
range_dir = reports_dir.joinpath(f"range_{start}_{end}")

# Echo the resolved folders as the cell output.
(analytics_dir, range_dir)


In [None]:
# Tables from the analytics pack.
quality = pd.read_csv(analytics_dir.joinpath("quality_daily.csv"))
seasonality = pd.read_csv(analytics_dir.joinpath("seasonality_uf.csv"))
hotspots = pd.read_csv(analytics_dir.joinpath("hotspots_mun_period.csv"))
shifts = pd.read_csv(analytics_dir.joinpath("top_shifts_uf.csv"))

# Tables from the range-report pack.
br_daily = pd.read_csv(range_dir.joinpath("br_daily.csv"))
uf_top = pd.read_csv(range_dir.joinpath("uf_top.csv"))
mun_top = pd.read_csv(range_dir.joinpath("mun_top.csv"))

# Preview the first rows of the quality table as the cell output.
quality.iloc[:5]


In [None]:
# A notebook cell only renders the value of its last bare expression, so the
# first table would be computed and silently dropped. Render both explicitly.
from IPython.display import display

display(uf_top.head(10))
display(mun_top.head(10))


In [None]:
import matplotlib.pyplot as plt

# Data-quality chart: pct_missing as a line on the left axis, missing_mun as
# bars on the right axis, with markers on the days where missing_mun > 0.
quality["day"] = pd.to_datetime(quality["day"])
# pct_missing is the complement of pct_com_mun (presumably the percentage of
# records that do carry a municipality -- confirm against the CSV producer).
quality["pct_missing"] = 100 - quality["pct_com_mun"]

fig, ax_pct = plt.subplots(figsize=(12, 4))
ax_miss = ax_pct.twinx()

# Twin axes each start their own colour cycle, so without an explicit colour
# the line and the bars would both be drawn in the same default colour.
ax_pct.plot(quality["day"], quality["pct_missing"], color="tab:red", label="pct_missing")
ax_miss.bar(quality["day"], quality["missing_mun"], alpha=0.35, label="missing_mun")

# Highlight the days that actually have missing rows; a zero-height bar is invisible.
miss_mask = quality["missing_mun"] > 0
ax_miss.scatter(quality.loc[miss_mask, "day"], quality.loc[miss_mask, "missing_mun"], s=18)

ax_pct.set_ylabel("pct_missing")
ax_miss.set_ylabel("missing_mun")
ax_pct.set_xlabel("day")

# The labels passed above are only shown if a legend is drawn. Merge the
# entries of both axes into a single legend on the top-most axes.
pct_handles, pct_labels = ax_pct.get_legend_handles_labels()
miss_handles, miss_labels = ax_miss.get_legend_handles_labels()
ax_miss.legend(
    pct_handles + miss_handles,
    pct_labels + miss_labels,
    loc="upper right",
    fontsize=8,
)
plt.tight_layout()


In [None]:
# Country-level daily series: raw n_focos counts plus 7-day rolling means.
br_daily["day"] = pd.to_datetime(br_daily["day"])
br_daily = br_daily.sort_values(by="day")

# min_periods=1 keeps the first six days instead of leaving them as NaN.
for raw_col, ma_col in (
    ("n_focos_total", "total_ma7"),
    ("n_focos_com_mun", "com_mun_ma7"),
):
    br_daily[ma_col] = br_daily[raw_col].rolling(window=7, min_periods=1).mean()

fig, ax = plt.subplots(figsize=(12, 4))

# (column, legend label, line style): faint daily values first, then the
# bolder moving averages, in the same draw order for a stable colour cycle.
line_specs = (
    ("n_focos_total", "total_daily", {"alpha": 0.3}),
    ("n_focos_com_mun", "com_mun_daily", {"alpha": 0.3}),
    ("total_ma7", "total_7d", {"linewidth": 1.8}),
    ("com_mun_ma7", "com_mun_7d", {"linewidth": 1.8}),
)
for line_col, line_label, line_style in line_specs:
    ax.plot(br_daily["day"], br_daily[line_col], label=line_label, **line_style)

ax.set_ylabel("n_focos")
ax.legend(ncol=2, fontsize=8)
plt.tight_layout()


In [None]:
# Monthly n_focos per UF, restricted to the ten UFs with the largest totals.
seasonality["month"] = pd.to_datetime(seasonality["month"])

# Rank UFs by their summed n_focos over the whole table and keep the top ten.
uf_totals = seasonality.groupby("uf")["n_focos"].sum()
ranked_totals = uf_totals.sort_values(ascending=False)
top_ufs = ranked_totals.index[:10].tolist()

fig, ax = plt.subplots(figsize=(14, 6))
for uf in top_ufs:
    # One line per UF, ordered by month so the line is drawn left to right.
    data = seasonality.loc[seasonality["uf"] == uf].sort_values(by="month")
    ax.plot(data["month"], data["n_focos"], label=uf)

ax.set_ylabel("n_focos")
# Anchor the legend outside the plotting area so it never covers the lines.
ax.legend(bbox_to_anchor=(1.02, 1), loc="upper left", fontsize=8, frameon=False)
plt.tight_layout()


In [None]:
# Preview the first 20 rows of the table loaded from hotspots_mun_period.csv.
hotspots.iloc[:20]


In [None]:
# Render the whole table loaded from top_shifts_uf.csv as the cell output.
shifts


In [None]:
from IPython.display import Image, display

# Pre-rendered figures for this window are optional: show those that exist
# on disk and report the ones that do not.
fig_dir = Path("data/figures").joinpath(f"{start}_{end}")
figs = [
    "quality_pct_missing.png",
    "total_vs_com_mun.png",
    "seasonality_uf_top10.png",
    "hotspots_top_count.png",
    "hotspots_top_density.png",
    "shifts_topn.png",
]

for name in figs:
    path = fig_dir / name
    if not path.exists():
        print(f"missing: {path}")
        continue
    display(Image(filename=str(path)))
