# 03 - Service Effectiveness

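Measure how effectively states convert spending into outcomes. The composite score computed in this notebook currently combines three of the metrics listed below: NAEP grade 8 math and reading scores, FBI violent crime rates, and CDC infant mortality rates.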

**Metrics:**
- Education: NAEP test scores, graduation rates, college readiness
- Infrastructure: ASCE grades, road condition ratings
- Public safety: crime rates (FBI UCR)
- Health: child health outcomes, infant mortality

**Output:** `service_effectiveness.json` (written to `../docs/charts/data/`), `service_effectiveness.csv` (written to `../data/processed/`)

In [None]:
import sys, os, json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
from dotenv import load_dotenv

sys.path.insert(0, str(Path("..").resolve()))
from src.data_utils import (
    fetch_naep_scores, fetch_fbi_crime_data, load_cdc_infant_mortality,
)

# Load environment variables (e.g. API keys) from the .env file one level up.
load_dotenv(Path("../.env"))

# Data locations, given as paths relative to the current working directory.
RAW_DIR = Path("../data/raw")
PROCESSED_DIR = Path("../data/processed")
CHARTS_DIR = Path("../docs/charts/data")

# Create each directory (and any missing parents) up front; directories that
# already exist are left alone.
for _data_dir in (RAW_DIR, PROCESSED_DIR, CHARTS_DIR):
    _data_dir.mkdir(parents=True, exist_ok=True)

In [None]:
def normalize_to_100(series, invert=False):
    """Min-max normalize a series to 0-100.

    The smallest observed value maps to 0 and the largest to 100 (or the
    reverse when ``invert`` is set). Missing values are skipped when the
    bounds are computed and remain missing in the result.

    Args:
        series: pandas Series of raw numeric values
        invert: if True, higher raw values produce lower scores
                (use for metrics where lower is better, like crime rates)

    Returns:
        A pandas Series aligned to ``series.index``. When all non-missing
        values are identical there is no spread to rank, so those entries
        get the neutral midpoint 50.0.
    """
    min_val = series.min()
    max_val = series.max()
    if max_val == min_val:
        # No spread: avoid dividing by zero and hand out the midpoint instead.
        # Mask with notna() so rows without data are not given a made-up score,
        # and keep the series name so both branches return comparable objects.
        neutral = pd.Series(50.0, index=series.index, name=series.name)
        return neutral.where(series.notna())
    normalized = (series - min_val) / (max_val - min_val) * 100
    if invert:
        # Flip the scale so the lowest raw value earns the highest score.
        normalized = 100 - normalized
    return normalized

# Fetch NAEP math + reading scores (grade 8)
math_scores = fetch_naep_scores(subject="mathematics", grade=8)
reading_scores = fetch_naep_scores(subject="reading", grade=8)

# Take state, state_name and the math score straight from the math frame.
# Merging math_scores with itself on "state" would add nothing and would
# multiply rows if a state code ever appeared more than once.
education = math_scores[["state", "state_name", "score"]].rename(
    columns={"score": "math_score"}
)

# Inner join: only states present in both subjects get an average score.
education = education.merge(
    reading_scores[["state", "score"]].rename(columns={"score": "reading_score"}),
    on="state",
    how="inner",
)

# Average math and reading scores
education["avg_score"] = (education["math_score"] + education["reading_score"]) / 2

print(f"Education data: {len(education)} states")
print(f"Average score range: {education['avg_score'].min():.0f} – {education['avg_score'].max():.0f}")
# Last expression of the cell: show the ten highest-scoring states.
education.sort_values("avg_score", ascending=False).head(10)

In [None]:
# Fetch FBI violent crime rates.
# The API key is read from the FBI_API_KEY environment variable and falls
# back to an empty string when the variable is not set.
FBI_API_KEY = os.environ.get("FBI_API_KEY", "")
crime = fetch_fbi_crime_data(FBI_API_KEY)

# Quick sanity summary: row count and spread of the violent crime rate.
violent_rate = crime["violent_crime"]
print(f"Crime data: {len(crime)} states")
print(
    f"Violent crime rate range: {violent_rate.min():.0f} – "
    f"{violent_rate.max():.0f} per 100k"
)

# Last expression of the cell: the ten states with the highest rate.
crime.sort_values(by="violent_crime", ascending=False).head(10)

In [None]:
# Load CDC infant mortality data
mortality = load_cdc_infant_mortality()

# Quick sanity summary: row count and spread of the infant mortality rate.
infant_rate = mortality["infant_mort_rate"]
print(f"Mortality data: {len(mortality)} states")
print(
    f"Infant mortality rate range: {infant_rate.min():.1f} – "
    f"{infant_rate.max():.1f} per 1,000 live births"
)

# Last expression of the cell: the ten states with the highest rate.
mortality.sort_values(by="infant_mort_rate", ascending=False).head(10)

In [None]:
# Merge all metrics and normalize to a 0-100 composite score.
# Inner joins on "state" keep only the states present in all three tables.
crime_cols = crime[["state", "violent_crime"]]
mortality_cols = mortality[["state", "infant_mort_rate"]]
effectiveness = education[["state", "state_name", "avg_score"]]
effectiveness = effectiveness.merge(crime_cols, on="state", how="inner")
effectiveness = effectiveness.merge(mortality_cols, on="state", how="inner")

# (output column, raw column, lower_is_better)
# Education: higher is better. Crime and mortality: lower is better, so invert.
metric_specs = (
    ("education_score", "avg_score", False),
    ("safety_score", "violent_crime", True),
    ("health_score", "infant_mort_rate", True),
)
for score_col, raw_col, lower_is_better in metric_specs:
    effectiveness[score_col] = normalize_to_100(
        effectiveness[raw_col], invert=lower_is_better
    )

# Composite = simple (unweighted) average of the three normalized scores
score_total = (
    effectiveness["education_score"]
    + effectiveness["safety_score"]
    + effectiveness["health_score"]
)
effectiveness["composite_score"] = score_total / 3

composite = effectiveness["composite_score"]
print(f"States with all 3 metrics: {len(effectiveness)}")
print(f"Composite score range: {composite.min():.1f} – " f"{composite.max():.1f}")
print(f"Mean composite: {composite.mean():.1f}")

# Last expression of the cell: the ten best composite scores.
effectiveness.sort_values(by="composite_score", ascending=False).head(10)

In [None]:
# Export the per-state scores to CSV and JSON.
export_cols = [
    "state",
    "state_name",
    "education_score",
    "safety_score",
    "health_score",
    "composite_score",
]

# Keep only the exported columns, ordered by state code with a fresh index.
out = effectiveness.loc[:, export_cols].sort_values(by="state").reset_index(drop=True)

csv_path = PROCESSED_DIR / "service_effectiveness.csv"
json_path = CHARTS_DIR / "service_effectiveness.json"

# CSV without the index column.
out.to_csv(csv_path, index=False)
print(f"Wrote {csv_path}")

# JSON as a list of per-state records, indented for readability.
out.to_json(json_path, orient="records", indent=2)
print(f"Wrote {json_path}")

In [None]:
# Visualization: composite bar chart (left) and education-vs-safety scatter (right)
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(16, 6))
ax1, ax2 = axes

# Left panel: horizontal bars sorted ascending by composite score, colored by
# the score itself on a red-yellow-green scale.
plot_data = out.sort_values("composite_score", ascending=True)
colors = plt.cm.RdYlGn(plot_data["composite_score"] / 100)
ax1.barh(plot_data["state"], plot_data["composite_score"], color=colors)
ax1.set(
    xlabel="Composite Effectiveness Score (0–100)",
    title="Service Effectiveness by State",
)
# Dashed reference line at the midpoint of the 0-100 scale.
ax1.axvline(x=50, color="gray", linestyle="--", alpha=0.5)

# Right panel: education vs safety, with point color encoding the health score.
points = ax2.scatter(
    out["education_score"],
    out["safety_score"],
    c=out["health_score"],
    cmap="RdYlGn",
    s=40,
    alpha=0.8,
    edgecolors="gray",
)
# Label every point with its state code.
for state_code, edu, safety in zip(
    out["state"], out["education_score"], out["safety_score"]
):
    ax2.annotate(state_code, (edu, safety), fontsize=6, alpha=0.7)
ax2.set(
    xlabel="Education Score",
    ylabel="Safety Score",
    title="Education vs Safety (color = Health)",
)
cbar = plt.colorbar(points, ax=ax2, label="Health Score")

plt.tight_layout()
plt.show()

In [None]:
import plotly.express as px

# Interactive US choropleth of the composite score.
# Hover shows the full state name plus every score to one decimal place;
# the raw state code is hidden from the tooltip.
score_columns = ("composite_score", "education_score", "safety_score", "health_score")
hover_spec = {"state": False}
hover_spec.update({column: ":.1f" for column in score_columns})

fig = px.choropleth(
    out,
    locations="state",
    locationmode="USA-states",
    scope="usa",
    color="composite_score",
    color_continuous_scale="RdYlGn",
    # Pin the color scale to the full 0-100 range of the score.
    range_color=[0, 100],
    hover_name="state_name",
    hover_data=hover_spec,
    labels={"composite_score": "Composite Score"},
    title="Service Effectiveness by State",
)
fig.update_layout(
    coloraxis_colorbar={"title": "Score"},
    geo={"lakecolor": "rgb(255,255,255)"},
)
fig.show()