# 04 - Public Good Score

Combine all criteria into a final Public Good Index score per state.

## Methodology (inspired by the Corruption Perceptions Index, CPI)

1. **Normalize** each sub-metric using z-scores:
   - `z = (value - mean) / std_dev`
   - Rescale to 0–100: `score = clip(z * 20 + 45, 0, 100)` (mean ~45, std_dev ~20, clamped to [0, 100])

2. **Sub-metrics** (all oriented so higher = better public good):
   - Tax efficiency: tax burden with its sign flipped before normalization (lower taxes = higher score)
   - Investment ratio: % of spending classified as investment
   - Service effectiveness: composite of outcome metrics

3. **Aggregate**: simple average of normalized sub-scores

- Score of 100 = low taxes, high investment ratio, great outcomes
- Score of 0 = high taxes, spending mostly classified as cost rather than investment, poor outcomes

**Output:** `public_good_scores.json` for D3 charts, plus `public_good_scores.csv` and `public_good_detailed.csv` in the processed-data directory

In [None]:
import pandas as pd
import numpy as np
import json
import matplotlib.pyplot as plt
from pathlib import Path

# Notebook I/O locations. Both paths are relative, so they resolve against the
# working directory the notebook is run from.
# PROCESSED_DIR: the input CSVs are read from here and the score CSVs are
# written back here.
PROCESSED_DIR = Path("../data/processed")
# CHARTS_DIR: destination for public_good_scores.json.
CHARTS_DIR = Path("../docs/charts/data")
# Create the charts directory (and any missing parents) up front; an existing
# directory is not an error.
CHARTS_DIR.mkdir(parents=True, exist_ok=True)

In [None]:
def normalize_zscore(series, invert=False, *, target_mean=45, target_std=20):
    """Normalize a series to 0-100 using z-score method (CPI-style).

    Each value is converted to a z-score against the series' own mean and
    sample standard deviation, rescaled so the mean maps to ``target_mean``
    and one standard deviation spans ``target_std`` points, then clamped to
    [0, 100].

    Args:
        series: pandas Series of raw values
        invert: if True, higher raw values produce lower scores
                (use for metrics where lower is better, like tax burden)
        target_mean: score given to a value equal to the series mean
                     (default 45, matching CPI parameters)
        target_std: score points per standard deviation from the mean
                    (default 20, matching CPI parameters)

    Returns:
        pandas Series of scores in [0, 100] on the same index as ``series``.
        Missing inputs stay missing. When the series has fewer than two
        distinct values (empty, single value, or constant) every non-missing
        entry scores ``target_mean`` clamped to [0, 100].
    """
    if invert:
        series = -series

    # With fewer than two distinct values the standard deviation is 0 or NaN,
    # so the z-score would be NaN for every state. There is no spread to
    # express, so place every non-missing value at the target mean instead.
    if series.nunique() < 2:
        flat = series.where(series.isna(), target_mean).astype(float)
        return flat.clip(0, 100)

    z = (series - series.mean()) / series.std()
    # Rescale: mean=45, std=20 by default (matching CPI parameters)
    scaled = z * target_std + target_mean
    return scaled.clip(0, 100)

In [None]:
# Inputs: the CSV outputs of notebooks 01-03, all located in PROCESSED_DIR
tax_burden_csv = PROCESSED_DIR / "tax_burden.csv"
spending_csv = PROCESSED_DIR / "spending_breakdown.csv"
effectiveness_csv = PROCESSED_DIR / "service_effectiveness.csv"

tax_burden = pd.read_csv(tax_burden_csv)
spending = pd.read_csv(spending_csv)
effectiveness = pd.read_csv(effectiveness_csv)

# Report the size of each table before they are merged
print(
    f"Tax burden:    {len(tax_burden)} states",
    f"Spending:      {len(spending)} states",
    f"Effectiveness: {len(effectiveness)} states",
    sep="\n",
)

In [None]:
# Merge all sub-metrics on state
# Both joins are inner joins, so a state must be present in all three inputs
# to receive a score.
combined = tax_burden[["state", "state_name", "total_burden"]].merge(
    spending[["state", "investment_ratio"]], on="state", how="inner"
).merge(
    effectiveness[["state", "composite_score"]], on="state", how="inner"
)

# Normalize each sub-metric using z-scores (CPI-style, 0-100)
# Tax burden: invert (lower taxes = higher score)
combined["tax_score"] = normalize_zscore(combined["total_burden"], invert=True)
# Investment ratio: higher = better
combined["investment_score"] = normalize_zscore(combined["investment_ratio"])
# Effectiveness: already 0-100 but re-normalize for consistency
combined["effectiveness_score"] = normalize_zscore(combined["composite_score"])

# Calculate Public Good Index = simple average of three scores
combined["public_good_index"] = (
    combined["tax_score"]
    + combined["investment_score"]
    + combined["effectiveness_score"]
) / 3

# Rank states (1 = best)
# method="min" gives tied states the same whole-number rank (1, 2, 2, 4).
# The default "average" method produces fractional ranks for ties (e.g. 2.5)
# that astype(int) would silently truncate.
combined["rank"] = (
    combined["public_good_index"].rank(ascending=False, method="min").astype(int)
)

# Quick sanity summary of the resulting index
print(f"States: {len(combined)}")
print(f"Public Good Index range: {combined['public_good_index'].min():.1f} – "
      f"{combined['public_good_index'].max():.1f}")
print(f"Mean: {combined['public_good_index'].mean():.1f}, "
      f"Median: {combined['public_good_index'].median():.1f}")
# Last expression of the cell: the notebook displays the ten best-ranked states
combined.sort_values("rank").head(10)

In [None]:
# Export results
# Column groups shared by the two output layouts
id_cols = ["state", "state_name", "rank"]
raw_cols = ["total_burden", "investment_ratio", "composite_score"]
derived_cols = ["tax_score", "investment_score", "effectiveness_score",
                "public_good_index"]

# Summary scores: identifiers followed by the normalized scores
score_cols = id_cols + derived_cols
# Detailed output with raw values: identifiers, raw inputs, then scores
detail_cols = id_cols + raw_cols + derived_cols

# Destinations for the three output files
csv_path = PROCESSED_DIR / "public_good_scores.csv"
json_path = CHARTS_DIR / "public_good_scores.json"
detail_path = PROCESSED_DIR / "public_good_detailed.csv"

# Both tables are ordered by rank with a fresh 0..n-1 index
summary_view = combined[score_cols]
scores = summary_view.sort_values("rank").reset_index(drop=True)
detail_view = combined[detail_cols]
detailed = detail_view.sort_values("rank").reset_index(drop=True)

scores.to_csv(csv_path, index=False)
print(f"Wrote {csv_path}")

scores.to_json(json_path, orient="records", indent=2)
print(f"Wrote {json_path}")

detailed.to_csv(detail_path, index=False)
print(f"Wrote {detail_path}")

In [None]:
# Dashboard visualization: a 2x2 grid of summary charts for the index
fig, axes = plt.subplots(2, 2, figsize=(18, 14))

# 1. Ranking bar chart
ax1 = axes[0, 0]
# Ascending sort puts the highest index at the top of the horizontal bar chart
plot_data = scores.sort_values("public_good_index", ascending=True)
# The colormap expects values in 0-1, so scale the 0-100 index down
colors = plt.cm.RdYlGn(plot_data["public_good_index"] / 100)
ax1.barh(plot_data["state"], plot_data["public_good_index"], color=colors)
ax1.set_xlabel("Public Good Index")
ax1.set_title("Public Good Index Ranking")
# 45 is the mean each sub-score is rescaled to before clamping
ax1.axvline(x=45, color="gray", linestyle="--", alpha=0.5, label="Expected Mean")
ax1.legend()

# 2. Component comparison (grouped bars for top/bottom 10)
ax2 = axes[0, 1]
# With 20 or fewer states, head(10) and tail(10) overlap and the same state
# would be drawn twice; in that case plot every state exactly once.
if len(scores) > 20:
    top_bottom = pd.concat([scores.head(10), scores.tail(10)])
else:
    top_bottom = scores
x = range(len(top_bottom))
# Bar thickness; the three component bars sit side by side around each tick
w = 0.25
ax2.barh([i - w for i in x], top_bottom["tax_score"], height=w, label="Tax", color="#3498db")
ax2.barh(x, top_bottom["investment_score"], height=w, label="Investment", color="#27ae60")
ax2.barh([i + w for i in x], top_bottom["effectiveness_score"], height=w, label="Effectiveness", color="#e74c3c")
ax2.set_yticks(x)
ax2.set_yticklabels(top_bottom["state"], fontsize=7)
ax2.set_xlabel("Score (0–100)")
ax2.set_title("Component Scores: Top & Bottom 10")
ax2.legend(fontsize=8)

# 3. Correlation matrix
ax3 = axes[1, 0]
corr_cols = ["tax_score", "investment_score", "effectiveness_score", "public_good_index"]
corr = combined[corr_cols].corr()
# Fix the color range to [-1, 1] so zero correlation maps to the colormap midpoint
im = ax3.imshow(corr, cmap="RdBu_r", vmin=-1, vmax=1)
ax3.set_xticks(range(len(corr_cols)))
ax3.set_yticks(range(len(corr_cols)))
labels = ["Tax", "Investment", "Effectiveness", "PGI"]
ax3.set_xticklabels(labels, fontsize=9)
ax3.set_yticklabels(labels, fontsize=9)
# Write each coefficient inside its cell
for i in range(len(corr_cols)):
    for j in range(len(corr_cols)):
        ax3.text(j, i, f"{corr.iloc[i, j]:.2f}", ha="center", va="center", fontsize=9)
plt.colorbar(im, ax=ax3, label="Correlation")
ax3.set_title("Component Correlation Matrix")

# 4. Histogram of PGI scores
ax4 = axes[1, 1]
ax4.hist(scores["public_good_index"], bins=15, color="#9b59b6", edgecolor="white", alpha=0.8)
ax4.axvline(x=scores["public_good_index"].mean(), color="red", linestyle="--", label="Mean")
ax4.axvline(x=scores["public_good_index"].median(), color="blue", linestyle="--", label="Median")
ax4.set_xlabel("Public Good Index")
ax4.set_ylabel("Number of States")
ax4.set_title("Distribution of Public Good Index Scores")
ax4.legend()

plt.suptitle("Public Good Index Dashboard", fontsize=16, fontweight="bold", y=1.01)
plt.tight_layout()
plt.show()

In [None]:
import plotly.express as px

# Interactive US map of the Public Good Index, one region per state code.

# Fields configured for the hover tooltip (passed as px.choropleth's hover_data)
hover_fields = {
    "state": False,
    "rank": True,
    "public_good_index": ":.1f",
    "tax_score": ":.1f",
    "investment_score": ":.1f",
    "effectiveness_score": ":.1f",
}

# Pin the color scale to the full 0-100 score range
full_score_range = [0, 100]

fig = px.choropleth(
    data_frame=scores,
    locations="state",
    locationmode="USA-states",
    scope="usa",
    color="public_good_index",
    color_continuous_scale="RdYlGn",
    range_color=full_score_range,
    hover_name="state_name",
    hover_data=hover_fields,
    labels={"public_good_index": "Public Good Index"},
    title="Public Good Index by State",
)

# Short colorbar title and white lakes
fig.update_layout(
    coloraxis_colorbar={"title": "PGI"},
    geo={"lakecolor": "rgb(255,255,255)"},
)
fig.show()