# In [None]:  (notebook cell marker left over from export)
#!/usr/bin/env python3
# Create continuity outputs (2014-2023): regression slopes over time + region-share trends.
#
# Input:
#   tiles_panel_all_countries_<start>-<end>.csv (from build_tiles_all_years.py)
#
# Outputs:
#   coef_trends.csv
#   figure_beta_trends.png
#   region_share_trends.csv
#   figure_urban_share_trends.png
#
# Models per country-year:
#   (1) baseline: log_light ~ log_pop
#   (2) +regime:  log_light ~ log_pop + C(region_type)

import os, argparse
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from statsmodels.formula.api import ols

# Region-type labels for which a share_<label> column is written to
# region_share_trends.csv, in output column order.
REGION_TYPES = [
    "urban_core",
    "dense_dim",
    "bright_sparse",
    "mixed",
    "empty_or_rural",
]

def _fit_coef_trends(panel, min_tiles=8):
    """Fit the baseline and +regime OLS models for each (country, year) group.

    Args:
        panel: DataFrame with columns country, year, log_light, log_pop and
            region_type.
        min_tiles: Groups with fewer rows than this are skipped.

    Returns:
        DataFrame sorted by country and year with one row per fitted group:
        country, year, n_tiles, beta_log_pop, r2_baseline, r2_plus_regime.
        The columns are present even when no group qualifies.
    """
    columns = [
        "country",
        "year",
        "n_tiles",
        "beta_log_pop",
        "r2_baseline",
        "r2_plus_regime",
    ]
    rows = []
    for (country, year), sub in panel.groupby(["country", "year"]):
        if len(sub) < min_tiles:
            continue
        baseline = ols("log_light ~ log_pop", data=sub).fit()
        plus_regime = ols("log_light ~ log_pop + C(region_type)", data=sub).fit()
        rows.append({
            "country": country,
            "year": int(year),
            # NOTE(review): counts rows in the group; the formula API drops
            # rows with missing values by default, so the number of rows
            # actually used in the fit may be smaller -- confirm.
            "n_tiles": int(len(sub)),
            "beta_log_pop": float(baseline.params.get("log_pop", np.nan)),
            "r2_baseline": float(baseline.rsquared),
            "r2_plus_regime": float(plus_regime.rsquared),
        })
    # Passing explicit columns keeps sort_values from raising KeyError when
    # `rows` is empty (no group reached min_tiles).
    return pd.DataFrame(rows, columns=columns).sort_values(["country", "year"])


def _region_share_trends(panel):
    """Compute the share of tiles in each region type per (country, year).

    Args:
        panel: DataFrame with columns country, year and region_type.

    Returns:
        DataFrame sorted by country and year with one share_<region_type>
        column per entry of REGION_TYPES; a type absent from a group gets 0.0.
    """
    columns = ["country", "year"] + [f"share_{rt}" for rt in REGION_TYPES]
    shares = []
    for (country, year), sub in panel.groupby(["country", "year"]):
        fractions = sub["region_type"].value_counts(normalize=True)
        row = {"country": country, "year": int(year)}
        for rt in REGION_TYPES:
            row[f"share_{rt}"] = float(fractions.get(rt, 0.0))
        shares.append(row)
    # Explicit columns so an empty panel still yields a sortable, headed frame.
    return pd.DataFrame(shares, columns=columns).sort_values(["country", "year"])


def _plot_country_lines(df, value_col, ylabel, title, fig_path):
    """Draw one line per country of `value_col` against year and save it to `fig_path`."""
    plt.figure(figsize=(8, 5))
    for country, sub in df.groupby("country"):
        plt.plot(sub["year"], sub[value_col], marker="o", linewidth=2, label=country)
    plt.xlabel("Year")
    plt.ylabel(ylabel)
    plt.title(title)
    if not df.empty:
        # A legend without any plotted lines only produces a warning.
        plt.legend(frameon=True)
    plt.tight_layout()
    plt.savefig(fig_path, dpi=220, bbox_inches="tight")
    plt.close()


def main():
    """Command-line entry point: write coefficient and region-share trend outputs.

    Reads the tile panel CSV given by --panel_csv and writes four files into
    --out_dir (default: the folder containing the panel CSV):
    coef_trends.csv, figure_beta_trends.png, region_share_trends.csv and
    figure_urban_share_trends.png. The saved paths are printed at the end.
    """
    ap = argparse.ArgumentParser()
    ap.add_argument("--panel_csv", required=True, help="tiles_panel_all_countries_2014-2023.csv")
    ap.add_argument("--out_dir", default=None, help="Output folder (default: same folder as panel_csv)")
    args = ap.parse_args()

    panel = pd.read_csv(args.panel_csv)
    # dirname() is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError, so fall back to the current directory.
    out_dir = args.out_dir or os.path.dirname(args.panel_csv) or "."
    os.makedirs(out_dir, exist_ok=True)

    coef = _fit_coef_trends(panel)
    coef_path = os.path.join(out_dir, "coef_trends.csv")
    coef.to_csv(coef_path, index=False)

    beta_fig = os.path.join(out_dir, "figure_beta_trends.png")
    _plot_country_lines(
        coef,
        value_col="beta_log_pop",
        ylabel="Slope: beta on log(1 + population)",
        # \u2013 is an en dash; written as an escape so the title survives
        # re-encoding of the source file.
        title="Continuity: population\u2013nightlights slope over time (tile-level OLS)",
        fig_path=beta_fig,
    )

    shares_df = _region_share_trends(panel)
    share_path = os.path.join(out_dir, "region_share_trends.csv")
    shares_df.to_csv(share_path, index=False)

    share_fig = os.path.join(out_dir, "figure_urban_share_trends.png")
    _plot_country_lines(
        shares_df,
        value_col="share_urban_core",
        ylabel="Tile share classified as urban_core",
        title="Continuity: urban_core share over time",
        fig_path=share_fig,
    )

    print("Saved:")
    print(" -", coef_path)
    print(" -", beta_fig)
    print(" -", share_path)
    print(" -", share_fig)

# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
