In [None]:
# Task 4 – Forecasting Access and Usage 2025–2027

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm

# ── 1. Load enriched data ────────────────────────────────────────
# Path is relative to the working directory the script/notebook runs from.
df = pd.read_csv("../data/processed/enriched_ethiopia_fi_unified_data.csv")

# ── 2. Prepare Access series ─────────────────────────────────────
# Keep only observation records for the ACC_OWNERSHIP indicator.
is_observation = df['record_type'] == 'observation'
is_ownership = df['indicator_code'] == 'ACC_OWNERSHIP'
access = df.loc[is_observation & is_ownership].copy()

# Derive the calendar year of each observation from its date.
access['date'] = pd.to_datetime(access['observation_date'])
access['year'] = access['date'].dt.year

# Reduce to the two regression columns, drop rows with a missing year or
# value, and order chronologically.
access = access[['year', 'value_numeric']].dropna().sort_values('year')

# ── 3. Linear regression ─────────────────────────────────────────
# Ordinary least squares of the indicator value on calendar year.
# add_constant prepends a 'const' column so the fit includes an intercept.
y = access['value_numeric']
X = sm.add_constant(access['year'])

model = sm.OLS(endog=y, exog=X).fit()
print(model.summary())

# ── 4. Forecast 2025–2027 ────────────────────────────────────────
# One row per forecast year, plus the intercept term the model expects.
future_years = pd.DataFrame({'year': [2025, 2026, 2027]})
future_years['const'] = 1

# For a non-formula model, statsmodels converts the exog frame to a plain
# array and pairs columns with coefficients by POSITION, not by name.
# The frame above is ordered ['year', 'const'] while the model was fitted on
# ['const', 'year'], so the columns must be put in the fitted order before
# predicting; otherwise the intercept is multiplied by the year and the
# slope by 1, which yields meaningless forecasts.
future_exog = future_years[list(model.model.exog_names)]

pred = model.get_prediction(future_exog)
forecast = pred.summary_frame(alpha=0.15)          # ~85% interval

# summary_frame keeps the exog index, so this assignment aligns row by row.
forecast['year'] = future_years['year']
# 'obs_ci_*' are the bounds for a new observation (prediction interval),
# which is wider than the 'mean_ci_*' interval for the fitted mean.
# .copy() makes the subset an independent frame so later column assignments
# do not act on a slice of the summary frame.
forecast = forecast[['year', 'mean', 'obs_ci_lower', 'obs_ci_upper']].copy()
forecast.columns = ['Year', 'Forecast', 'Lower', 'Upper']

print("\nBaseline forecast:")
print(forecast.round(1))

# ── 5. Simple scenarios ──────────────────────────────────────────
# Each scenario is the baseline forecast shifted by a fixed offset in
# percentage points (example values: +3.5 pp from events, -2.0 pp downside).
scenario_shifts = {'Optimistic': 3.5, 'Pessimistic': -2.0}
for scenario, shift_pp in scenario_shifts.items():
    forecast[scenario] = forecast['Forecast'] + shift_pp

# ── 6. Plot ──────────────────────────────────────────────────────
fig, ax = plt.subplots(figsize=(10, 6))

# Historical observations
ax.plot(access['year'], access['value_numeric'], 'o-', color='navy', label='Historical')

# Baseline forecast line with its interval drawn as a shaded band
years_ahead = forecast['Year']
ax.plot(years_ahead, forecast['Forecast'], '--', color='teal', label='Baseline forecast')
ax.fill_between(years_ahead, forecast['Lower'], forecast['Upper'], color='teal', alpha=0.12)

# Scenario lines; the legend label is the scenario column name
for column, colour in (('Optimistic', 'green'), ('Pessimistic', 'orange')):
    ax.plot(years_ahead, forecast[column], '-.', color=colour, label=column)

ax.set_title("Account Ownership Forecast — Ethiopia 2025–2027")
ax.set_xlabel("Year")
ax.set_ylabel("Account Ownership (%)")
ax.grid(True, alpha=0.3)
ax.legend()
fig.tight_layout()

# Write the figure to disk before showing it.
fig.savefig("../reports/figures/forecast_access.png", dpi=150)
plt.show()