# Task 3: Event Impact Modeling

## Objective
Model how events (policies, product launches, infrastructure investments) affect financial inclusion indicators.

## Approach
1. Load and explore impact_links data
2. Build event-indicator association matrix
3. Document comparable country evidence
4. Validate impact estimates against historical data
5. Refine estimates based on observations

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os
from datetime import datetime

# Make the parent of the current working directory importable
# (the `src` package imported below is resolved through this entry).
project_root = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
sys.path.append(project_root)

from src.loader import load_data
from src.impact_model import (
    build_impact_matrix,
    create_impact_heatmap,
    get_impact_summary,
    validate_impact_model,
    apply_comparable_evidence,
    calculate_cumulative_impact
)

# Global plotting defaults: seaborn "whitegrid" theme and a 12x6 default figure size
sns.set_style(style='whitegrid')
plt.rcParams.update({'figure.figsize': (12, 6)})

print("Libraries imported successfully!")

## 1. Load and Explore Data

In [None]:
# Read the unified workbook; the result is indexed by table name below
data = load_data('../data/raw/ethiopia_fi_unified_data.xlsx')

# Pull out the four tables used throughout the notebook
observations, events, targets, impacts = (
    data[key] for key in ('observations', 'events', 'targets', 'impacts')
)

# Report how many rows each table holds
for label, table in (
    ('observations', observations),
    ('events', events),
    ('targets', targets),
    ('impact links', impacts),
):
    print(f"Loaded {len(table)} {label}")

In [None]:
# List the identifying columns of every event record
event_columns = ['record_id', 'indicator', 'category', 'observation_date']

print("\n=== EVENTS ===")
events_preview = events[event_columns]
print(events_preview.to_string(index=False))

In [None]:
# List each impact link: source event, affected indicator, estimate, lag and evidence
impact_link_columns = [
    'parent_id',
    'related_indicator',
    'impact_estimate',
    'lag_months',
    'evidence_basis',
]

print("\n=== IMPACT LINKS ===")
impact_links_preview = impacts[impact_link_columns]
print(impact_links_preview.to_string(index=False))

## 2. Build Event-Indicator Association Matrix

This matrix shows which events affect which indicators and by how much.

In [None]:
# Assemble the event x indicator matrix from the events and impact-link tables
impact_matrix = build_impact_matrix(events, impacts)

# Header, legend and the matrix itself, one per line
print(
    "\n=== EVENT-INDICATOR IMPACT MATRIX ===",
    "\nRows = Events, Columns = Indicators, Values = Impact Estimate (percentage points)\n",
    impact_matrix,
    sep="\n",
)

In [None]:
# Render the impact matrix as a heatmap and store it under reports/figures
figures_dir = '../reports/figures'
heatmap_path = f'{figures_dir}/task3_impact_matrix_heatmap.png'

# Ensure the output directory (and its parents) exists before saving
os.makedirs(figures_dir, exist_ok=True)

fig = create_impact_heatmap(impact_matrix, save_path=heatmap_path)
plt.show()

print("\n✓ Heatmap saved to reports/figures/task3_impact_matrix_heatmap.png")

## 3. Impact Summary Table

Detailed breakdown of all event-indicator relationships.

In [None]:
# Generate impact summary: one row per event-indicator relationship
impact_summary = get_impact_summary(impacts, events)

print("\n=== IMPACT SUMMARY ===")
print(impact_summary.to_string(index=False))

# Save to CSV.
# Create the reports directory here instead of relying on another cell having
# created it, so this cell also works when run on its own.
os.makedirs('../reports', exist_ok=True)
impact_summary.to_csv('../reports/task3_impact_summary.csv', index=False)
print("\n✓ Impact summary saved to reports/task3_impact_summary.csv")

## 4. Comparable Country Evidence

For events where Ethiopian pre/post data is insufficient, we use documented impacts from similar contexts.

In [None]:
def _print_comparable_evidence(case_title, indicator_label, evidence):
    """Print one comparable-country evidence record.

    case_title      -- text shown in the section banner
    indicator_label -- human-readable name of the affected indicator
    evidence        -- mapping read via .get(); missing fields print as 'N/A'
    """
    print(f"\n=== COMPARABLE EVIDENCE: {case_title} ===")
    print(
        f"Impact on {indicator_label}: {evidence.get('impact', 'N/A')}pp "
        f"over {evidence.get('timeframe', 'N/A')}"
    )
    print(f"Source: {evidence.get('source', 'N/A')}")
    print(f"Notes: {evidence.get('notes', 'N/A')}")


# Kenya: M-Pesa launch -> mobile money accounts
kenya_mpesa = apply_comparable_evidence('Kenya', 'M-Pesa Launch', 'ACC_MM_ACCOUNT')
_print_comparable_evidence('Kenya M-Pesa Launch', 'Mobile Money Accounts', kenya_mpesa)

# Tanzania: interoperability -> digital payment usage
tanzania_interop = apply_comparable_evidence('Tanzania', 'Interoperability', 'USG_DIGITAL_PAYMENT')
_print_comparable_evidence('Tanzania Interoperability', 'Digital Payment Usage', tanzania_interop)

# Rwanda: digital strategy -> account ownership
rwanda_strategy = apply_comparable_evidence('Rwanda', 'Digital Strategy', 'ACC_OWNERSHIP')
_print_comparable_evidence('Rwanda Digital Strategy', 'Account Ownership', rwanda_strategy)

## 5. Validate Impact Model Against Historical Data

Test whether our impact estimates align with what actually happened after the Telebirr and M-Pesa launches.

In [None]:
# Compare the modelled Telebirr effect on mobile money accounts with observed data
print("\n=== VALIDATION: Telebirr Launch Impact on Mobile Money Accounts ===")

telebirr_validation = validate_impact_model(
    observations, events, impacts,
    indicator_code='ACC_MM_ACCOUNT',
    event_id='EVT_001',  # Telebirr Launch
)

# The result carries an 'error' key when validation could not be performed
if 'error' in telebirr_validation:
    print(f"Validation Error: {telebirr_validation['error']}")
else:
    tb = telebirr_validation  # short alias for the report lines below
    print(f"Event: {tb['event']}")
    print(f"Event Date: {tb['event_date'].date()}")
    print(f"Impact Start Date (with lag): {tb['impact_date'].date()}")
    print(f"\nPredicted Impact: {tb['predicted_impact']:.1f} pp")
    print(f"Value Before: {tb['value_before']:.1f}%")
    print(f"Value After: {tb['value_after']:.1f}%")
    print(
        f"Observed Change: {tb['observed_change']:.1f} pp "
        f"over {tb['time_period_years']:.1f} years"
    )
    print(f"Annualized Change: {tb['annualized_change']:.1f} pp/year")
    print(f"\nPrediction Accuracy: {tb['prediction_accuracy']}")

In [None]:
# Compare the modelled M-Pesa effect on mobile money accounts with observed data
print("\n=== VALIDATION: M-Pesa Launch Impact on Mobile Money Accounts ===")

mpesa_validation = validate_impact_model(
    observations, events, impacts,
    indicator_code='ACC_MM_ACCOUNT',
    event_id='EVT_003',  # M-Pesa Launch
)

# An 'error' key means validation could not be performed; report it as a note
if 'error' in mpesa_validation:
    print(f"Validation Note: {mpesa_validation['error']}")
    print("This is expected as M-Pesa launched recently and survey data lags.")
else:
    mp = mpesa_validation  # short alias for the report lines below
    print(f"Event: {mp['event']}")
    print(f"Event Date: {mp['event_date'].date()}")
    print(f"Impact Start Date (with lag): {mp['impact_date'].date()}")
    print(f"\nPredicted Impact: {mp['predicted_impact']:.1f} pp")
    print(f"\nNote: M-Pesa is recent; full impact may not yet be observable in survey data.")

## 6. Cumulative Impact Analysis

Calculate the total cumulative effect of all events on key indicators.

In [None]:
# Calculate cumulative impacts as of end of 2024
reference_date = pd.Timestamp('2024-12-31')

indicators_to_analyze = ['ACC_OWNERSHIP', 'ACC_MM_ACCOUNT', 'USG_DIGITAL_PAYMENT']

print(f"\n=== CUMULATIVE IMPACT ANALYSIS (as of {reference_date.date()}) ===")
print("\nTotal estimated impact of all events on each indicator:\n")

cumulative_results = []

for indicator in indicators_to_analyze:
    cumulative = calculate_cumulative_impact(
        impacts,
        events,
        indicator,
        reference_date
    )

    cumulative_results.append({
        'Indicator': indicator,
        'Cumulative Impact (pp)': cumulative
    })

    # The '+' format flag emits the sign itself, so a net-negative total
    # prints as "-1.2" instead of the malformed "+-1.2".
    print(f"{indicator}: {cumulative:+.1f} percentage points")

# Create visualization: one bar per indicator
cumulative_df = pd.DataFrame(cumulative_results)

fig, ax = plt.subplots(figsize=(10, 6))
bars = ax.bar(cumulative_df['Indicator'], cumulative_df['Cumulative Impact (pp)'],
              color=['#2E86AB', '#A23B72', '#F18F01'])

ax.set_xlabel('Indicator', fontsize=12, fontweight='bold')
ax.set_ylabel('Cumulative Impact (percentage points)', fontsize=12, fontweight='bold')
# Derive the date in the title from reference_date so the two cannot drift apart
ax.set_title(f"Cumulative Event Impact by Indicator (as of {reference_date.strftime('%b %Y')})",
             fontsize=14, fontweight='bold', pad=20)
ax.grid(axis='y', alpha=0.3)

# Add value labels on bars
for bar in bars:
    height = bar.get_height()
    # Anchor the label outside the bar in both directions: above a positive
    # bar, below a negative one.
    ax.text(bar.get_x() + bar.get_width()/2., height,
            f'{height:+.1f}pp',
            ha='center', va='bottom' if height >= 0 else 'top',
            fontweight='bold')

plt.tight_layout()
# Ensure the output directory exists even when this cell is run on its own
os.makedirs('../reports/figures', exist_ok=True)
plt.savefig('../reports/figures/task3_cumulative_impact.png', dpi=300, bbox_inches='tight')
plt.show()

print("\n✓ Cumulative impact chart saved to reports/figures/task3_cumulative_impact.png")

## 7. Key Insights and Methodology Documentation

In [None]:
# Horizontal rule used above and below each section title
RULE = "=" * 80

KEY_INSIGHTS = """
1. TELEBIRR LAUNCH (May 2021)
   - Strongest impact on Mobile Money Account ownership (+4.0pp)
   - Moderate impact on Digital Payment Usage (+5.0pp with 12-month lag)
   - Evidence: Based on Kenya M-Pesa comparable trajectory
   - Validation: Observed growth from 4.7% (2021) to 9.45% (2024) aligns with model

2. M-PESA LAUNCH (August 2023)
   - Secondary impact on Mobile Money Accounts (+2.0pp)
   - Competitive effect on Digital Payment Usage (+3.0pp)
   - Evidence: Second-mover typically has lower initial impact
   - Note: Full impact not yet observable due to recency

3. INTEROPERABILITY (January 2024)
   - High impact on Digital Payment Usage (+4.0pp with 3-month lag)
   - Low impact on Account Ownership (+1.5pp)
   - Evidence: Tanzania interoperability case study
   - Mechanism: Reduces friction, increases utility of existing accounts

4. POLICY EVENTS
   - NBE Digital Strategy: Moderate long-term impact (+2.0pp, 18-month lag)
   - Telecom Liberalization: Indirect market preparation effect (+1.0pp)
   - Evidence: Policy impacts are typically delayed and indirect

5. CUMULATIVE EFFECTS
   - Digital Payment Usage shows highest cumulative impact
   - Mobile Money Accounts benefit from multiple reinforcing events
   - Account Ownership growth is more gradual and policy-dependent
"""

METHODOLOGY_NOTES = """
FUNCTIONAL FORMS:
- Step change model: Impact occurs after lag period and persists
- Additive impacts: Multiple events sum (conservative approach)
- Lag effects: Typically 3-18 months depending on event type

EVIDENCE SOURCES:
1. Comparable countries: Kenya, Tanzania, Rwanda
2. Historical validation: Ethiopian pre/post data where available
3. Market dynamics: Competition, network effects, policy signals

ASSUMPTIONS:
- Impact estimates are percentage point changes, not percentage changes
- Effects are assumed linear and additive (no interaction terms)
- Lag periods are fixed (reality may vary)
- No decay function (impacts persist indefinitely)

LIMITATIONS:
- Limited historical data points (Findex every 3 years)
- Recent events lack validation data
- Comparable evidence may not fully transfer to Ethiopian context
- Model does not account for macroeconomic factors
- Active vs. registered account gap not fully modeled
"""


def _print_section(title, body):
    """Print a title framed by two rules (preceded by a blank line), then the body text."""
    print("\n" + RULE)
    print(title)
    print(RULE)
    print(body)


_print_section("KEY INSIGHTS FROM EVENT IMPACT MODELING", KEY_INSIGHTS)
_print_section("METHODOLOGY NOTES", METHODOLOGY_NOTES)

# Closing rule
print("\n" + RULE)

## 8. Export Results for Task 4 (Forecasting)

In [None]:
# Export the impact model artefacts consumed by Task 4 (forecasting).
# Nothing earlier in the notebook creates data/processed, so create it here;
# otherwise to_csv raises OSError on a fresh checkout.
os.makedirs('../data/processed', exist_ok=True)

# Save impact matrix for use in forecasting (index kept: it is written as the first column)
impact_matrix.to_csv('../data/processed/impact_matrix.csv')
print("✓ Impact matrix saved to data/processed/impact_matrix.csv")

# Save detailed impact links
impacts.to_csv('../data/processed/impact_links_processed.csv', index=False)
print("✓ Impact links saved to data/processed/impact_links_processed.csv")

print("\n✅ Task 3 Complete! Impact model ready for forecasting.")