In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import missingno as msno

# Read the enriched dataset produced in Task 1 and convert the
# observation_date column to datetimes in the same step.
df = pd.read_csv('../data/processed/ethiopia_fi_enriched.csv').assign(
    observation_date=lambda frame: pd.to_datetime(frame['observation_date'])
)

# Split rows by record type into independent copies: observations feed the
# trend analysis, events are kept separately for overlaying on the plots.
is_observation = df['record_type'].eq('observation')
is_event = df['record_type'].eq('event')
obs_df = df.loc[is_observation].copy()
events_df = df.loc[is_event].copy()

In [None]:
# Reshape observations into a date x indicator grid so that missing
# combinations show up as gaps in the matrix.
# groupby/unstack is used instead of DataFrame.pivot because pivot raises
# ValueError when a (date, indicator) pair occurs more than once. Taking the
# first non-null value per pair is sufficient here: the matrix only shows
# whether a value is present, not what it is.
# NOTE(review): rows with a missing date or indicator are dropped by groupby.
completeness_grid = (
    obs_df.groupby(['observation_date', 'indicator'])['value_numeric']
    .first()
    .unstack('indicator')
)

# missingno opens its own figure, so the size is passed to it directly; a
# separate plt.figure() call would only leave an empty figure behind.
# The title is set on the returned matrix axes rather than through plt.title,
# which would target whichever axes pyplot currently considers active.
matrix_ax = msno.matrix(completeness_grid, figsize=(10, 6))
matrix_ax.set_title("Data Completeness Matrix (Ethiopia FI Indicators)")
plt.show()

In [None]:
# Focus on the primary indicator: account ownership, ordered by date.
acc_data = obs_df[obs_df['indicator_code'] == 'ACC_OWNERSHIP'].sort_values('observation_date')

# Use an explicit Axes so every artist below is drawn on this figure.
fig, ax = plt.subplots(figsize=(12, 6))
sns.lineplot(data=acc_data, x='observation_date', y='value_numeric',
             marker='o', label='Account Ownership (%)', ax=ax)

# Overlay events as dashed vertical lines. Events without a date cannot be
# placed on the time axis, so they are skipped.
dated_events = events_df.dropna(subset=['observation_date'])

# Labels use data coordinates for x and an axes fraction for y, so they sit
# near the top of the plot whatever the y-axis limits turn out to be.
label_transform = ax.get_xaxis_transform()
for event_date, event_label in zip(dated_events['observation_date'], dated_events['indicator']):
    ax.axvline(x=event_date, color='red', linestyle='--', alpha=0.5)
    ax.text(event_date, 0.9, event_label, transform=label_transform,
            rotation=90, color='red', fontsize=9, verticalalignment='top')

ax.set_title("Financial Inclusion Growth & Key Policy Milestones (2011-2024)")
ax.set_ylabel("Percentage of Population")
ax.grid(True, alpha=0.3)
plt.show()

In [None]:
# Build a wide table: one row per observation date, one column per indicator code.
pivot_df = obs_df.pivot_table(values='value_numeric', index='observation_date', columns='indicator_code')

# Pairwise correlations between the indicator columns.
indicator_corr = pivot_df.corr()

# Draw the annotated heatmap on an explicit Axes.
fig, ax = plt.subplots(figsize=(10, 8))
sns.heatmap(indicator_corr, annot=True, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title("Correlation Matrix: Enablers vs. Access vs. Usage")
plt.show()