# Task 1: Data Exploration and Enrichment
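Exploratory analysis of Ethiopia's unified financial inclusion dataset: record counts by type, the account ownership trend against its targets, and a check of the newly enriched data points.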

In [None]:
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Apply seaborn's "whitegrid" theme so the plots drawn afterwards share one look.
sns.set_theme(style="whitegrid")

In [None]:
# Read the unified dataset from the raw-data folder (path is relative to
# the directory the notebook is run from).
data_path = "../data/raw/ethiopia_fi_unified_data.csv"
df = pd.read_csv(data_path)

# Structural overview: the (rows, columns) tuple, then the column names.
dataset_shape = df.shape
column_names = df.columns.tolist()
print("Dataset Shape:", dataset_shape)
print("\nColumns:", column_names)

In [None]:
# Tally how many rows carry each record_type value, most frequent first.
record_type_counts = df['record_type'].value_counts()
print("\nRecord Counts by Type:")
print(record_type_counts)

In [None]:
# Plot the account-ownership observations over time, overlay any targets,
# and save the chart under ../reports/figures/.

# Filter Observations
obs = df[df['record_type'] == 'observation']

# Account-ownership series. Sort on the parsed dates rather than on the raw
# column values so the line is drawn chronologically whatever the date format.
acc_own = obs[obs['indicator_code'] == 'account_ownership']
acc_own = acc_own.sort_values('observation_date', key=pd.to_datetime)

# Plot Account Ownership Trend
plt.figure(figsize=(10, 6))
plt.plot(pd.to_datetime(acc_own['observation_date']), acc_own['value_numeric'], marker='o', linestyle='-', label='Account Ownership')

# Add targets if any. Skipping the call when there are none keeps an empty
# 'NFIS-II Target' entry out of the legend.
targets = df[df['record_type'] == 'target']
acc_target = targets[targets['indicator_code'] == 'account_ownership_target']
if not acc_target.empty:
    plt.scatter(pd.to_datetime(acc_target['observation_date']), acc_target['value_numeric'], color='red', marker='x', s=100, label='NFIS-II Target')

plt.title('Ethiopia Account Ownership Trend & Targets')
plt.xlabel('Year')
plt.ylabel('Rate (0-1)')
plt.legend()
# The y-axis is pinned to the 0-1 range named in the axis label.
plt.ylim(0, 1)

# Create the output folder first so savefig does not fail on a fresh checkout.
figure_path = Path('../reports/figures/account_ownership_trend.png')
figure_path.parent.mkdir(parents=True, exist_ok=True)
plt.savefig(figure_path)
print("\nSaved figure: account_ownership_trend.png")


In [None]:
# Enrichments Check: list the rows whose collection_date matches the
# enrichment run, restricted to the columns needed to review them.
print("\n--- Enriched Data Points (Collected Today) ---")
enrichment_date = '2026-01-29'
enrichment_columns = ['record_type', 'indicator', 'value_numeric', 'source_name', 'observation_date']
collected_on_date = df['collection_date'] == enrichment_date
new_data = df.loc[collected_on_date, enrichment_columns]
# Leave the frame as the cell's last expression so the notebook renders it.
new_data