# Task 2: Exploratory Data Analysis & Enrichment Verification

**Objective**: Validate the `ethiopia_fi_unified_data.xlsx` dataset, check the merged enrichment data, and visualize key trends in Financial Inclusion (Access & Usage).

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
import os

# Add src to path (resolved against the kernel's working directory) so the
# project-local import that follows can be found -- presumably data_loader
# lives in ../src.
# The membership check keeps this cell idempotent: re-running it does not
# stack duplicate entries onto sys.path.
SRC_DIR = os.path.abspath('../src')
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)
from data_loader import load_data

# Set style: apply seaborn's "whitegrid" theme once, up front, so the figures
# drawn in the cells below share the same look.
sns.set_theme(style="whitegrid")

## 1. Load Data

In [None]:
# Relative path to the unified workbook that the rest of the notebook analyses.
RAW_DATA_PATH = r"../data/raw/ethiopia_fi_unified_data.xlsx"

# load_data is the project helper imported in the setup cell.
df = load_data(data_path=RAW_DATA_PATH)

# Preview the first rows as this cell's output.
df.head()

## 2. Unpack Categories
Visualize how many records fall under each `record_type`.

In [None]:
# One bar per distinct record_type; bar height is the number of rows of that type.
fig, ax = plt.subplots(figsize=(10, 5))
sns.countplot(x='record_type', data=df, ax=ax)
ax.set_title('Distribution of Record Types')
plt.show()

## 3. ACCESS: Account Ownership Trend
Filter for the all-gender Account Ownership Rate observations (`ACC_OWNERSHIP`, `gender == 'all'`) and plot them over time.

In [None]:
# All-gender account-ownership observations, ordered by observation date.
access_df = (
    df[
        (df['record_type'] == 'observation') &
        (df['indicator_code'] == 'ACC_OWNERSHIP') &
        (df['gender'] == 'all')
    ]
    # Rows without a year or a value cannot be drawn or labelled. Without this
    # guard the annotation loop would place a "nan%" label at a non-finite
    # position.
    .dropna(subset=['data_year', 'value_numeric'])
    .sort_values('observation_date')
)

# NOTE(review): if several rows share the same data_year, lineplot aggregates
# them into one point while the labels below are written per row -- confirm
# there is one observation per year.
fig, ax = plt.subplots(figsize=(10, 6))
sns.lineplot(data=access_df, x='data_year', y='value_numeric', marker='o', ax=ax)
ax.set_title('Ethiopia Account Ownership Rate (2011-2024)')
ax.set_ylabel('Percentage (%)')
ax.set_xlabel('Year')
ax.set_ylim(0, 100)  # the rate is a percentage, so pin the axis to the full 0-100 range

# Annotate each point with its value, nudged 2 units above the marker.
# ':g' trims trailing zeros and float noise (35.0 -> "35%").
for year, rate in zip(access_df['data_year'], access_df['value_numeric']):
    ax.text(year, rate + 2, f"{rate:g}%", ha='center')
plt.show()

## 4. USAGE: Digital Payments
Compare P2P and ATM transaction counts (`USG_P2P_COUNT` vs `USG_ATM_COUNT`) by year.

In [None]:
# Observations for the two usage indicators being compared (P2P vs ATM counts),
# ordered by observation date.
usage_df = (
    df[
        (df['record_type'] == 'observation') &
        (df['indicator_code'].isin(['USG_P2P_COUNT', 'USG_ATM_COUNT']))
    ]
    .sort_values('observation_date')
)

# Grouped bars: one group per year, one bar per indicator.
# NOTE(review): rows are filtered on indicator_code but coloured by indicator --
# presumably each code maps to exactly one indicator label; confirm, otherwise
# the legend will show more than two series.
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=usage_df, x='data_year', y='value_numeric', hue='indicator', ax=ax)
ax.set_title('P2P vs ATM Transaction Counts')
# Label the x-axis explicitly instead of leaving the raw column name
# 'data_year', consistent with the account-ownership plot.
ax.set_xlabel('Year')
ax.set_ylabel('Count')
plt.show()

## 5. Event Timeline
List the event records in chronological order (by `start_date`) to see when major policy and product events occurred.

In [None]:
# Keep only the event records and order them by their start date.
is_event = df['record_type'].eq('event')
events = df.loc[is_event].sort_values(by='start_date')

# Show just the columns needed to read the timeline.
timeline_columns = ['start_date', 'category', 'indicator']
events[timeline_columns]