In [1]:
import pandas as pd

# Load the main datasets.
# `data` comes from the unified data CSV; `codes` comes from the reference-codes
# CSV (loaded here but not used anywhere in this cell).
data = pd.read_csv('../data/raw/ethiopia_fi_unified_data.csv')
codes = pd.read_csv('../data/raw/reference_codes.csv')

# Structural overview first, then the first few rows of the main data.
print("--- Main Data Info ---")
# DataFrame.info() writes its report straight to stdout and returns None, so it
# must not be wrapped in print() -- that would emit a stray "None" line.
data.info()
print(data.head())

# Check the types of records we have
print("\n--- Record Counts ---")
print(data['record_type'].value_counts())

--- Main Data Info ---
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 43 entries, 0 to 42
Data columns (total 34 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   record_id            43 non-null     object 
 1   record_type          43 non-null     object 
 2   category             10 non-null     object 
 3   pillar               33 non-null     object 
 4   indicator            43 non-null     object 
 5   indicator_code       43 non-null     object 
 6   indicator_direction  33 non-null     object 
 7   value_numeric        33 non-null     float64
 8   value_text           10 non-null     object 
 9   value_type           43 non-null     object 
 10  unit                 33 non-null     object 
 11  observation_date     43 non-null     object 
 12  period_start         10 non-null     object 
 13  period_end           10 non-null     object 
 14  fiscal_year          43 non-null     object 
 15  gender             

In [2]:

import os
# 1. Load the original data.
# Re-reading the raw file (rather than reusing an earlier frame) means this cell
# always starts from the same input, so re-running it gives the same result.
raw_data_path = '../data/raw/ethiopia_fi_unified_data.csv'
df = pd.read_csv(raw_data_path)

# 2. Create the "New Data" rows.
# Any column that a row does not set is left empty (NaN) after concatenation.
# TODO: replace the 'Your Name' placeholder in `collected_by` with the real collector.
new_rows = [
    # New Observation: Mobile Money Accounts in 2025
    {
        'record_id': 'REC_NEW_01', 'record_type': 'observation', 'pillar': 'ACCESS',
        'indicator': 'Mobile Money Accounts', 'indicator_code': 'MM_ACCOUNTS',
        'value_numeric': 136000000, 'value_type': 'count', 'unit': 'accounts',
        'observation_date': '2025-12-01', 'source_name': 'National Bank of Ethiopia',
        'confidence': 'high', 'collected_by': 'Your Name', 'original_text': '136 million accounts reached in 2025'
    },
    # New Event: Mandatory Fuel Digitization
    {
        'record_id': 'REC_NEW_02', 'record_type': 'event', 'category': 'policy',
        'indicator': 'Digital Payment Mandate', 'indicator_code': 'FUEL_DIGITIZATION',
        'observation_date': '2024-04-01', 'source_name': 'Gov Directive',
        'confidence': 'high', 'collected_by': 'Your Name', 'original_text': 'Mandatory fuel payments via mobile money'
    },
    # New Observation: Digital ID (Fayda)
    {
        'record_id': 'REC_NEW_03', 'record_type': 'observation', 'pillar': 'ENABLER',
        'indicator': 'National Digital ID Registration', 'indicator_code': 'FAYDA_REG',
        'value_numeric': 33600000, 'value_type': 'count', 'unit': 'people',
        'observation_date': '2026-01-01', 'source_name': 'NIDP Ethiopia',
        'confidence': 'high', 'collected_by': 'Your Name', 'original_text': '33.6 million Fayda registrations'
    }
]

# 3. Combine old and new data
new_data_df = pd.DataFrame(new_rows)

# The new ids are hard-coded, so fail loudly if any of them already exists in
# the raw file instead of silently writing duplicate record_ids.
clashing_ids = sorted(set(new_data_df['record_id']) & set(df['record_id']))
if clashing_ids:
    raise ValueError(f"record_id already present in raw data: {clashing_ids}")

processed_df = pd.concat([df, new_data_df], ignore_index=True)

# 4. Save to the processed folder
output_path = '../data/processed/ethiopia_fi_processed_data.csv'
# to_csv does not create missing parent directories; make sure the folder
# exists so the cell also works on a fresh checkout.
os.makedirs(os.path.dirname(output_path), exist_ok=True)
processed_df.to_csv(output_path, index=False)

print(f"✅ Success! Your enriched data is saved at: {output_path}")
print(f"Total records now: {len(processed_df)}")

✅ Success! Your enriched data is saved at: ../data/processed/ethiopia_fi_processed_data.csv
Total records now: 46


In [None]:
# Create the "Impact Links" logic.
# This cell depends on `processed_df` from the enrichment cell above, so that
# cell must be run first.
# TODO: replace the 'Your Name' placeholder in `collected_by` with the real collector.
impact_links = [
    # Link: Fuel Digitization -> Digital Payments
    {
        'record_id': 'LINK_01', 'record_type': 'impact_link',
        'parent_id': 'REC_NEW_02',  # Links to the Fuel Event
        'pillar': 'USAGE', 'related_indicator': 'Digital Payment Adoption',
        'impact_direction': 'positive', 'impact_magnitude': 0.08,  # Expecting ~8% boost
        'lag_months': 1, 'evidence_basis': 'Mandatory policy effect',
        'confidence': 'high', 'collected_by': 'Your Name'
    },
    # Link: Digital ID -> Account Ownership
    {
        'record_id': 'LINK_02', 'record_type': 'impact_link',
        'parent_id': 'REC_NEW_03',  # Links to the Fayda ID Observation
        'pillar': 'ACCESS', 'related_indicator': 'Account Ownership Rate',
        'impact_direction': 'positive', 'impact_magnitude': 0.05,  # Expecting ~5% boost
        'lag_months': 6, 'evidence_basis': 'KYC simplification',
        'confidence': 'medium', 'collected_by': 'Your Name'
    }
]

# Add them to your existing dataframe
impact_df = pd.DataFrame(impact_links)

# Every link must point at a record that actually exists; a typo in parent_id
# would otherwise produce a dangling link without any error.
known_ids = set(processed_df['record_id'])
missing_parents = sorted(set(impact_df['parent_id']) - known_ids)
if missing_parents:
    raise ValueError(f"impact_link parent_id not found in processed data: {missing_parents}")

# Link ids must not reuse an id that is already in the dataset.
reused_ids = sorted(set(impact_df['record_id']) & known_ids)
if reused_ids:
    raise ValueError(f"impact_link record_id already in use: {reused_ids}")

# processed_df itself is not modified, so re-running this cell does not append
# the links a second time.
final_processed_df = pd.concat([processed_df, impact_df], ignore_index=True)

# Save the final version. This deliberately overwrites the file written by the
# enrichment step with the version that also contains the impact links.
final_output_path = '../data/processed/ethiopia_fi_processed_data.csv'
os.makedirs(os.path.dirname(final_output_path), exist_ok=True)
final_processed_df.to_csv(final_output_path, index=False)

print("✅ Impact links added! Task 1 data is now complete.")