In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np

# --- STEP 1: Load Data ---
# Read the household file with raw numeric codes (no categorical labels) and
# normalise every column name to lower case.
df_hr = pd.read_stata("../data/RWHR81FL.DTA", convert_categoricals=False)
df_hr = df_hr.rename(columns=str.lower)

# Keep only completed interviews (hv015 == 1). Rows for interviews that were
# not completed would otherwise be counted as households lacking each asset.
print(f"Original Row Count: {len(df_hr)}")
df_hr = df_hr.loc[df_hr['hv015'].eq(1)].copy()
print(f"Count after removing incomplete interviews: {len(df_hr)}")

# --- STEP 2: Logic with NaN Preservation ---
# Maps each derived indicator column to the source column it is built from.
binary_vars = {
    'has_electricity': 'hv206',
    'has_mobile': 'hv243a',
    'has_radio': 'hv207',
    'has_tv': 'hv208',
    'has_computer': 'hv243e'
}

# Build strictly binary indicators: 1 stays 1 and 0 stays 0.
# Everything else becomes NaN -- values that were already NaN, the 9
# (Missing/Unknown) code, and any other non-binary code -- so that the
# weighted mean computed later is taken over genuine yes/no answers only.
# NaN is deliberately NOT turned into 0; those rows are dropped later.
for new_col, old_col in binary_vars.items():
    source_values = df_hr[old_col]
    # where() keeps values for which the condition holds and writes NaN elsewhere.
    df_hr[new_col] = source_values.where(source_values.isin([0, 1]))

# --- STEP 3: Define the "Safe" Weighted Mean Function ---
def get_weighted_pct(df, target_col, weight_col='hv005'):
    """Return the weighted mean of ``target_col`` expressed as a percentage.

    Rows where either the target or the weight is NaN are dropped before
    averaging, so missing answers never count as 0.

    Args:
        df: DataFrame containing both ``target_col`` and ``weight_col``.
        target_col: Name of the column to average (expected to hold 0/1).
        weight_col: Name of the column holding the weights.

    Returns:
        The weighted mean multiplied by 100. Returns 0.0 when no usable rows
        remain or when the remaining weights sum to zero (nothing to average).
    """
    usable = df[[target_col, weight_col]].dropna()

    # np.average raises ZeroDivisionError when the weights sum to zero, so
    # that case is handled the same way as having no rows at all.
    if usable.empty or usable[weight_col].sum() == 0:
        return 0.0

    return np.average(usable[target_col], weights=usable[weight_col]) * 100

# --- STEP 4: Kigali City Setup ---
# hv024 value used to pick out the Kigali City households.
KIGALI_CITY_CODE = 1
df_hr_kigali = df_hr.loc[df_hr['hv024'].eq(KIGALI_CITY_CODE)].copy()

# District code -> display label for the districts shown in the graphs.
district_map = {
    11: 'NYARUGENGE',
    12: 'GASABO',
    13: 'KICUKIRO',
}

# --- STEP 5: Generate Graphs ---
# Human-readable label for each indicator, used in the chart titles.
titles = dict(
    has_electricity='Electricity Access',
    has_mobile='Mobile Phone',
    has_radio='Radio',
    has_tv='Television',
    has_computer='Computer',
)

# One chart per indicator, in the order the labels are declared above.
indicators_to_graph = list(titles)

print("Generating corrected graphs...")

# For every indicator: compute the weighted percentage per district, for
# Kigali as a whole and for the full cleaned dataset, then draw one
# vertical bar chart, save it as a PNG and display it.
for col in indicators_to_graph:

    # 1. District values (using the safe function).
    #    Only the indicator and weight columns are handed to apply(), so the
    #    grouping column is never part of the applied frame; this avoids the
    #    pandas deprecation warning about apply() operating on grouping columns.
    district_data = (
        df_hr_kigali
        .groupby('shdistrict')[[col, 'hv005']]
        .apply(lambda grp: get_weighted_pct(grp, col, 'hv005'))
        .round(1)
    )

    # Replace district codes with names. rename() leaves any code that is
    # missing from district_map unchanged instead of turning it into NaN.
    district_data = district_data.rename(index=district_map)

    # 2. Province value
    province_val = get_weighted_pct(df_hr_kigali, col)

    # 3. National value (using the full cleaned dataset)
    national_val = get_weighted_pct(df_hr, col)

    # 4. Combine: district bars first, then the Kigali and national bars.
    final_plot_data = pd.concat([
        district_data,
        pd.Series([province_val], index=['kigali']),
        pd.Series([national_val], index=['Rwanda'])
    ])

    # 5. Plotting (vertical bars) on an explicitly created figure/axes pair,
    #    so drawing does not depend on pyplot's implicit "current figure".
    fig, ax = plt.subplots(figsize=(10, 6))

    # Colors: blue for districts, green for province, orange for national
    colors = ['#4472C4'] * len(district_data) + ['#00B050', '#FFC000']

    # rot=0 keeps the x tick labels flat.
    final_plot_data.plot(
        kind='bar', ax=ax, color=colors, edgecolor='white', width=0.6, rot=0
    )

    # Styling
    ax.set_title(
        f'Percentage of households with {titles.get(col, col)}',
        fontsize=14,
        fontweight='bold',
    )
    ax.grid(axis='y', linestyle='--', alpha=0.3)

    # Hide the y-axis (numbers and ticks) for a cleaner look.
    # NOTE(review): hiding the whole axis presumably hides the y grid lines
    # requested above as well -- confirm which of the two is intended.
    ax.yaxis.set_visible(False)

    # Write each bar's value (rounded to a whole number) just above the bar.
    for p in ax.patches:
        ax.annotate(f'{p.get_height():.0f}',
                    (p.get_x() + p.get_width() / 2., p.get_height()),
                    ha='center', va='center',
                    xytext=(0, 9),
                    textcoords='offset points',
                    fontsize=11)

    # Hide every spine except the bottom one.
    for side in ('top', 'right', 'left'):
        ax.spines[side].set_visible(False)

    fig.tight_layout()
    fig.savefig(f"Corrected_Graph_{col}.png")
    plt.show()  # Shows it in the notebook
    plt.close(fig)

print("Graphs finished.")

ModuleNotFoundError: No module named 'matplotlib'