In [1]:
#!/usr/bin/env python3
# dhs_fp_media_men_robust_fixed.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import json
import math

# ------------------------------------------------------------------
# 1. SETTINGS
# ------------------------------------------------------------------
DATA_DIR = "data"
FILE_PATH = os.path.join(DATA_DIR, "RWMR81FL.DTA")

# Eastern Province Districts
DIST_MAP = {
    51: 'Rwamagana', 52: 'Nyagatare', 53: 'Gatsibo',
    54: 'Kayonza', 55: 'Kirehe', 56: 'Ngoma', 57: 'Bugesera'
}

LABELS = ["Radio", "Television", "Newspaper", "Telephone", "None of these"]
COLORS = ["#4F81BD", "#9BBB59", "#C0504D", "#8064A2", "#7F7F7F"]

# ------------------------------------------------------------------
# 2. CALCULATION HELPERS
# ------------------------------------------------------------------
# def standard_round(n):
#      return int(math.floor(n))

def get_men_media_stats(df):
    """Calculates weighted percentages with robust logic."""
    if df.empty or df['w'].sum() == 0:
        return pd.Series({lbl: 0 for lbl in LABELS})
    
    # Calculation (1 = Yes)
    radio_f = np.average(df['mv384a'] == 1, weights=df['w']) * 100
    tv_f    = np.average(df['mv384b'] == 1, weights=df['w']) * 100
    news_f  = np.average(df['mv384c'] == 1, weights=df['w']) * 100
    phone_f = np.average(df['mv384d'] == 1, weights=df['w']) * 100
    
    # None of these (None of the FOUR sources)
    none_mask = (df['mv384a'] != 1) & (df['mv384b'] != 1) & \
                (df['mv384c'] != 1) & (df['mv384d'] != 1)
    none_f = np.average(none_mask, weights=df['w']) * 100
    
    return pd.Series({
        "Radio": standard_round(radio_f),
        "Television": standard_round(tv_f),
        "Newspaper": standard_round(news_f),
        "Telephone": standard_round(phone_f),
        "None of these": standard_round(none_f)
    })

# ------------------------------------------------------------------
# 3. ANALYSIS PIPELINE
# ------------------------------------------------------------------
if __name__ == "__main__":
    if not os.path.exists(FILE_PATH):
        print(f"❌ Error: {FILE_PATH} not found."); exit()

    print("Loading Men's Data...")
    df = pd.read_stata(FILE_PATH, convert_categoricals=False)
    df.columns = df.columns.str.lower()

    # --- FORCED NUMERIC CONVERSION (Prevents 0.0 results) ---
    for col in ['mv005', 'mv024', 'mv012', 'mv384a', 'mv384b', 'mv384c', 'mv384d']:
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

    # --- AUTO-DETECT DISTRICT COLUMN ---
    # Try common names for district in Rwanda MR files
    dist_col = None
    for col in ['smdistrict', 'smdstr', 'shdistrict', 'mv001']:
        if col in df.columns:
            dist_col = col
            break
    
    if not dist_col:
        print("❌ Error: Could not find a district column."); exit()
    print(f"Using '{dist_col}' for district identification.")

    # --- FILTERS ---
    # 1. Age 15-49
    df = df[(df['mv012'] >= 15) & (df['mv012'] <= 49)].copy()
    
    # 2. Weights
    df['w'] = df['mv005'] / 1000000.0

    # --- AGGREGATION ---
    results = {}
    
    # Filter Eastern Province (Region 5)
    df_east = df[df['mv024'] == 5].copy()
    print(f"Found {len(df_east)} men in the Eastern Province.")
    
    if len(df_east) == 0:
        print("❌ Error: No data found for Region 5. Check if Region 5 is correct for East.")
        exit()

    for d_code, d_name in DIST_MAP.items():
        # Ensure d_code is same type as column
        subset = df_east[df_east[dist_col].astype(float) == float(d_code)]
        results[d_name] = get_men_media_stats(subset)

    # Totals
    results["East Province"] = get_men_media_stats(df_east)
    results["Rwanda (National)"] = get_men_media_stats(df)

    final_df = pd.DataFrame(results).T[LABELS]
    print("\n--- Summary Results ---")
    print(final_df)

    # JSON Output
    with open("eastern_fp_media_men_robust.json", "w") as f:
        json.dump(final_df.to_dict(orient='index'), f, indent=4)

    # Plot
    ax = final_df.plot(kind="bar", figsize=(16, 8), color=COLORS, width=0.85, edgecolor="white")
    plt.title("Exposure to Family Planning Messages (Men 15-49)\nEastern Province Districts vs National", 
              fontsize=14, fontweight="bold", pad=25)
    plt.xticks(rotation=0, fontsize=10)
    ax.yaxis.set_visible(False)
    for s in ["top", "right", "left"]: ax.spines[s].set_visible(False)
    plt.legend(frameon=False, loc="upper center", bbox_to_anchor=(0.5, -0.12), ncol=5, fontsize=11)
    
    for c in ax.containers:
        ax.bar_label(c, fmt='%d', padding=3, fontsize=10, fontweight="bold")

    plt.tight_layout()
    plt.savefig("FP_Media_Men_Robust.png", dpi=300)
    print("✅ Success! Check FP_Media_Men_Robust.png for the chart.")

Loading Men's Data...
Using 'smdistrict' for district identification.
Found 1409 men in the Eastern Province.


NameError: name 'standard_round' is not defined