In [3]:
#!/usr/bin/env python3
# dhs_media_exposure_eastern_gender.py

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import math
import json

# ------------------------------------------------------------------
# 1. CONFIGURATION
# ------------------------------------------------------------------
DATA_DIR = os.path.join("..", "data") 

DATASETS = [
    {
        "label": "Women",
        "file":  "RWIR81FL.DTA",
        "vars": {
            "wt": "v005",
            "reg": "v024",
            "dist": "sdistrict",
            "news": "v157",  # Newspaper
            "tv":   "v159",  # TV
            "radio": "v158"  # Radio
        }
    },
    {
        "label": "Men",
        "file":  "RWMR81FL.DTA",
        "vars": {
            "wt": "mv005",
            "reg": "mv024",
            "dist": "smdistrict", # Sometimes 'sm701' in older versions, but smdistrict is standard for PR/IR/MR 81
            "news": "mv157", 
            "tv":   "mv159", 
            "radio": "mv158" 
        }
    }
]

# Eastern Province Districts
DIST_MAP = {
    51: 'Rwamagana', 
    52: 'Nyagatare', 
    53: 'Gatsibo',
    54: 'Kayonza',
    55: 'Kirehe',
    56: 'Ngoma',
    57: 'Bugesera'
}

COLORS   = ["#5B9BD5", "#C0504D", "#00B050"] 
LABELS   = ["Reads newspaper", "Watching TV", "Listen to Radio"]

# ------------------------------------------------------------------
# 2. CALCULATION HELPERS
# ------------------------------------------------------------------
def standard_round(n):
    return int(math.floor(n + 0.5))

def get_pct(df, col_name):
    if df.empty or df['w'].sum() == 0:
        return 0
    val = np.average(df[col_name], weights=df['w']) * 100
    return standard_round(val)

def get_triplet(df_geo):
    return {
        "Reads newspaper": get_pct(df_geo, "news_weekly"),
        "Watching TV":     get_pct(df_geo, "tv_weekly"),
        "Listen to Radio": get_pct(df_geo, "radio_weekly")
    }

def analyze_dataset(config):
    label = config['label']
    file_path = os.path.join(DATA_DIR, config['file'])
    v = config['vars']

    print(f"\n--- Processing: {label} ({config['file']}) ---")
    
    if not os.path.exists(file_path):
        print(f"❌ Error: File {file_path} not found.")
        return

    try:
        df = pd.read_stata(file_path, convert_categoricals=False)
        df.columns = df.columns.str.lower()
    except Exception as e:
        print(f"❌ Error reading file: {e}")
        return

    # --- WEIGHTS ---
    df["w"] = df[v['wt']] / 1000000.0

    # --- DEFINE INDICATORS (Weekly Exposure) ---
    # Codes: 2=At least once a week, 3=Almost every day
    df["news_weekly"]  = (df[v['news']] >= 2).astype(int)
    df["tv_weekly"]    = (df[v['tv']] >= 2).astype(int)
    df["radio_weekly"] = (df[v['radio']] >= 2).astype(int)

    # --- AGGREGATION ---
    data_rows = {}

    # A. Districts (Eastern Province = Code 5)
    df_east = df[df[v['reg']] == 5]
    
    if v['dist'] in df_east.columns:
        for dist_code, dist_name in DIST_MAP.items():
            subset = df_east[df_east[v['dist']] == dist_code]
            data_rows[dist_name] = get_triplet(subset)
    else:
        print(f"⚠️ Warning: District variable '{v['dist']}' not found in {label} dataset.")

    # B. Eastern Province Total
    data_rows["Eastern Province"] = get_triplet(df_east)

    # C. Rwanda (National)
    data_rows["Rwanda (National)"] = get_triplet(df)

    # Convert to DataFrame
    final_df = pd.DataFrame.from_dict(data_rows, orient='index')
    final_df = final_df[LABELS] 
    
    print(final_df)

    # --- JSON OUTPUT ---
    json_name = f"Media_Exposure_{label}_Eastern.json"
    output_dict = {
        "indicator": f"Percentage of {label} (15-49) exposed to specific media weekly",
        "unit": "Percentage (%)",
        "region": "Eastern Province",
        "data": final_df.to_dict(orient='index')
    }
    with open(json_name, "w") as f:
        json.dump(output_dict, f, indent=4)

    # --- PLOTTING ---
    ax = final_df.plot(kind="bar", color=COLORS, figsize=(14, 8), 
                       width=0.8, edgecolor="white")

    plt.title(f"Percentage of {label} (15-49) exposed to media weekly\n(Eastern Province & National)", 
              fontsize=15, fontweight="bold", pad=20)
    
    # Updated rotation for 7 districts
    plt.xticks(rotation=45, ha='right', fontsize=11)
    plt.grid(axis="y", ls="--", alpha=0.3)
    
    ax.yaxis.set_visible(False)
    for s in ["top", "right", "left"]: 
        ax.spines[s].set_visible(False)

    plt.legend(ncol=3, loc="upper center", bbox_to_anchor=(0.5, -0.15), 
               frameon=False, fontsize=12)

    # Add labels on top of bars
    for c in ax.containers:
        ax.bar_label(c, fmt='%d%%', padding=3, fontsize=10, fontweight="bold")

    plt.tight_layout()
    png_name = f"Media_Exposure_{label}_Eastern.png"
    plt.savefig(png_name, dpi=300)
    plt.close()
    print(f"✅ Saved: {json_name} and {png_name}")

# ------------------------------------------------------------------
# 4. EXECUTION
# ------------------------------------------------------------------
if __name__ == "__main__":
    for ds in DATASETS:
        analyze_dataset(ds)
    print("\n✅ Processing Complete.")


--- Processing: Women (RWIR81FL.DTA) ---


  df["w"] = df[v['wt']] / 1000000.0
  df["news_weekly"]  = (df[v['news']] >= 2).astype(int)
  df["tv_weekly"]    = (df[v['tv']] >= 2).astype(int)
  df["radio_weekly"] = (df[v['radio']] >= 2).astype(int)


                   Reads newspaper  Watching TV  Listen to Radio
Rwamagana                        9           26               61
Nyagatare                        8           11               65
Gatsibo                          8           19               79
Kayonza                         15           17               71
Kirehe                           5           10               60
Ngoma                            9           13               64
Bugesera                         7           22               59
Eastern Province                 9           17               66
Rwanda (National)                9           20               62
✅ Saved: Media_Exposure_Women_Eastern.json and Media_Exposure_Women_Eastern.png

--- Processing: Men (RWMR81FL.DTA) ---


  df["w"] = df[v['wt']] / 1000000.0
  df["news_weekly"]  = (df[v['news']] >= 2).astype(int)
  df["tv_weekly"]    = (df[v['tv']] >= 2).astype(int)
  df["radio_weekly"] = (df[v['radio']] >= 2).astype(int)


                   Reads newspaper  Watching TV  Listen to Radio
Rwamagana                       18           34               80
Nyagatare                        9           23               70
Gatsibo                          5           18               79
Kayonza                          5           25               91
Kirehe                          10           16               76
Ngoma                           10           26               94
Bugesera                        17           32               88
Eastern Province                10           24               82
Rwanda (National)               14           29               80
✅ Saved: Media_Exposure_Men_Eastern.json and Media_Exposure_Men_Eastern.png

✅ Processing Complete.
