### Imports

In [2]:
import pandas as pd
from pathlib import Path
import numpy as np
import json
import warnings
import math

from typing import Dict, Tuple
from collections import defaultdict
import sklearn.linear_model as sklearn_linear_model
import sklearn.metrics as sklearn_metrics
import sklearn.model_selection as sklearn_model_selection
import sklearn.preprocessing as sklearn_preprocessing
import sklearn.feature_selection as sklearn_feature_selection
import sklearn.ensemble as sklearn_ensemble
import sklearn.decomposition as sklearn_decomposition
from sklearn.impute import SimpleImputer

import dask.dataframe as ddf

import geopandas as gpd
import dask_geopandas as dgpd

import matplotlib.pyplot as plt
import pyreadstat
from pandas.api.types import is_numeric_dtype

In [3]:
# Root of the on-disk data tree; the per-source directories below hang off it.
data_path = Path('/home/selker/eop/data')

# Sub-directories for the Malawi survey files and the MOSAIKS features.
malawi_directory = data_path.joinpath('malawi')
mosaiks_directory = data_path.joinpath('mosaiks')

Out: 
* A data file, one-hot encoded and imputed as in Roshni's replication code, with all columns included
* A summary like the one I construct, where "dropped" indicates a column that was either dropped for missingness or omitted because we don't want it (say, consumption questions)
* The summary should include a one-hot map, i.e. a column containing the one-hot categories

## LSMS data

In [4]:
def columns_equal(df, col1, col2):
    """Report whether columns `col1` and `col2` of `df` hold the same values.

    Two numeric columns are compared with ``np.isclose`` (relative tolerance
    1e-4, NaNs in the same position count as equal). Any other pair is compared
    element-wise with ``==``; if that comparison raises ``TypeError`` (as it
    does for categoricals whose categories differ) the columns are treated as
    unequal.
    """
    left, right = df[col1], df[col2]
    both_numeric = all(
        pd.api.types.is_numeric_dtype(series) for series in (left, right)
    )

    if not both_numeric:
        try:
            return (left == right).all()
        except TypeError:
            # mismatched categories -> this comparison raises a type error
            return False

    return np.isclose(left, right, rtol=1e-4, equal_nan=True).all()

### Common cleaning function

In [5]:
# Per-survey-year inputs, each keyed by survey year (2016 or 2019).

# Factor multiplied into `rexpagg` when the outcome is computed. Source:
# https://docs.google.com/spreadsheets/d/11I0U413LgiVYuvgPhVL1M-5bfJabCFql75tQWG551U0/edit#gid=0
currency_conversion_factors = {2016: 0.00461055475, 2019: 0.003361742723912196}

# Root directory of each survey release.
survey_directories = {
    2016: malawi_directory / 'MWI_2016_IHS-IV_v04_M_STATA14',
    2019: malawi_directory / 'MWI_2019_IHS-V_v06_M_Stata',
}

# Household roster module (path without the .dta extension).
roster_paths = {
    2016: survey_directories[2016] / 'household/hh_mod_b',
    2019: survey_directories[2019] / 'HH_MOD_B',
}

# Module files to merge for each year, relative to that year's survey
# directory and without the .dta extension.
_module_stems_by_year = {
    2016: (
        'household/hh_mod_a_filt',
        'household/hh_mod_f',  # housing
        'household/hh_mod_h',  # food security
        'household/hh_mod_n1',  # household enterprises
        'household/hh_mod_s2',  # household credit
        'household/hh_mod_t',  # subj assessment of well-being
        'household/hh_mod_x',  # ag and fisheries filter,
        'agriculture/ag_mod_a',  # ownership of land
        'agriculture/ag_mod_r2',  # livestock
        'agriculture/ag_mod_e3',  # coupon use - rainy season
        'household_geovariables/householdgeovariablesihs4',  # geo
        'consumption_aggregate/ihs4 consumption aggregate',  # consumption
    ),
    2019: (
        'HH_MOD_F',
        'HH_MOD_H',
        'HH_MOD_N1',
        'HH_MOD_S2',
        'HH_MOD_T',
        'HH_MOD_X',
        'ag_mod_a',
        'ag_mod_e3',
        'hh_mod_a_filt',
        'ihs5_consumption_aggregate',
        'householdgeovariables_ihs5',
    ),
}

file_lists = {
    survey_year: [survey_directories[survey_year] / stem for stem in stems]
    for survey_year, stems in _module_stems_by_year.items()
}

# Hand-written covariate descriptions, keyed by survey year and then by
# covariate (column) name. clean_survey applies the selected year's entries
# last, so they replace any description taken from the survey file metadata
# or from an extra module.
description_overrides = {
    2016: dict(),
    2019: {
        'hh_f35_2': (
            'Of the total cost of cellphone service for the household, '
            'how much was spent on internet for all household members?'
        ),
        'hh_f35_3': (
            'Of the total cost of cellphone service for the household, '
            'how much was spent on airtime for all household members?'
        ),
        'hh_g09': (
            'Over the past one week (7 days), did any people that you did '
            'not list as household members eat any meals in your household?'
        ),
        'hh_f41_2': (
            'The last time your toilet facility was emptied, where were the '
            'contents emptied to?'
        ),
        'hh_f26a': (
            'When you last paid for electricity, what length of time '
            'did that payment cover?'
        ),
        'hh_m00': (
            'Did your household own or rent any farm implements, machinery '
            'and/or structures, such as hand hoe, panga knife, treadle pump, '
            'ox cart, tractor, plough, generator, chicken house, storage house, '
            'barn, etc... in the last 12 months?'
        ),
        'hh_h02a': (
            'In the past 7 days, how many days have you or someone in your '
            'household had to rely on less preferred or less expensive food?'
        ),
        'hh_f03b': (
            'Time unit of estimate of the rent they could receive renting the '
            'property'
        ),
        'hh_h06_oth': (
            'Specify what was the other cause of this situation (referring to '
            'a selection of food-insecurity situations)'
        ),
        'hh_f27' : (
            'Although you do not have electricity in your dwelling, does your '
            'village / neighborhood have access to electricity provided by ESCOM?'
        ),
        'hh_f41_3': (
            'Where is your toilet facility located?'
        ),
        'hh_t05': (
            'Imagine six steps, where on the bottom, the first step, stand the '
            'poorest people, and on the highest step, the sixth, stand the rich. '
            'On which step are most of you today?'
        ),
        'hh_o0a': (
            'Does the household head or spouse have any biological sons and/or '
            'daughters who are 15 years old and over and do not live in this household?'
        ),
        'ssa_aez09': (
            'Agro-ecological Zone of the household'
        ),
        'hh_s13a': (
            'Who turned you down when you tried to borrow? (follow-up to a question '
            'about asking for credit; I can’t read the entire question)'
        ),
        'hh_t14': (
            'During the last 12 months, was there a time when you or others in your '
            'household were unable to eat healthy and nutritious food because of a '
            'lack of money or other resources?'
        ),
        'hh_t01': (
            'Concerning your households food consumption over the past one month, '
            'which of the following is true? (less than adequate, adequate, more '
            'than adequate)'
        ),
        'hh_t08': (
            'Which of the following is true? Your current income . . . (followed '
            'by a list of judgments as to sufficiency of income)'
        ),
        'hh_t15': (
            'During the last 12 months, was there a time when you or others in your '
            'household ate only a few kinds of foods because of a lack of money or other '
            'resources?'
        ),
        'hh_x02': (
            'What was the most recent rainy season? (2017/18 or 2018/19)'
        )
    } 
}


In [6]:
def clean_survey(
    year, 
    extra_modules: Dict[str, Tuple[pd.DataFrame, Dict[str, str]]]=None,
    columns_to_drop=None,
    one_hot_encode=True
):
    """Build the cleaned household dataset and covariate summary for one survey year.

    The module files in ``file_lists[year]`` are read with pyreadstat and
    outer-merged on ``case_id``; each frame in ``extra_modules`` is merged the
    same way. Rows without ``rexpagg`` and columns that are entirely NaN are
    dropped. The outcome is ``rexpagg`` times the year's conversion factor,
    divided by household size (adults + children counted from the roster) and
    by 365. Numeric covariates are mean-imputed (with an extra ``<name>_nan``
    indicator when more than 15% of values are missing) and non-numeric
    covariates are optionally one-hot encoded.

    Parameters
    ----------
    year
        Key into ``file_lists``, ``currency_conversion_factors``,
        ``roster_paths`` and ``description_overrides``.
    extra_modules
        Optional dict from module name to a pair containing a dataframe (which
        must have a ``case_id`` column) and a dict from covariate name to
        description.
    columns_to_drop
        Optional columns removed from the merged frame before any processing.
    one_hot_encode
        If true, non-numeric covariates are replaced by one-hot columns;
        otherwise they are returned as they are.

    Returns
    -------
    (malawi, summary)
        ``malawi`` holds the reserved columns (``case_id``, ``hh_wgt``,
        ``interviewDate``, ``outcome``) joined with the processed covariates.
        ``summary`` has one row per covariate with missingness, mean, median,
        std, description, source module, type and the list of output columns
        derived from it.
    """

    ADULT_MIN_AGE = 18
    MISSINGNESS_CUTOFF = 0.15

    def merge_and_clean(dataframe, malawi, source):
        """Outer-merge `dataframe` into `malawi` on case_id and collapse duplicated columns.

        `source` is only used to label the mismatch message.
        """
        if malawi is None:
            return dataframe

        malawi = malawi.merge(
            dataframe, on='case_id', how='outer', suffixes=('_left', '_right')
        )

        for c_left in [c for c in malawi.columns if c.endswith('_left')]:
            base = c_left[:-5]
            c_right = f'{base}_right'

            left_is_numeric = pd.api.types.is_numeric_dtype(malawi[c_left])
            right_is_numeric = pd.api.types.is_numeric_dtype(malawi[c_right])

            if columns_equal(malawi, c_left, c_right):
                keep = c_left
            # geographies are sometimes named and sometimes encoded as integers. If we've got one of each,
            # keep the string name: that way it won't accidentally be treated as numeric later.
            elif base in ['region', 'district'] and left_is_numeric != right_is_numeric:
                keep = c_right if left_is_numeric else c_left
            else:
                # Report the frame actually being merged (a survey file or an
                # extra module), then fall back to the left-hand values.
                print(f'error merging {source}, mismatch in {base}')
                keep = c_left

            discard = c_right if keep == c_left else c_left
            malawi.drop(columns=discard, inplace=True)
            malawi.rename(columns={keep: base}, inplace=True)

        return malawi

    file_list = file_lists[year]
    currency_conversion_factor = currency_conversion_factors[year]
    roster_path = roster_paths[year]
    description_override = description_overrides[year]

    malawi = None
    covariate_labels_to_descriptions = dict()
    covariate_labels_to_modules = dict()

    # Read in survey files
    for file in file_list:

        with warnings.catch_warnings():
            warnings.simplefilter('ignore') # TODO: Investigate. Warning thrown from w/in pyreadstat.

            dataframe, metadata = pyreadstat.read_dta(
                f'{file}.dta', apply_value_formats=True
            )

        covariate_labels_to_descriptions.update(metadata.column_names_to_labels)

        for covariate_label in metadata.column_names_to_labels.keys():
            covariate_labels_to_modules[covariate_label] = file.name

        malawi = merge_and_clean(dataframe, malawi, source=file)

    # Add extra modules
    if extra_modules:
        for module_name, (dataframe, covariate_names_to_descriptions_for_module) in extra_modules.items():

            covariate_labels_to_descriptions.update(covariate_names_to_descriptions_for_module)
            malawi = merge_and_clean(dataframe, malawi, source=module_name)

            for covariate_label in covariate_names_to_descriptions_for_module.keys():
                covariate_labels_to_modules[covariate_label] = module_name

    # Manual descriptions win over anything read from metadata or extra modules.
    covariate_labels_to_descriptions.update(description_override)

    if columns_to_drop:
        malawi.drop(columns=columns_to_drop, inplace=True)

    # Drop rows that are missing critical fields which we don't want to impute.
    malawi.dropna(subset=['rexpagg'], inplace=True)

    # TODO: Figure out how to detect datetime-like columns automatically
    malawi['interviewDate'] = pd.to_datetime(malawi['interviewDate'])

    # drop entirely nan columns
    malawi.dropna(axis=1, how='all', inplace=True)

    # compute outcome
    roster, _ = pyreadstat.read_dta(
        f'{roster_path}.dta', apply_value_formats=True
    )

    roster.columns = [c.lower() for c in roster.columns]

    roster['adult'] = roster.hh_b05a >= ADULT_MIN_AGE
    hh_adult_counts = (
        roster[roster.adult].groupby('case_id')[['hhid']].count().rename(columns={'hhid': 'num_adults'})
    )
    hh_child_counts = (
        roster[~roster.adult].groupby('case_id')[['hhid']].count().rename(columns={'hhid': 'num_children'})
    )

    malawi = (
        malawi
        .merge(hh_adult_counts, how='left', on='case_id')
        .merge(hh_child_counts, how='left', on='case_id')
    )

    malawi[['num_adults', 'num_children']] = (
        malawi[['num_adults', 'num_children']].fillna(value=0)
    )

    # Every household must have at least one member, otherwise the per-capita
    # division below is undefined.
    assert (malawi.num_adults + malawi.num_children <= 0).sum() == 0

    malawi["outcome"] = malawi["rexpagg"] * currency_conversion_factor
    # Could weight children and adults differently here.
    malawi["outcome"] /= (malawi.num_adults + malawi.num_children)
    malawi["outcome"] /= 365

    # columns not to be imputed, coerced to numeric, or one-hot encoded.
    # summary table won't include these either - for now, this seems fine.
    columns_to_reserve = [
        'case_id', 'hh_wgt', 'interviewDate', 'outcome'
    ]
    malawi_reserved = malawi[columns_to_reserve]

    malawi_to_process = malawi[malawi.columns.difference(columns_to_reserve)].copy()

    # coerce columns to numeric that can be coerced; leave the rest untouched.
    # (Equivalent to the deprecated pd.to_numeric(..., errors='ignore').)
    for c in malawi_to_process.columns:
        try:
            malawi_to_process[c] = pd.to_numeric(malawi_to_process[c])
        except (ValueError, TypeError):
            pass

    # coerce known categorical columns to string
    known_categorical = [
        'region', 'district', 'hh_t01', 'hh_t02', 'hh_t03', 'hh_t04', 'ea_id'
    ]
    for c in known_categorical:
        malawi_to_process[c] = malawi_to_process[c].astype(str)

    # Compile column summary (before imputing and one-hot encoding)
    missing_counts = malawi_to_process.isnull().sum() + (malawi_to_process == "").sum()
    means = malawi_to_process.mean(skipna=True, numeric_only=True)
    medians = malawi_to_process.median(skipna=True, numeric_only=True)
    stds = malawi_to_process.std(skipna=True, numeric_only=True)

    summary = pd.concat((missing_counts, means, medians, stds), axis=1)
    summary.columns = ['missing_count', 'mean', 'median', 'std']
    summary.reset_index(names='covariate', inplace=True)

    summary['missing_fraction'] = summary.missing_count / len(malawi_to_process)

    # Decide which covariates exceed the cutoff on the exact fraction, before
    # the display rounding below (a fraction of 0.153 would otherwise round to
    # 0.15 and slip under the threshold).
    covariates_over_cutoff = set(
        summary.loc[summary.missing_fraction > MISSINGNESS_CUTOFF, 'covariate']
    )

    # Fall back to the raw column name when no description is known.
    summary['description'] = summary.covariate.map(
        lambda column_name: covariate_labels_to_descriptions.get(column_name, column_name)
    )
    summary['module'] = summary.covariate.map(covariate_labels_to_modules)

    summary.missing_fraction = summary.missing_fraction.round(2)
    summary['median'] = summary['median'].round(2)
    summary['mean'] = summary['mean'].round(2)
    summary['std'] = summary['std'].round(2)

    # Split into numeric and non-numeric columns
    malawi_numeric = malawi_to_process.select_dtypes(include=[np.number]).copy()
    malawi_non_numeric = malawi_to_process.select_dtypes(exclude=[np.number, np.datetime64])

    def get_covariate_type(cov):
        """Label a covariate 'numeric' or 'categorical'; None if it is neither."""
        if cov in malawi_numeric.columns:
            return 'numeric'
        elif cov in malawi_non_numeric.columns:
            return 'categorical'
        return None

    summary['type'] = summary['covariate'].apply(get_covariate_type)
    covariate_to_columns_map = {
        covariate: [covariate] for covariate in summary.covariate
    }

    # impute missing values with the mean. If they have high missingness,
    # add a nan column.
    for covariate in list(malawi_numeric.columns):
        if covariate in covariates_over_cutoff:
            dummy_column = f'{covariate}_nan'
            malawi_numeric[dummy_column] = malawi_numeric[covariate].isna()
            covariate_to_columns_map[covariate].append(dummy_column)

    # This is different from what roshni does: She uses 0 to impute
    # if missingness is >15%.
    imputer = SimpleImputer(missing_values=np.nan, strategy='mean')
    imputer.fit(malawi_numeric)

    # Keep the original row index so the joins below align on it explicitly
    # rather than relying on both frames happening to share a default index.
    malawi_numeric = pd.DataFrame(
        imputer.transform(malawi_numeric),
        columns=malawi_numeric.columns,
        index=malawi_numeric.index,
    )

    if one_hot_encode:
        # one-hot encode categoricals.
        # This is different from what roshni does. I'm encoding missing values
        # with a category regardless of missing fraction.
        one_hot_encoder = sklearn_preprocessing.OneHotEncoder(
            drop='if_binary', sparse_output=False
        ).fit(malawi_non_numeric)
        malawi_non_numeric_encoded = pd.DataFrame(
            one_hot_encoder.transform(malawi_non_numeric),
            columns=one_hot_encoder.get_feature_names_out(),
            index=malawi_non_numeric.index,
        )

        # populate the map from original column names to the list of one-hot columns.
        # drop_idx_ as a whole is None when no feature had a category dropped.
        drop_indices = one_hot_encoder.drop_idx_
        for i, covariate in enumerate(one_hot_encoder.feature_names_in_):

            categories = one_hot_encoder.categories_[i]

            if drop_indices is not None and drop_indices[i] is not None:
                categories = np.delete(categories, drop_indices[i])

            covariate_to_columns_map[covariate] = [
                f'{covariate}_{category}' for category in categories
            ]

        malawi = malawi_reserved.join(malawi_numeric).join(malawi_non_numeric_encoded)

    else:
        malawi = malawi_reserved.join(malawi_numeric).join(malawi_non_numeric)

    # populate summary columns
    summary['columns'] = summary.covariate.map(covariate_to_columns_map)

    return malawi, summary


### Clean 2016 and 2019 LSMS

#### 2016

In [164]:
# Clean the 2016 survey with the default options (no extra modules, nothing
# dropped, one-hot encoding on).
malawi_2016, summary_2016 = clean_survey(year=2016)

error merging /home/selker/eop/data/malawi/MWI_2016_IHS-IV_v04_M_STATA14/agriculture/ag_mod_r2, mismatch in hhid
error merging /home/selker/eop/data/malawi/MWI_2016_IHS-IV_v04_M_STATA14/agriculture/ag_mod_e3, mismatch in hhid
error merging /home/selker/eop/data/malawi/MWI_2016_IHS-IV_v04_M_STATA14/consumption_aggregate/ihs4 consumption aggregate, mismatch in region


  malawi_to_process[c] = pd.to_numeric(malawi_to_process[c], errors='ignore')


#### 2019

##### Process durable & ag goods columns

In [7]:
# Extra modules handed to clean_survey: module name -> (dataframe, descriptions).
extra_modules = dict()

# --- Durable goods (HH_MOD_L): one column per hh_l02 item, summing hh_l03 ---
durable_goods, durable_goods_metadata = pyreadstat.read_dta(
    survey_directories[2019] / 'HH_MOD_L.dta', apply_value_formats=True
)

durable_goods_pivoted = durable_goods.pivot_table(
    index='case_id',
    columns='hh_l02',
    values='hh_l03',
    aggfunc='sum',
    fill_value=0,
    observed=True,  # to avoid a warning
).add_prefix('durable_asset_')
durable_goods_pivoted.columns.name = None

# Keep only items whose total across households is positive.
durable_totals = durable_goods_pivoted.sum(axis=0)
durable_goods_pivoted = durable_goods_pivoted.loc[:, durable_totals > 0]

durable_goods_covariate_to_desciption = {
    covariate: f'number owned: {covariate}'
    for covariate in durable_goods_pivoted.columns
}

# --- Agricultural goods (HH_MOD_M): one column per hh_m0b item, summing hh_m01 ---
ag_goods, ag_goods_metadata = pyreadstat.read_dta(
    survey_directories[2019] / 'HH_MOD_M.dta', apply_value_formats=True
)

ag_goods['hh_m0b'] = ag_goods['hh_m0b'].astype(str)

# Where the item is 'OTHER', use the free-text value from hh_m0b_oth instead.
is_other_item = ag_goods.hh_m0b == 'OTHER'
ag_goods.loc[is_other_item, 'hh_m0b'] = ag_goods.loc[is_other_item, 'hh_m0b_oth']

ag_goods_pivoted = ag_goods.pivot_table(
    index='case_id',
    columns='hh_m0b',
    values='hh_m01',
    aggfunc='sum',
    fill_value=0,
    observed=True,  # to avoid a warning
).add_prefix('ag_asset_')
ag_goods_pivoted.columns.name = None

ag_totals = ag_goods_pivoted.sum(axis=0)
ag_goods_pivoted = ag_goods_pivoted.loc[:, ag_totals > 0]

ag_goods_covariate_to_description = {
    covariate: f'number owned: {covariate}'
    for covariate in ag_goods_pivoted.columns
}

# Turn case_id back into a regular column so the frames can be merged on it.
durable_goods_pivoted = durable_goods_pivoted.reset_index()
ag_goods_pivoted = ag_goods_pivoted.reset_index()

extra_modules['HH_MOD_L_durable_goods'] = (
    durable_goods_pivoted, durable_goods_covariate_to_desciption
)
extra_modules['HH_MOD_M_ag_goods'] = (
    ag_goods_pivoted, ag_goods_covariate_to_description
)

  durable_goods_pivoted = durable_goods.pivot_table(
  ag_goods_pivoted = ag_goods.pivot_table(


Asset index

In [9]:
# Choose the PCA input: durable + agricultural assets (current setting) or
# durable assets only.
if True:
    all_assets = durable_goods_pivoted.merge(
        ag_goods_pivoted, how='outer', on='case_id'
    )
    for_asset_index = all_assets.copy()
else:
    for_asset_index = durable_goods_pivoted.copy()

pca_input_columns = [
    name for name in for_asset_index.columns if name != 'case_id'
]

# Standardise every asset column (subtract the mean, divide by the std).
raw_asset_values = for_asset_index[pca_input_columns]
for_asset_index[pca_input_columns] = (
    raw_asset_values - raw_asset_values.mean()
) / raw_asset_values.std()

# The asset index is the first principal component of the standardised assets.
pca = sklearn_decomposition.PCA(n_components=1)
asset_index = pca.fit_transform(for_asset_index[pca_input_columns])
for_asset_index['asset_index'] = asset_index

# Keep a handle on the full frame, then drop a clear outlier. TODO: Discuss w/ josh
for_asset_index_all = for_asset_index
for_asset_index = for_asset_index_all[for_asset_index_all.case_id != '210334510158']

extra_modules['asset_index'] = (
    for_asset_index[['case_id', 'asset_index']],
    {'asset_index': 'PCA asset index'},
)

In [None]:
# Column sums over the rows of the household dropped as an outlier above.
outlier_asset_rows = all_assets[all_assets.case_id == '210334510158']

with pd.option_context('display.max_rows', 300, 'display.max_colwidth', 1):
    display(outlier_asset_rows.sum())

Geo

In [136]:
mosaiks_by_case_id = pd.read_parquet('2019_mosaiks_by_case_id')

# Describe every column named 'mosaiks_<suffix>' as 'mosaiks feature <suffix>'.
mosaiks_summary = {
    column: f'mosaiks feature {column[8:]}'
    for column in mosaiks_by_case_id.columns
    if column.startswith('mosaiks_')
}

extra_modules['mosaiks'] = (mosaiks_by_case_id, mosaiks_summary)


##### Interpret rent unit column

In [137]:
# Load module F of the 2019 survey; the metadata is not needed here.
mod_f_path = survey_directories[2019] / 'HH_MOD_F.dta'
mod_f, _ = pyreadstat.read_dta(mod_f_path, apply_value_formats=True)
def compute_yearly_rent(row):
    """Annualise the rent amount in `row.hh_f03a` using the time unit in `row.hh_f03b`.

    Returns a two-element Series: the row's ``case_id`` and the yearly rent.
    The yearly rent is NaN when the amount is NaN or the unit is not one of
    'DAY', 'WEEK', 'MONTH' or 'YEAR'.
    """
    # Number of payment periods in a year for each recognised unit.
    periods_per_year = {
        'DAY': 365,
        'WEEK': 365 / 7,
        'MONTH': 12,
        'YEAR': 1,
    }

    amount = row.hh_f03a
    period = row.hh_f03b

    if np.isnan(amount):
        annualised = amount
    else:
        multiplier = periods_per_year.get(period)
        annualised = np.nan if multiplier is None else amount * multiplier

    return pd.Series([row.case_id, annualised])
# One (case_id, yearly_rent) row per row of module F.
yearly_rent = mod_f.apply(compute_yearly_rent, axis=1)
yearly_rent.columns = ['case_id', 'yearly_rent']
yearly_rent['case_id'] = yearly_rent['case_id'].astype(str)

extra_modules['HH_MOD_F'] = (yearly_rent, {'yearly_rent': 'yearly rent'})

# The raw amount/unit pair is replaced by yearly_rent, so drop it when cleaning.
columns_to_drop = ['hh_f03a', 'hh_f03b']

In [138]:
# Clean the 2019 survey with the extra modules built above merged in.
malawi_2019, summary_2019 = clean_survey(
    2019,
    extra_modules=extra_modules,
    columns_to_drop=columns_to_drop,
    one_hot_encode=True,
)

error merging /home/selker/eop/data/malawi/MWI_2019_IHS-V_v06_M_Stata/ag_mod_e3, mismatch in HHID
error merging /home/selker/eop/data/malawi/MWI_2019_IHS-V_v06_M_Stata/ihs5_consumption_aggregate, mismatch in region
error merging /home/selker/eop/data/malawi/MWI_2019_IHS-V_v06_M_Stata/ihs5_consumption_aggregate, mismatch in district


  malawi_to_process[c] = pd.to_numeric(malawi_to_process[c], errors='ignore')


##### Write results

In [140]:
# NOTE(review): only the 2019 outputs are written here, while the next section
# reads 'malawi_cleaned_2016.parquet' / 'malawi_summary_2016.parquet' — confirm
# where those files are produced.
if True:
    out_path = Path('/home/selker/eop/data/malawi')
    cleaned_2019_path = out_path / 'malawi_cleaned_2019.parquet'
    summary_2019_path = out_path / 'malawi_summary_2019.parquet'

    malawi_2019.to_parquet(cleaned_2019_path, index=False)
    # The summary is stored with the covariate name as its index.
    summary_2019.set_index('covariate', drop=True).to_parquet(summary_2019_path)

### Calculate baseline poverty rate

In [95]:
# Reload the previously written 2016 cleaned data and its summary.
out_path = Path('/home/selker/eop/data/malawi')
malawi_stashed, summary_stashed = (
    pd.read_parquet(out_path / parquet_name)
    for parquet_name in (
        'malawi_cleaned_2016.parquet',
        'malawi_summary_2016.parquet',
    )
)

In [7]:
# Share of households whose outcome is below the poverty line.
# `malawi` was never defined at notebook level, so this cell failed on a fresh
# kernel; use the stashed frame loaded in the previous cell.
# Note: this is an unweighted household count; hh_wgt is not applied.
poverty_line = 2.15
below = int((malawi_stashed.outcome < poverty_line).sum())
total = len(malawi_stashed)

rate = below / total
display(rate)

0.6417307692307692

## 2018 Census

In [64]:
# Load the 2018 census population table (column names are on the second line
# of the file) and discard rows with no 'Area' value.
census = pd.read_csv(
    malawi_directory / 'census_2018_pop_tables.csv', header=1
).dropna(subset='Area')

In [72]:
# Recover region / district names from the column names of the stashed frame.
# `malawi` was never defined at notebook level, so read the columns from
# `malawi_stashed` (loaded in the poverty-rate section). Match on the prefix
# rather than on a substring so that only 'region_<name>' / 'district_<name>'
# columns are picked up and sliced.
region_prefix = 'region_'
district_prefix = 'district_'

regions = [
    c[len(region_prefix):] for c in malawi_stashed.columns if c.startswith(region_prefix)
]
districts = [
    c[len(district_prefix):] for c in malawi_stashed.columns if c.startswith(district_prefix)
]

In [68]:
# Show the region names extracted from the column names in the previous cell.
regions

['Central', 'North', 'Southern']

In [63]:
# Render the census table.
display(census)

Unnamed: 0,Area,Total,Less than 1 Year,1-4,5-9,10-14,15-19,20-24,25-29,30-34,...,Unnamed: 11,35-39,40-44,45-49,50-54,55-59,60-64,65-69,70-74,75+
0,Malawi,17563749,522802,2029604,2632878,2533303,2035945,1651576,1229411,1107226,...,Malawi,968998,729600,535868,387812,306921,234918,240551,144788,271548
1,,,,,,,,,,,...,,,,,,,,,,
2,Northern,2289780,63749,255335,339840,340115,274854,219632,154384,139341,...,Northern,117574,94666,69657,57721,43661,34645,28236,20607,35763
3,Chitipa,234927,6313,25492,36269,35726,28094,21979,15796,13812,...,Chitipa,11381,9576,7703,6130,4438,3233,2914,2123,3948
4,TA Mwabulambya,67232,1695,6957,10188,10204,8145,6555,4536,3975,...,TA Mwabulambya,3251,2723,2245,1893,1276,966,864,627,1132
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
436,Green Corner Ward,22609,581,2116,2675,2706,2619,2678,2118,1960,...,Green Corner Ward,1646,1258,743,479,333,250,210,102,135
437,Soche West Ward,37847,1017,3429,4393,4334,4195,4627,3844,3525,...,Soche West Ward,2968,1963,1229,807,547,404,272,149,144
438,Namiyango Ward,48642,1302,4952,6112,6187,5775,5680,4567,4000,...,Namiyango Ward,3286,2212,1452,1007,689,567,428,206,220
439,Chigumula Ward,24869,724,2578,3241,3280,2804,2607,2031,1833,...,Chigumula Ward,1671,1293,769,556,414,344,279,178,267
