# Re-estimate total population

Take the ADM3 enumeration districts (EDs) with population estimates from 2011, sum across the relevant columns, and take the maximum estimated total population out of:
1. total as reported
2. total male plus total female
3. total of age/gender breakdown columns

This fixes several EDs that have a zero value for total population.

In [None]:
import os

import geopandas

In [None]:
# Load the configuration with load_config from preprocess_utils.
from preprocess_utils import load_config
CONFIG = load_config()
# Bare expression: shown as the cell output so the configured paths can be checked.
CONFIG['paths']

In [None]:
# Path to the admin boundaries GeoPackage, under the configured data directory.
boundaries_fname = os.path.join(
    CONFIG['paths']['data'],
    'boundaries',
    'admin_boundaries.gpkg',
)

In [None]:
# Read the 'admin3' layer of the boundaries file.
adm3 = geopandas.read_file(
    boundaries_fname,
    layer='admin3',
)

In [None]:
# Bare expression: shows the available column names as the cell output.
adm3.columns

In [None]:
# Alternative total 1: male total plus female total.
adm3['total_gender'] = adm3['TOTAL_MLE'] + adm3['TOTAL_FMLE']

# Alternative total 2: row-wise sum of the age/gender breakdown columns.
# Column names have the form F<band>_<FMLE|MLE>, where <band> is a five-year
# band '0_4', '5_9', ..., '90_94' or the final '95_' band
# (giving e.g. 'F0_4_FMLE' and 'F95__MLE').
age_bands = ['{}_{}'.format(start, start + 4) for start in range(0, 95, 5)]
age_bands.append('95_')
age_gender_columns = [
    'F{}_{}'.format(band, gender)
    for gender in ('FMLE', 'MLE')  # all female columns first, then male
    for band in age_bands
]
adm3['total_age'] = adm3[age_gender_columns].sum(axis='columns')

In [None]:
# Re-estimated population: per-row maximum of the reported total, the
# male + female total and the age/gender breakdown total.
adm3['population'] = adm3[
    ['TOTAL_POP', 'total_gender', 'total_age']
].max(axis='columns')

In [None]:
# Show the rows whose reported total population is zero, alongside the two
# alternative totals (cell output).
adm3.loc[
    adm3['TOTAL_POP'] == 0,
    ['ED', 'TOTAL_POP', 'total_gender', 'total_age'],
]

In [None]:
# Plot the three totals per ED, ordered by the reported total population.
(
    adm3[['ED', 'TOTAL_POP', 'total_gender', 'total_age']]
    .set_index('ED')
    .sort_values(by='TOTAL_POP')
    .plot()
)

In [None]:
# Keep only the identifying columns, the re-estimated population and the
# geometry; copy so that `output` is independent of `adm3`.
output = (
    adm3[[
        'ED_CLASS',
        'ED_ID',
        'AREA',
        'PERIMETER',
        'PARISH',
        'CONST_NAME',
        'ED',
        'population',
        'geometry',
    ]]
    .copy()
)

In [None]:
# Write the re-estimated populations to the 'admin3' layer of
# population/population.gpkg under the configured data directory.
population_dir = os.path.join(CONFIG['paths']['data'], 'population')
# Writing fails if the parent folder is missing, so make sure it exists
# first (this is a no-op when the folder is already there).
os.makedirs(population_dir, exist_ok=True)
output.to_file(
    os.path.join(population_dir, 'population.gpkg'),
    layer='admin3',
    driver='GPKG')