In [None]:
import os
from glob import glob

import fiona
import geopandas
import numpy as np
import rasterio
import rasterstats

In [None]:
# Load the project configuration through the local preprocess_utils helper.
from preprocess_utils import load_config
CONFIG = load_config()
# Bare last expression: the notebook displays the configured paths.
# Later cells read the 'data' and 'incoming_data' entries from this mapping.
CONFIG['paths']

In [None]:
# GeoPackage holding the administrative boundary layers read below.
boundaries_fname = os.path.join(
    CONFIG['paths']['data'],
    'boundaries',
    'admin_boundaries.gpkg',
)

In [None]:
# Quick look at what the GeoPackage contains: print the first record of every layer.
for layer in fiona.listlayers(boundaries_fname):
    print(
        geopandas
        .read_file(boundaries_fname, layer=layer)
        .head(1)
    )

In [None]:
# Read the admin1 layer exactly as stored and display its coordinate reference system.
parishes = geopandas.read_file(
    boundaries_fname,
    layer='admin1',
)
parishes.crs

In [None]:
# Admin level 1 boundaries, reprojected to EPSG:4326 (WGS84 longitude/latitude).
adm1 = (
    geopandas.read_file(boundaries_fname, layer='admin1')
    .to_crs("EPSG:4326")
)
# Reference population used by add_pop for comparison; copied from the layer's
# POP2001 column (presumably the 2001 census count — confirm).
adm1['SOURCE_POP'] = adm1['POP2001']

In [None]:
# Admin level 2 boundaries, reprojected to EPSG:4326 (WGS84 longitude/latitude).
adm2 = (
    geopandas.read_file(boundaries_fname, layer='admin2')
    .to_crs("EPSG:4326")
)
# Reference population used by add_pop for comparison; this layer names the
# column POP2001_ (with a trailing underscore).
adm2['SOURCE_POP'] = adm2['POP2001_']

In [None]:
# Admin level 3 boundaries, reprojected to EPSG:4326 (WGS84 longitude/latitude).
adm3 = (
    geopandas.read_file(boundaries_fname, layer='admin3')
    .to_crs("EPSG:4326")
)
# Reference population is the sum of the TOTAL_MLE and TOTAL_FMLE columns
# (presumably male and female totals — confirm).
adm3['SOURCE_POP'] = adm3['TOTAL_MLE'] + adm3['TOTAL_FMLE']

In [None]:
def add_pop(boundaries, years=(2001, 2011, 2020), baseline_year=2001):
    """Attach WorldPop zonal population sums to ``boundaries`` and compare with ``SOURCE_POP``.

    For every year in ``years`` the raster ``jam_ppp_{year}.tif`` in the
    ``worldpop`` folder under ``CONFIG['paths']['incoming_data']`` is summed
    within each geometry and stored in a ``pop_{year}`` column. ``abs_diff``
    and ``rel_diff`` then measure how far ``SOURCE_POP`` is from the
    baseline-year raster sum. A short summary is printed as a sanity check.

    The columns are added to ``boundaries`` in place and the same object is
    returned, so the caller's frame is modified.

    Parameters
    ----------
    boundaries : GeoDataFrame
        Zones to summarise; must provide ``geometry`` and ``SOURCE_POP``.
        NOTE(review): geometries are assumed to be in the rasters' CRS — confirm.
    years : iterable of int, optional
        Raster years to summarise. Defaults to 2001, 2011 and 2020.
    baseline_year : int, optional
        Year whose raster sum ``SOURCE_POP`` is compared against. Must be one
        of ``years``. Defaults to 2001.

    Returns
    -------
    GeoDataFrame
        ``boundaries`` itself, with the ``pop_{year}``, ``abs_diff`` and
        ``rel_diff`` columns added.

    Raises
    ------
    ValueError
        If ``baseline_year`` is not among ``years``.
    """
    years = list(years)
    if baseline_year not in years:
        raise ValueError(
            f"baseline_year {baseline_year} must be one of the requested years {years}"
        )

    pop_columns = []
    for year in years:
        pop_fname = os.path.join(CONFIG['paths']['incoming_data'], 'worldpop', f'jam_ppp_{year}.tif')
        colname = f"pop_{year}"
        # One zonal-statistics result per geometry, in the same order as the rows.
        boundaries[colname] = [
            zone_stats['sum'] for zone_stats in
            rasterstats.gen_zonal_stats(boundaries.geometry, pop_fname, stats='sum')
        ]
        pop_columns.append(colname)

    baseline_pop = boundaries[f"pop_{baseline_year}"]
    boundaries['abs_diff'] = np.abs(boundaries['SOURCE_POP'] - baseline_pop)
    # Relative to the raster estimate; a zone whose baseline sum is 0 yields inf/NaN here.
    boundaries['rel_diff'] = boundaries['abs_diff'] / baseline_pop

    print(boundaries.rel_diff.describe())
    print("")
    print(boundaries[pop_columns + ['SOURCE_POP']].sum())
    print("\n")

    return boundaries

In [None]:
# Zonal population sums for admin level 1. add_pop modifies adm1 in place and
# returns it, so adm1_pop refers to the same frame.
adm1_pop = add_pop(boundaries=adm1)

In [None]:
# Zonal population sums for admin level 2. add_pop modifies adm2 in place and
# returns it, so adm2_pop refers to the same frame.
adm2_pop = add_pop(boundaries=adm2)

In [None]:
# Zonal population sums for admin level 3. add_pop modifies adm3 in place and
# returns it, so adm3_pop refers to the same frame.
adm3_pop = add_pop(boundaries=adm3)

In [None]:
# Keep only the identifying columns, the layer's TOTAL_POP and the 2020 raster
# estimate, then reproject to EPSG:3448 (JAD2001 / Jamaica Metric Grid).
output = (
    adm3_pop[[
        'ED_CLASS',
        'ED_ID',
        'PARISH',
        'CONST_NAME',
        'ED',
        'TOTAL_POP',
        'pop_2020',
        'geometry',
    ]]
    .copy()
    .to_crs("EPSG:3448")
)

In [None]:
# Write the admin3 population layer to the project's population GeoPackage.
population_fname = os.path.join(CONFIG['paths']['data'], 'population', 'population.gpkg')
# Create the output folder first so the write also works on a fresh data
# directory (Restart & Run All); a no-op when the folder already exists.
os.makedirs(os.path.dirname(population_fname), exist_ok=True)
output.to_file(
    population_fname,
    layer='admin3',
    driver='GPKG')