In [21]:
%matplotlib inline

from pathlib import Path
from datetime import date

import rasterio
from rasterio import features
import numpy as np
import scipy as sp
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import geopandas as gpd


import affine

# from scipy import stats
from tqdm import tnrange, tqdm_notebook, tqdm

import population_tools

from config import (DATA_SRC, POP_DATA_SRC)

In [32]:
# Folder that receives the rasterised country masks (written as NetCDF files).
RESULTS_FOLDER = DATA_SRC / 'lancet/countries_rasters/'

# Country boundary shapefile that is read and rasterised below.
COUNTRY_SHAPEFILE = (
    DATA_SRC
    / 'GIS'
    / 'world'
    / 'ne_50m_admin_0_countries'
    / 'ne_50m_admin_0_countries.shp'
)

# Population file; the country rasters reuse its latitude/longitude coordinates.
POP_FILE = POP_DATA_SRC / 'population_count_2000-2020_eightres.nc'

# LIMIT to the following countries for now.
# These codes are matched against the shapefile's `adm0_a3` column.
SELECTED_COUNTRIES = [
    'IND',
    'USA',
    'GBR',
    'FRA',
    'SWE',
    'AUS',
]

In [6]:
# Load the country shapefile whose polygons get rasterised.
COUNTRIES = gpd.read_file(str(COUNTRY_SHAPEFILE))
# Shift every geometry by +180 along x so it matches the raster coordinates
# (presumably a 0..360 longitude grid -- confirm against population_tools).
COUNTRIES.geometry = COUNTRIES.translate(xoff=180)

# Index the table by its integer UN A3 code. The shapefile uses '-099' as the
# "no code" placeholder: blank those entries to NaN, drop the rows, and only
# then cast the remaining codes to int.
COUNTRIES = (
    COUNTRIES
    .assign(
        un_a3=lambda table: (
            table['un_a3'].where(table['un_a3'] != '-099').astype(float)
        )
    )
    .loc[lambda table: np.isfinite(table['un_a3'])]
    .assign(un_a3=lambda table: table['un_a3'].astype(int))
    .set_index('un_a3')
)

In [36]:
def rasterize_and_save_countries(pop, country_table, out_folder):
    """Rasterise every country onto the population grid and save one NetCDF file.

    The result is written to ``out_folder / 'country_all.nc'`` as a 2-D
    variable named ``country_id`` on the latitude/longitude coordinates of
    ``pop``.

    Parameters
    ----------
    pop : dataset-like
        Must expose ``population``, ``water_mask``, ``latitude`` and
        ``longitude``.
    country_table : GeoDataFrame
        Must contain a ``geometry`` column. The index values of the table are
        what gets handed to the rasteriser as the per-country value.
    out_folder : pathlib.Path or str
        Destination folder. It is created (including parents) when missing.
    """
    # Create the destination first so a missing folder does not surface only
    # after the rasterisation has already been done.
    out_folder = Path(out_folder)
    out_folder.mkdir(parents=True, exist_ok=True)

    # NOTE: the helper column is called 'iso_a3', but it simply carries
    # whatever the index of `country_table` holds.
    countries = country_table[['geometry']].copy()
    countries['iso_a3'] = country_table.index

    raster = population_tools.rasterize_data(pop.population, countries, 'iso_a3')
    # Multiply by the water mask (presumably this blanks out water cells --
    # confirm against population_tools.load_masked_population).
    raster = raster * pop.water_mask.squeeze().values
    raster = xr.DataArray(
        raster,
        coords=[pop.latitude, pop.longitude],
        dims=['latitude', 'longitude'],
        name='country_id',
    )

    raster.to_netcdf(str(out_folder / 'country_all.nc'))


def rasterize_and_save_country(iso_a3, pop, country_row, out_folder):
    """Rasterise a single country onto the population grid and save it as NetCDF.

    The country polygon is burned in with value 1 (cells merely touched by the
    polygon are included), multiplied by the water mask and written to
    ``out_folder / f'country_{iso_a3}.nc'`` as a ``country_id`` variable with
    dimensions ``(latitude, longitude, iso_a3)``.

    Parameters
    ----------
    iso_a3 : int-like
        Identifier of the country. It is used in the output file name and as
        the single value of the ``iso_a3`` coordinate, which is cast to int32,
        so it must be convertible to an integer.
    pop : dataset-like
        Must expose ``population``, ``water_mask``, ``latitude`` and
        ``longitude``. The first two dimensions of ``population`` define the
        raster shape.
    country_row : row-like
        Must expose a ``geometry`` attribute holding the country's shape.
    out_folder : pathlib.Path or str
        Destination folder. It is created (including parents) when missing.
    """
    # Create the destination first so a missing folder does not surface only
    # after the rasterisation has already been done.
    out_folder = Path(out_folder)
    out_folder.mkdir(parents=True, exist_ok=True)

    # Named `transform` rather than `affine` so the imported `affine` module
    # is not shadowed inside this function.
    transform = population_tools.get_affine(pop.population)

    raster = features.rasterize(
        [(country_row.geometry, 1)],
        out_shape=pop.population.shape[:2],
        transform=transform,
        all_touched=True,
    )
    # Roll the result to fix affine oddity: shift the columns by half the
    # raster width.
    # NOTE(review): unary minus binds tighter than `//`, so for an odd width
    # this is -(width + 1) / 2 rather than -(width // 2). Identical for even
    # widths; left unchanged.
    raster = np.roll(raster, -raster.shape[1] // 2, axis=1)

    raster = raster * pop.water_mask.squeeze().values
    # Append a length-1 axis for the 'iso_a3' dimension. Indexing with
    # np.newaxis replaces the in-place `raster.shape = ...` assignment, which
    # NumPy discourages.
    raster = raster[..., np.newaxis]
    raster = xr.DataArray(
        raster,
        coords=[pop.latitude, pop.longitude, [iso_a3]],
        dims=['latitude', 'longitude', 'iso_a3'],
        name='country_id',
    )

    # Per the original author's note, the Panoply viewer does not accept an
    # int64 coordinate here but does accept int32.
    raster['iso_a3'] = raster['iso_a3'].astype(np.int32)

    raster.to_netcdf(str(out_folder / f'country_{iso_a3}.nc'))





In [30]:
# Rasterise every country in the table into a single file in RESULTS_FOLDER.
with population_tools.load_masked_population(POP_FILE) as pop_file:
    rasterize_and_save_countries(
        pop=pop_file,
        country_table=COUNTRIES,
        out_folder=RESULTS_FOLDER,
    )

In [38]:
# Keep only the selected countries, matched on the `adm0_a3` column.
tmp = COUNTRIES.loc[COUNTRIES['adm0_a3'].isin(SELECTED_COUNTRIES)]

# Write one raster file per selected country; the table index value is passed
# on as the country identifier.
with population_tools.load_masked_population(POP_FILE) as pop_file:
    for idx, row in tmp.iterrows():
        rasterize_and_save_country(
            iso_a3=idx,
            pop=pop_file,
            country_row=row,
            out_folder=RESULTS_FOLDER,
        )