In [1]:
# Required libraries
import tempfile, os

import numpy as np
import pandas as pd
import geopandas as gpd
from tqdm import tqdm

import warnings

import rasterio
from rasterio.mask import mask
from rasterio.enums import Resampling
from rasterio.plot import show, show_hist

import xarray as xr
import rioxarray as rxr

from rasterstats import zonal_stats, utils

import requests
import json

import contextily as ctx
# from contextily import Place

# Addresses SSL error when interacting with worldpop data
# SECURITY NOTE(review): this replaces the standard library's default HTTPS context
# factory with the *unverified* one, so any code in this kernel that relies on that
# default no longer checks TLS certificates. The change is process-wide, not limited
# to worldpop requests.
# TODO confirm this is still needed: the downloads in this notebook go through
# `requests`, which may not consult this hook at all.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Common directories
# DATA_DIR: project-relative data folder; the download cache lives in its `cache/` subfolder.
DATA_DIR = "../data/"
# SAR_loc: folder holding the ADM boundary GeoPackages read by the notebook.
# NOTE(review): absolute, machine-specific path - the notebook will not run elsewhere
# without editing this line.
SAR_loc = "C:/development/CDCC-data/SAR/"

In [3]:
# Stub for user input
# TODO: Make this nicer and more accessible for users

country = "NPL"  # ISO3 code; passed as the `iso3` query parameter of the WorldPop request
exp_cat = ["population", "land cover"]
time_horizon = [2050, 2080]
# The standard RCP pathways are 2.6, 4.5, 6.0 and 8.5 (the previous "6.5" is not an RCP).
rcp_scenario = ["2.6", "4.5", "6.0", "8.5"]

# Settings
agg_criteria = ["max", "mean"]
class_range = range(3, 11)  # remember that python uses end-exclusive range, so this is 3-10

# User-selected, finite hazard class boundaries (ascending).
user_bin_edges = [0.5, 1, 1.5, 2, 2.5, 3]

# Hazard thresholds are taken from the finite, user-selected edges only.
min_haz_threshold = np.min(user_bin_edges)
max_haz_threshold = np.max(user_bin_edges)

# Append an open-ended upper edge so the top class captures everything at or above
# the largest finite edge. Built as a new list so the user's selection is not mutated.
selected_bin_edges = user_bin_edges + [np.inf]

# Number of classes = number of intervals between consecutive edges.
num_bins = len(selected_bin_edges) - 1  # default number of bins, within the range of `class_range`

Load the country boundaries from the ADM GeoPackage file, which includes the ISO3 code associated with each country name.

In [4]:
# Return periods considered valid, and the single one used for this test run
valid_RPs = [10, 100, 1000]
rp = 100  # selected RP for testing

# Administrative boundary layer
adm_boundaries_path = os.path.join(SAR_loc, "ADM_012.gpkg")
country_bounds = gpd.read_file(adm_boundaries_path)

# ADM2-level layer for the selected return period
adm2_rp_path = os.path.join(SAR_loc, f"ADM2_RP{rp}.gpkg")
adm2_RP_dataset = gpd.read_file(adm2_rp_path)

# TODO: Make ADM2 selection more generic for flexible notebooks
# Currently hardcoding country selection (175 == Nepal)
is_nepal = adm2_RP_dataset["ADM0_CODE"] == 175
npl_adm2 = adm2_RP_dataset.loc[is_nepal, :]

In [5]:
# Load the WorldPop dataset listing for `country`, using a local JSON cache.
# NOTE(review): the cache file name does not include the country code, so a listing
# cached for one country is reused after `country` changes - delete the file when
# switching countries.
iso3_path = f"{DATA_DIR}cache/iso3.json"
if os.path.exists(iso3_path):
    with open(iso3_path, 'r') as infile:
        resp = json.load(infile)
else:
    api_reply = requests.get(f"https://www.worldpop.org/rest/data/pop/wpgp?iso3={country}", timeout=60)
    # Fail loudly on HTTP errors instead of caching an error payload forever.
    api_reply.raise_for_status()
    resp = api_reply.json()

    # Later cells index resp['data']; refuse to cache a reply without it.
    if "data" not in resp:
        raise ValueError(f"Unexpected WorldPop response for {country}: no 'data' entry")

    # The cache folder may not exist on a fresh checkout.
    os.makedirs(os.path.dirname(iso3_path), exist_ok=True)
    with open(iso3_path, 'w') as outfile:
        json.dump(resp, outfile)

In [6]:
# TODO: User to select population data set
# For now the second entry of the listing loaded above is used; its 'files' entry
# holds the locations that the next cell downloads.
available_datasets = resp["data"]
metadata = available_datasets[1]
data_src = metadata["files"]

In [7]:
# Save population data to cache location
cache_dir = f"{DATA_DIR}/cache"
os.makedirs(cache_dir, exist_ok=True)  # a fresh checkout may not have the folder yet
fid = metadata['id']  # identical for every file of the selected dataset

for data_fn in tqdm(data_src):
    cache_fn = os.path.basename(data_fn)
    cache_path = f"{cache_dir}/{fid}_{cache_fn}"
    if os.path.exists(cache_path):
        warnings.warn(f"Found {fid}_{cache_fn} in cache, skipping...")
        continue

    # Download to a temporary name and rename only after the transfer succeeded, so an
    # interrupted or failed download is never mistaken for a valid cache entry.
    partial_path = f"{cache_path}.part"
    with requests.get(data_fn, stream=True, timeout=60) as response:
        response.raise_for_status()
        with open(partial_path, "wb") as handle:
            # Stream in 1 MiB chunks rather than holding the whole raster in memory.
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                handle.write(chunk)
    os.replace(partial_path, cache_path)

100%|██████████| 1/1 [00:00<00:00, 1049.10it/s]


In [8]:
# NOTE(review): the following cell opens the same two rasters again with masked=True
# and overwrites every name defined here.

# TODO: Temp data store, to be replaced with a config spec (.env file?) before deployment
flood_RP_data_loc = "C:/development/CDCC-data/SAR/HZD/Flood/NPL/"

# TESTING FILE
# pop_fn = f"{DATA_DIR}/cache/{fid}_{cache_fn}"
pop_fn = f"{DATA_DIR}/cache/WorldPop20_NPL_ppp_UNadj_constrained.tif"

# Population raster, then the flood raster for the selected return period
pop_data = rxr.open_rasterio(pop_fn)
flood_data = rxr.open_rasterio(f"{flood_RP_data_loc}RP{rp}.tif")

In [9]:
# NOTE(review): `valid_RPs` and `rp` are declared again here with the same values as in
# the boundary-loading cell; keep the two in sync (the ADM2 layer was read using `rp`).
valid_RPs = [10, 100, 1000]
rp = 100  # selected RP for testing

# TODO: Temp data store, to be replaced with a config spec (.env file?) before deployment
flood_RP_data_loc = "C:/development/CDCC-data/SAR/HZD/Flood/NPL/"

# TESTING FILE
# pop_fn = f"{DATA_DIR}/cache/{fid}_{cache_fn}"
pop_fn = f"{DATA_DIR}/cache/WorldPop20_NPL_ppp_UNadj_constrained.tif"

# masked=True makes rioxarray return nodata cells as NaN
pop_data = rxr.open_rasterio(pop_fn, masked=True)

# Open the flood raster, reproject it onto the population raster's grid, then clip it
# to the population raster's bounds
flood_data = (
    rxr.open_rasterio(f"{flood_RP_data_loc}RP{rp}.tif", masked=True)
    .rio.reproject_match(pop_data)
    .rio.clip_box(*pop_data.rio.bounds())
)



In [10]:
# Create result dataframe: one row per ADM2 unit, one population-sum column per class
num_rows = len(npl_adm2.index)

# Zero-initialised column for every class bin (classes are numbered from 1)
class_bin_cols = {}
for class_no in range(1, len(selected_bin_edges)):
    class_bin_cols[f"RP{rp}_c{class_no}_pop_sum"] = np.zeros(num_rows)

# Identifying columns copied from the ADM2 layer
adm_details = {"ADM2_CODE": npl_adm2.ADM2_CODE, "ADM2_NAME": npl_adm2.ADM2_NAME}

# Identifiers first, class columns after
df_details = {**adm_details, **class_bin_cols}
result_df = pd.DataFrame(df_details)

In [11]:
# Full raster of bin classes
pop_array = pop_data[0].values
# Work on a copy so the thresholding below does not write through to `flood_data`.
fld_array = flood_data[0].values.copy()

# fld_array[np.isnan(fld_array)] = 0  # Set NaNs to 0
# Everything below the lowest bin edge (not just negative values) is set to 0, which
# np.digitize then places in class 0 ("below the first edge").
fld_array[fld_array < min_haz_threshold] = 0
# Cap large values to the maximum threshold value; they fall in the top class.
fld_array[fld_array > max_haz_threshold] = max_haz_threshold

# Assign bin values to raster data.
# Class k (k >= 1) means selected_bin_edges[k-1] <= value < selected_bin_edges[k].
# NOTE(review): NaN (nodata) cells are expected to be placed past the last edge, i.e.
# outside classes 1..num_bins - verify if the saved raster is used for anything else.
bin_idx = np.digitize(fld_array, selected_bin_edges)
bin_rst = xr.DataArray(
    bin_idx[np.newaxis, ...].astype(np.int32),  # restore the leading band axis
    coords=flood_data.coords,
    dims=flood_data.dims,
)

# Saving raster of class bins for checking.
# The class raster shares the flood raster's grid, so take the CRS from it rather than
# hard-coding one.
bin_rst = bin_rst.rio.write_crs(flood_data.rio.crs)
bin_rst.rio.to_raster("pop_flood_bin.tif", compress="lzw")

In [12]:
# NOTE(review): presumably set so that cells removed by the clip below come back as 0
# instead of NaN and the sums stay finite - confirm before changing.
pop_data.rio.write_nodata(0, inplace=True)

# Names of the per-class population columns, derived from the number of classes so the
# assignment below keeps working when the bin edges change.
class_sum_cols = [f"RP{rp}_c{class_no}_pop_sum" for class_no in range(1, num_bins + 1)]

# pop_haz_rasters = []
for row in result_df.itertuples():
    # Geometry of the current ADM2 unit
    geom = npl_adm2.loc[npl_adm2.ADM2_CODE == row.ADM2_CODE, "geometry"]

    # Clip the class raster and the population raster to that unit
    rst = bin_rst.rio.clip(geom, npl_adm2.crs)
    arr = rst.values

    pop_clip = pop_data.rio.clip(geom, npl_adm2.crs)
    pop_arr = pop_clip.values

    # Sum population affected in each class (classes are numbered from 1)
    pop_in_bin = [pop_arr[arr == class_no].sum() for class_no in range(1, num_bins + 1)]
    # pop_haz_rasters.append(xr.DataArray(pop_arr * np.ma.masked_not_equal(arr, idx+1), coords=pop_clip.coords, dims=pop_clip.dims))

    # Save class data to dataframe.
    # Label-based .loc with explicit column names: the row mask is a boolean Series
    # (not valid as a positional indexer) and the columns no longer depend on a fixed slice.
    result_df.loc[result_df.ADM2_CODE == row.ADM2_CODE, class_sum_cols] = pop_in_bin


In [13]:
# Write table of total population in each class, in each ADM2.
# The file name follows the selected return period, matching the RP{rp} column names
# and the GeoTIFF written in the next cell.
result_df.to_csv(f"RP{rp}_class_example.csv", index=False)

In [14]:
# Write multi-band geotiff indicating population affected by each hazard class.
# Band k holds the population of the cells assigned to class k; all other cells are NaN.
class_grid = bin_rst.values  # read once instead of on every iteration

with rasterio.open(
    f'npl_hazpop_RP{rp}.tif', 'w',
    driver='GTiff',
    count=num_bins,
    width=pop_data.rio.width,
    height=pop_data.rio.height,
    dtype=np.float32,
    crs=pop_data.rio.crs,  # without a CRS the output is not georeferenced
    nodata=np.nan,  # cells outside a class are NaN in every band
    tiled=True,
    compress='lzw',
    transform=pop_data.rio.transform(),
) as dst:
    for idx in range(1, num_bins+1):
        class_pop = pop_data.where(class_grid == idx)[0]
        # Hand rasterio a plain array in the dtype declared for the file
        dst.write(class_pop.values.astype(np.float32), idx)
        dst.set_band_description(idx, f'Class {idx}')