In [1]:
# Required libraries
import tempfile, os

import numpy as np
import pandas as pd
import geopandas as gpd
from tqdm import tqdm

import warnings

import rasterio
import xarray as xr
import rioxarray as rxr

import requests
import json

import contextily as ctx
# from contextily import Place

# Addresses SSL error when interacting with worldpop data
# SECURITY NOTE(review): the assignment below swaps the ssl module's default
# HTTPS context factory for the *unverified* one. This is a module-level patch,
# so it applies to every caller in this kernel that relies on that default,
# not only to WorldPop traffic, and it removes protection against
# man-in-the-middle attacks.
# Presumably the `requests` calls used for the downloads in this notebook do
# their own certificate handling and are not affected by this patch -- TODO
# confirm whether the workaround is still needed and, if so, scope it to the
# single failing call instead of patching globally.
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Common directories
# DATA_DIR is resolved relative to the notebook's working directory; the
# download cache used by later cells lives in its "cache" sub-directory.
DATA_DIR = "../data/"

# Root of the local SAR data store (ADM geopackages, hazard rasters).
# The location is machine specific, so it can be overridden with the
# CDCC_SAR_DIR environment variable; the previous hard-coded path remains the
# default. Joining with "" guarantees a trailing path separator.
SAR_loc = os.path.join(
    os.environ.get("CDCC_SAR_DIR", "C:/development/CDCC-data/SAR/"), ""
)

In [3]:
def damage_factor(x):
    """Estimate the damage fraction caused by a given flood water depth.

    Evaluates a cubic polynomial fitted to the average damage across several
    sectors in Asia (commercial, industry, transport, agriculture,
    infrastructure and residential) as a function of water depth in meters.

    Parameters
    ----------
    x : scalar or numpy.ndarray
        Water depth in meters.

    Returns
    -------
    Damage factor limited to the closed interval [0, 1]. NaN inputs yield NaN.

    References
    ----------
    .. [1] JRC, 2017
    """
    fitted = 0.00723*x**3 - 0.1*x**2 + 0.506*x + 0.023

    # The fit is only meaningful as a fraction, so clamp it to [0, 1]
    return np.clip(fitted, 0.0, 1.0)


In [4]:
# Stub for user input
# TODO: Make this nicer and more accessible for users

country = "NPL"  # ISO3 code; used below to query WorldPop
exp_cat = ["population", "land cover"]
time_horizon = [2050, 2080]
# Representative Concentration Pathways. The standard set is 2.6 / 4.5 / 6.0 /
# 8.5; the previous "6.5" entry does not correspond to a defined pathway.
rcp_scenario = ["2.6", "4.5", "6.0", "8.5"]

# Settings
agg_criteria = ["max", "mean"]
class_range = range(3, 11)  # remember that python uses end-exclusive range, so this is 3-10

# Finite hazard bin edges picked by the user
finite_bin_edges = [0.5, 1, 1.5, 2, 2.5, 3]

# Hazard thresholds are taken from the user-selected (finite) edges
min_haz_threshold = np.min(finite_bin_edges)
max_haz_threshold = np.max(finite_bin_edges)

# Final edge list gets an open-ended top bin. A new list is built rather than
# extending the user's selection in place.
selected_bin_edges = finite_bin_edges + [np.inf]

num_bins = len(selected_bin_edges)-1  # default number of bins, within the range of `class_range`
assert num_bins in class_range, (
    f"{num_bins} bins is outside the supported range "
    f"{class_range.start}-{class_range.stop - 1}"
)

Load the country boundaries from the ADM GeoPackage file, which includes the ISO3 code associated with each country name.

In [5]:
# Return periods (RP) handled by this notebook, and the one used for this test run
valid_RPs = [10, 100, 1000]
rp = 100  # selected RP for testing

# Administrative boundaries (ADM levels 0-2, going by the file name)
adm_012_path = os.path.join(SAR_loc, "ADM_012.gpkg")
country_bounds = gpd.read_file(adm_012_path)

# ADM2-level dataset for the selected return period
adm2_rp_path = os.path.join(SAR_loc, f"ADM2_RP{rp}.gpkg")
adm2_RP_dataset = gpd.read_file(adm2_rp_path)

# TODO: Make ADM2 selection more generic for flexible notebooks
# Currently hardcoding country selection (175 == Nepal)
in_nepal = adm2_RP_dataset["ADM0_CODE"] == 175
npl_adm2 = adm2_RP_dataset.loc[in_nepal, :]

In [6]:
# Load the WorldPop dataset listing for `country` from the local cache, or
# download it and cache it when it is not there yet.
cache_dir = os.path.join(DATA_DIR, "cache")
os.makedirs(cache_dir, exist_ok=True)  # a fresh checkout may not have the cache directory

# The cache file is keyed by country: a single shared "iso3.json" would keep
# serving the first country's listing after `country` is changed.
iso3_path = os.path.join(cache_dir, f"iso3_{country}.json")
if not os.path.exists(iso3_path):
    listing = requests.get(
        f"https://www.worldpop.org/rest/data/pop/wpgp?iso3={country}",
        timeout=60,
    )
    # Fail loudly on an HTTP error instead of caching an error payload
    listing.raise_for_status()
    resp = json.loads(listing.text)

    with open(iso3_path, 'w') as outfile:
        json.dump(resp, outfile)
else:
    with open(iso3_path, 'r') as infile:
        resp = json.load(infile)

In [7]:
# TODO: User to select population data set
# The listing loaded above carries its entries under "data"; the entry at
# index 1 is hard-coded as the population data set for now, and its "files"
# field is what gets downloaded in the next step.
wpgp_entries = resp["data"]
metadata = wpgp_entries[1]
data_src = metadata["files"]

In [8]:
# Save population data to cache location
cache_dir = os.path.join(DATA_DIR, "cache")
os.makedirs(cache_dir, exist_ok=True)  # a fresh checkout may not have the cache directory

fid = metadata['id']  # same for every file of the selected data set
for data_fn in tqdm(data_src):
    cache_fn = os.path.basename(data_fn)
    cache_path = os.path.join(cache_dir, f"{fid}_{cache_fn}")
    if os.path.exists(cache_path):
        warnings.warn(f"Found {fid}_{cache_fn} in cache, skipping...")
        continue

    # Download first and verify the HTTP status, so a failed request cannot
    # leave an empty or error-page file that later runs would treat as cached.
    response = requests.get(data_fn, timeout=60)
    response.raise_for_status()

    # Write to a temporary name and rename once complete: an interrupted write
    # then never produces a file under the final cache name.
    partial_path = cache_path + ".part"
    with open(partial_path, "wb") as handle:
        handle.write(response.content)
    os.replace(partial_path, cache_path)

100%|██████████| 1/1 [00:00<00:00, 501.11it/s]


In [9]:
# TESTING FILE
# Hard-coded population raster from the cache; the generic form would be
# pop_fn = f"{DATA_DIR}/cache/{fid}_{cache_fn}"
# NOTE(review): the following cell re-opens both rasters with masked=True and
# rebinds `pop_data` / `flood_data`, so the objects created here are overwritten.
pop_fn = f"{DATA_DIR}/cache/WorldPop20_NPL_ppp_UNadj_constrained.tif"
pop_data = rxr.open_rasterio(pop_fn)

# TODO: Temp data store, to be replaced with a config spec (.env file?) before deployment
flood_RP_data_loc = "C:/development/CDCC-data/SAR/HZD/Flood/NPL/"

# Flood hazard raster for the selected return period
flood_fn = f"{flood_RP_data_loc}RP{rp}.tif"
flood_data = rxr.open_rasterio(flood_fn)

In [10]:
# `rp` and `valid_RPs` are deliberately NOT redefined here. They are set once,
# in the cell that loads the ADM2_RP{rp} dataset; a second copy could silently
# diverge, so the flood raster and the ADM2 dataset would refer to different
# return periods.

# TESTING FILE
# pop_fn = f"{DATA_DIR}/cache/{fid}_{cache_fn}"
pop_fn = f"{DATA_DIR}/cache/WorldPop20_NPL_ppp_UNadj_constrained.tif"
# masked=True so that nodata cells are read as NaN
pop_data = rxr.open_rasterio(pop_fn, masked=True)

# TODO: Temp data store, to be replaced with a config spec (.env file?) before deployment
# Built from the shared SAR root and the selected country instead of a second
# hard-coded absolute path; with the default settings this is the same
# <SAR_loc>/HZD/Flood/NPL/ directory as before.
flood_RP_data_loc = os.path.join(SAR_loc, "HZD", "Flood", country, "")
flood_data = rxr.open_rasterio(os.path.join(flood_RP_data_loc, f"RP{rp}.tif"), masked=True)

# Reproject and clip raster to same bounds as population data
flood_data = flood_data.rio.reproject_match(pop_data)
flood_data = flood_data.rio.clip_box(*pop_data.rio.bounds())



In [11]:
# Results table: one row per ADM2 unit of the selected country
num_rows = npl_adm2.shape[0]
adm_details = dict(
    ADM2_CODE=npl_adm2["ADM2_CODE"],
    ADM2_NAME=npl_adm2["ADM2_NAME"],
)

# Zero-initialised columns for every return period: the affected-population
# sums first, followed by the EAI columns
affected_pop_cols = {}
eai_cols = {}
for rp_i in valid_RPs:
    affected_pop_cols[f"RP{rp_i}_pop_sum"] = np.zeros(num_rows)
    eai_cols[f"RP{rp_i}_EAI"] = np.zeros(num_rows)

# Combine identifiers and result columns (in that order) into one frame
df_details = {**adm_details, **affected_pop_cols, **eai_cols}

result_df = pd.DataFrame(df_details)

In [12]:
# Full-extent arrays (first band) of the population and flood rasters
pop_array = pop_data[0].values

# Work on a copy: `.values` can hand back the raster's own buffer, and the
# in-place thresholding below would then permanently alter `flood_data`
# (a later re-run with a lower threshold would no longer see the original data).
fld_array = flood_data[0].values.copy()

# fld_array[np.isnan(fld_array)] = 0  # Set NaNs to 0
fld_array[fld_array < min_haz_threshold] = np.nan  # Set values below min threshold to nan
# fld_array[fld_array > max_haz_threshold] = max_haz_threshold  # Cap large values to maximum threshold value

# Assign impact factor (NaN cells stay NaN)
impact_array = damage_factor(fld_array)

# Create raster from array (for preview): re-add the leading band axis and
# reuse the flood raster's coordinates and dimension names
impact_rst = xr.DataArray(
    impact_array[np.newaxis, ...].astype(np.float32),
    coords=flood_data.coords,
    dims=flood_data.dims,
)

# impact_rst.plot()

In [22]:
# Register 0 as the population raster's nodata value before clipping
pop_data.rio.write_nodata(0, inplace=True)

# Probability of return period
# Essentially the same as 1/RP, but accounts for cases where RP == 1
RPp = 1 - np.exp(-1/rp)

# Result columns for the selected return period
pop_sum_col = f"RP{rp}_pop_sum"
eai_col = f"RP{rp}_EAI"

for row in result_df.itertuples():
    geom = npl_adm2.loc[npl_adm2.ADM2_CODE == row.ADM2_CODE, "geometry"]

    # Clip impact and population rasters to the same ADM2 geometry
    rst = impact_rst.rio.clip(geom, npl_adm2.crs)
    pop_clip = pop_data.rio.clip(geom, npl_adm2.crs)
    pop_arr = pop_clip.values

    # Cells with a defined impact factor (NaN >= 0 is False, so cells below
    # the hazard threshold or without data are excluded).
    # NOTE(review): the impact factor is only used as a mask here; the
    # population is not weighted by it -- confirm this is intended.
    affected_cells = (rst >= 0).values

    # nansum: the rasters are opened with masked=True, so population cells may
    # be NaN; a plain sum would turn the whole ADM2 total into NaN.
    affected_pop = np.nansum(pop_arr[affected_cells])  # RPi_pop

    # Get EAI for RP
    RPi_EAI = affected_pop * RPp

    is_current_adm2 = result_df.ADM2_CODE == row.ADM2_CODE

    # Save total population affected to dataframe
    result_df.loc[is_current_adm2, pop_sum_col] = affected_pop

    # Save EAI
    result_df.loc[is_current_adm2, eai_col] = RPi_EAI


# ... after each RP ...
# Sum all RPi_EAI columns for each ADM2: table [ADM2; RP_Pop_EAI]
# ...


In [23]:
# Write the per-ADM2 results table (affected population and EAI columns) to CSV.
# The file name follows the selected country and return period instead of
# being hard-coded; with country == "NPL" and rp == 100 it is unchanged.
result_df.to_csv(f"{country.lower()}_RP{rp}_functional_example_results.csv", index=False)