## Onboard River Flood Hazard Maps from Joint Research Center (JRC) to OS-C S3 bucket

The data is flood depth historical return period data and can be found at [JRC data catalog](https://data.jrc.ec.europa.eu/dataset/1d128b6c-a4ee-4858-9e34-6210707f3c81). The methodology is detailed at ["A new dataset of river flood hazard maps for Europe and the Mediterranean Basin" by  Francesco Dottori, Lorenzo Alfieri, Alessandra Bianchi, Jon Skoien, and Peter Salamon](https://essd.copernicus.org/articles/14/1549/2022/).

The provide six different return periods: 10, 20, 50, 100, 200 and 500 years.

The resolution is 100m.

## Create Zarr from shape and Affine transformation

<span style="color:blue">Note: this file must be located in /hazard/src/ for the dependencies to work</span>

In [1]:
import sys
import os
import s3fs
import zarr
import numpy as np
import rasterio
import math
import xarray as xr

import pyproj
from pyproj.crs import CRS
from affine import Affine

from hazard.sources.osc_zarr import OscZarr



In [2]:
# https://console-openshift-console.apps.odh-cl1.apps.os-climate.org/k8s/ns/sandbox/secrets/physrisk-dev-s3-keys
# Hazard indicators bucket
default_staging_bucket = 'physrisk-hazard-indicators-dev01'
prefix = 'hazard'

# Acess key and secret key are stored as env vars OSC_S3_HI_ACCESS_KEY and OSC_S3_HI_SECRET_KEY, resp.
s3 = s3fs.S3FileSystem(anon=False, key=os.environ["OSC_S3_HIdev01_ACCESS_KEY"], secret=os.environ["OSC_S3_HIdev01_SECRET_KEY"])

# Define zarr group
zarr_storage = 'hazard_consortium.zarr'
group_path = os.path.join(default_staging_bucket, prefix, zarr_storage).replace('\\','/')
store = s3fs.S3Map(root=group_path, s3=s3, check=False)
root = zarr.group(store=store, overwrite=False) 

# zarr_ storage tree
root.tree()

Tree(nodes=(Node(disabled=True, name='/', nodes=(Node(disabled=True, name='inundation_coastal', nodes=(Node(di…

In [3]:
# List folder files
s3.ls(os.path.join(default_staging_bucket, prefix).replace('\\','/'))

['physrisk-hazard-indicators-dev01/hazard/hazard.zarr',
 'physrisk-hazard-indicators-dev01/hazard/hazard_consortium.zarr',
 'physrisk-hazard-indicators-dev01/hazard/riverflood_JRC_RP_hist.zarr']

In [4]:
# Create OscZarr object to interact with the bucket.
oscZ = OscZarr(bucket=default_staging_bucket,
        prefix=prefix,
        s3=s3,
        store=store)

In [5]:
# Path to the tif files. There is one tif file per return period

base_path_hazard = os.path.join(os.getenv("physical_risk_database"), 'hazard')

hazard_type = 'Flood'
datasource = 'JRC'

inputfile_path = os.path.join(base_path_hazard, hazard_type, datasource)

In [6]:
# Read one tif file to get the metadata: transform, crs, width, height and shape

return_period = '010'
data_filename = 'floodmap_EFAS_RP{}_C.tif'.format(return_period, return_period)
inputfile = os.path.join(inputfile_path, data_filename)

src = rasterio.open(inputfile)

transform = src.transform
crs = CRS.from_epsg(3035)
width = src.width
height = src.height
shape = (height, width)

return_periods_str = ['010', '020', '050', '100', '200', '500']
return_periods = [int(rt) for rt in return_periods_str]

src.close()

In [8]:
proj = pyproj.Transformer.from_crs(3035, 4326, always_xy=True, authority='EPSG')
cols, rows = np.meshgrid([0, width-1], [0, height-1])
xs, ys = rasterio.transform.xy(transform, rows, cols)
longitudes, latitudes = proj.transform(np.array(xs),  np.array(ys))

In [9]:
crs = str(CRS.from_epsg(4326))

# Create Affine transformation
min_xs = longitudes.min()
max_xs = longitudes.max()
min_ys = latitudes.min()
max_ys = latitudes.max()

bounds = (min_xs, min_ys, max_xs, max_ys)
size = shape

# Compute the parameters of the georeference
A = (bounds[2] - bounds[0]) / size[0] # pixel size in the x-direction in map units/pixel
B = 0 # rotation about y-axis
C = 0 # rotation about x-axis
D = -(bounds[3] - bounds[1]) / size[1] # pixel size in the y-direction in map units, almost always negative
E = bounds[0] # x-coordinate of the center of the upper left pixel
F = bounds[3] # y-coordinate of the center of the upper left pixel

transform = Affine(A, B, C, D, E, F)
transform


Affine(0.002798819336299164, 0.0, 0.0,
       -0.0005430512923345995, -55.508337433961465, 59.23743791216032)

In [12]:
# Create data file inside zarr group with name dataset_name

# Name standard is: hazard_type + _ + hazard_subtype (if exists) + '_' + hist or scenario + '_' RP (return period) or event/ emulated + '_' + data_provider
hazard_type = 'inundation_river'
data_source_name = 'jrc'
version = 'v1'
dataset_name = 'flood_depth_historical_1990_2016_map'
group_path_array = os.path.join(hazard_type, data_source_name, version, dataset_name)
oscZ._zarr_create(path=group_path_array,
                  shape = shape,
                  transform = transform,
                  crs = str(crs),
                  overwrite=False,
                  return_periods=return_periods)

<zarr.core.Array '/inundation_river/jrc/v1/flood_depth_historical_1990_2016_map' (6, 45242, 63976) float32>

In [13]:
z = oscZ.root[group_path_array]
z.info

0,1
Name,/inundation_river/jrc/v1/flood_depth_historical_1990_2016_map
Type,zarr.core.Array
Data type,float32
Shape,"(6, 45242, 63976)"
Chunk shape,"(6, 1000, 1000)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.FSStore
No. bytes,69465652608 (64.7G)


## Steps to populate hazard.zarr/flood_river_hist_RP_JRC

### Step 1: Read tif files

In [14]:
def read_window(src, height_pos, width_pos, chunck_size):
    """
    Read JRC data.

    Parameters:
        path_to_file (str): full path to tif file.

    Returns:
        fld_depth (numpy array): flood depth at (x1, y1) 3035 EPSG coordinates

    """
    window = rasterio.windows.Window(width_pos, height_pos, chunck_size, chunck_size)
    band = src.read(1, window=window)

    to_impute = band == src.nodata
    band[to_impute] = 0

    return band

### Step 2: Populate the raster file for every return period

In [15]:
chunck_size = 1000

for rt_i, rt in enumerate(return_periods_str):

    data_filename = 'floodmap_EFAS_RP{}_C.tif'.format(rt, rt)
    inputfile = os.path.join(inputfile_path, data_filename)

    src = rasterio.open(inputfile)

    #da.data[rt_i,:,:] = fld_depth
    for height_pos in range(0, height, chunck_size):
        for width_pos in range(0, width, chunck_size):

            band = read_window(src, height_pos, width_pos, chunck_size)

            z[rt_i,height_pos:height_pos+chunck_size, width_pos:width_pos+chunck_size] = band