## Onboard Coastal Flood Hazard Maps from Joint Research Center (JRC) to OS-C S3 bucket

The data is from the Joint Research Center (JRC) and covers storm surge level return periods for Europe. The data can be downloaded from [JRC](https://data.jrc.ec.europa.eu/dataset/0026aa70-cc6d-4f6f-8c2f-554a2f9b17f2) for eigth different return periods: 5, 10, 20, 50, 100, 200, 500, 1000 years.

## Create Zarr from shape and Affine transformation

<span style="color:blue">Note: this file must be located in /hazard/src/ for the dependencies to work</span>

In [1]:
import sys
import os
import s3fs
import zarr
import numpy as np
import rasterio
import math
import xarray as xr
import math
import pyproj

from pyproj.crs import CRS
from affine import Affine

from hazard.sources.osc_zarr import OscZarr



In [2]:
# https://console-openshift-console.apps.odh-cl1.apps.os-climate.org/k8s/ns/sandbox/secrets/physrisk-dev-s3-keys
# Hazard indicators bucket
default_staging_bucket = 'physrisk-hazard-indicators-dev01'
prefix = 'hazard'

# Acess key and secret key are stored as env vars OSC_S3_HI_ACCESS_KEY and OSC_S3_HI_SECRET_KEY, resp.
s3 = s3fs.S3FileSystem(anon=False, key=os.environ["OSC_S3_HIdev01_ACCESS_KEY"], secret=os.environ["OSC_S3_HIdev01_SECRET_KEY"])

# Define zarr group
zarr_storage = 'hazard_consortium.zarr'
group_path = os.path.join(default_staging_bucket, prefix, zarr_storage).replace('\\','/')
store = s3fs.S3Map(root=group_path, s3=s3, check=False)
root = zarr.group(store=store, overwrite=False) 

# zarr_ storage tree
root.tree()

Tree(nodes=(Node(disabled=True, name='/', nodes=(Node(disabled=True, name='wind', nodes=(Node(disabled=True, n…

In [3]:
# List folder files
s3.ls(os.path.join(default_staging_bucket, prefix, zarr_storage).replace('\\','/'))

['physrisk-hazard-indicators-dev01/hazard/hazard_consortium.zarr/.zgroup',
 'physrisk-hazard-indicators-dev01/hazard/hazard_consortium.zarr/wind']

In [4]:
# Create OscZarr object to interact with the bucket.
oscZ = OscZarr(bucket=default_staging_bucket,
        prefix=prefix,
        s3=s3,
        store=store)

In [5]:
# Path to the nc file. 

base_path_hazard = os.path.join(os.getenv("physical_risk_database"), 'hazard')

hazard_type = 'Flood'
datasource = 'JRC'

inputfile_path = os.path.join(base_path_hazard, hazard_type, datasource)
data_filename = 'CoastAlRisk_Europe_EESSL_Historical.nc'

# There is one .nc file with 8 return periods
inputfile = os.path.join(inputfile_path, data_filename)
flood_dph = xr.open_dataset(inputfile)
flood_dph

ecCodes library not found using ['c:\\ProgramData\\Anaconda3\\lib\\site-packages\\ecmwflibs\\eccodes.dll', 'eccodes', 'libeccodes.so', 'libeccodes']


In [6]:
# As you can notice the file privdes data as a vector
# We must create a grid

return_periods = [5, 10, 20, 50, 100, 200, 500, 1000]

lat = flood_dph.latitude.data
lon = flood_dph.longitude.data
ssl = flood_dph.ssl.data
total_size = ssl.shape[0]

In [7]:
# Create latitude and longitude grid
min_lat, max_lat = lat.min(), lat.max()
min_lon, max_lon = lon.min(), lon.max()

total_size = lon.shape[0]
small_size = total_size
grid = np.meshgrid(np.linspace(min_lon, max_lon, total_size), np.linspace(min_lat, max_lat, small_size))

# Create and empty matrix with zeros
ssl_matrix = np.zeros((small_size, total_size, len(return_periods)))

In [8]:
# Save the data 
ssl_matrix_name = os.path.join(inputfile_path, "ssl_matrix.npy")

if "ssl_matrix.npy" not in os.listdir(inputfile_path):
    # Find the nearest point and and the ssl value
    for pos_i in range(total_size):
        lon_i = lon[pos_i]
        lat_i = lat[pos_i]
        ssl_i = ssl[pos_i, :]
        
        aux_min = 500000
        for i in range(small_size):
            for j in range(total_size):
                lon_ij = grid[0][i, j]
                lat_ij = grid[1][i, j]

                dist = math.dist((lon_ij, lat_ij), (lon_i, lat_i))

                if dist < aux_min:
                    aux_min = dist
                    aux_min_i = (i, j)
        
        ssl_matrix[aux_min_i[0], aux_min_i[1], :] = ssl_i
        print(pos_i)

    np.save(ssl_matrix_name, ssl_matrix)
else:
    ssl_matrix = np.load(ssl_matrix_name)

In [9]:
# Define zarr shape and coordinate system
width = ssl_matrix.shape[1]
height = ssl_matrix.shape[0]
shape = (height, width)
crs = str(CRS.from_epsg(4326))

longitudes = grid[0]
latitudes = grid[1]

In [10]:
# Create Affine transformation
min_xs = longitudes.min()
max_xs = longitudes.max()
min_ys = latitudes.min()
max_ys = latitudes.max()

bounds = (min_xs, min_ys, max_xs, max_ys)

# Compute the parameters of the georeference
A = (bounds[2] - bounds[0]) / width # pixel size in the x-direction in map units/pixel
B = 0 # rotation about y-axis
C = 0 # rotation about x-axis
D = -(bounds[3] - bounds[1]) / height # pixel size in the y-direction in map units, almost always negative
E = bounds[0] # x-coordinate of the center of the upper left pixel
F = bounds[3] # y-coordinate of the center of the upper left pixel

transform = Affine(A, B, C, D, E, F)
transform


Affine(0.033095450490633366, 0.0, 0.0,
       -0.019714540588760033, -29.1, 71.3)

In [11]:
# Create data file inside zarr group with name dataset_name

# Name standard is: hazard_type + _ + hazard_subtype (if exists) + '_' + hist or scenario + '_' RP (return period) or event/ emulated + '_' + data_provider
hazard_type = 'inundation_coastal'
data_source_name = 'jrc'
version = 'v1'
dataset_name = 'strom_surge_level_historical_1969_2004_map'
group_path_array = os.path.join(hazard_type, data_source_name, version, dataset_name)
oscZ._zarr_create(path=group_path_array,
                  shape = shape,
                  transform = transform,
                  crs = str(crs),
                  overwrite=False,
                  return_periods=return_periods)

<zarr.core.Array '/inundation_coastal/jrc/v1/strom_surge_level_historical_1969_2004_map' (8, 2242, 2242) float32>

In [12]:
z = oscZ.root[group_path_array]
z.info

0,1
Name,/inundation_coastal/jrc/v1/strom_surge_level_historical_1969_2004_map
Type,zarr.core.Array
Data type,float32
Shape,"(8, 2242, 2242)"
Chunk shape,"(8, 1000, 1000)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.FSStore
No. bytes,160850048 (153.4M)


## Steps to populate hazard.zarr/flood_costal_hist_RP_JRC

### Step 2: Populate the raster file for every return period

In [13]:
chunck_size = 1000

for rt_i in range(len(return_periods)):
    for height_pos in range(0, height, chunck_size):
        for width_pos in range(0, width, chunck_size):

            z[rt_i,height_pos:height_pos+chunck_size, width_pos:width_pos+chunck_size] = ssl_matrix[height_pos:height_pos+chunck_size, width_pos:width_pos+chunck_size, rt_i]