## Onboard Coastal Flood Hazard Maps from Joint Research Center (JRC) to OS-C S3 bucket

The data is from the Joint Research Center (JRC) and covers storm surge level return periods for Europe. The data can be downloaded from [JRC](https://data.jrc.ec.europa.eu/dataset/0026aa70-cc6d-4f6f-8c2f-554a2f9b17f2) for eigth different return periods: 5, 10, 20, 50, 100, 200, 500, 1000 years.

## Create Zarr from shape and Affine transformation

<span style="color:blue">Note: this file must be located in /hazard/src/ for the dependencies to work</span>

In [1]:
import sys
import os
import s3fs
import zarr
import numpy as np
import rasterio
import math
import xarray as xr
import math
import pyproj

from pyproj.crs import CRS
from affine import Affine

from hazard.sources.osc_zarr import OscZarr



In [2]:
# https://console-openshift-console.apps.odh-cl1.apps.os-climate.org/k8s/ns/sandbox/secrets/physrisk-dev-s3-keys
# Hazard indicators bucket
default_staging_bucket = 'physrisk-hazard-indicators-dev01'
prefix = 'hazard'

# Acess key and secret key are stored as env vars OSC_S3_HI_ACCESS_KEY and OSC_S3_HI_SECRET_KEY, resp.
s3 = s3fs.S3FileSystem(anon=False, key=os.environ["OSC_S3_HIdev01_ACCESS_KEY"], secret=os.environ["OSC_S3_HIdev01_SECRET_KEY"])

# Define zarr group
zarr_storage = 'hazard.zarr'
group_path = os.path.join(default_staging_bucket, prefix, zarr_storage).replace('\\','/')
store = s3fs.S3Map(root=group_path, s3=s3, check=False)
root = zarr.group(store=store, overwrite=False) 

# zarr_ storage tree
root.tree()

Tree(nodes=(Node(disabled=True, name='/', nodes=(Node(disabled=True, name='physrisk-hazard-indicators-dev01', …

In [3]:
# List folder files
s3.ls(os.path.join(default_staging_bucket, prefix).replace('\\','/'))

['physrisk-hazard-indicators-dev01/hazard/hazard.zarr',
 'physrisk-hazard-indicators-dev01/hazard/riverflood_JRC_RP_hist.zarr']

In [4]:
# Create OscZarr object to interact with the bucket.
oscZ = OscZarr(bucket=default_staging_bucket,
        prefix=prefix,
        s3=s3,
        store=store)

In [5]:
# Path to the nc file. 

base_path_hazard = os.path.join(os.getenv("physical_risk_database"), 'hazard')

hazard_type = 'Flood'
datasource = 'JRC'

inputfile_path = os.path.join(base_path_hazard, hazard_type, datasource)
data_filename = 'CoastAlRisk_Europe_EESSL_Historical.nc'

# There is one .nc file with 8 return periods
inputfile = os.path.join(inputfile_path, data_filename)
flood_dph = xr.open_dataset(inputfile)
flood_dph

ecCodes library not found using ['c:\\ProgramData\\Anaconda3\\lib\\site-packages\\ecmwflibs\\eccodes.dll', 'eccodes', 'libeccodes.so', 'libeccodes']


In [6]:
# As you can notice the file privdes data as a vector
# We must create a grid

return_periods = [5, 10, 20, 50, 100, 200, 500, 1000]
rp_index = 0 # []
rp = return_periods[rp_index]
lat = flood_dph.latitude.data
lon = flood_dph.longitude.data
ssl = flood_dph.ssl.data[:,rp_index]
total_size = ssl.shape[0]

In [10]:
# Create latitude and longitude grid
min_lat, max_lat = lat.min(), lat.max()
min_lon, max_lon = lon.min(), lon.max()

total_size = lon.shape[0]
small_size = 20
grid = np.meshgrid(np.linspace(min_lon, max_lon, total_size), np.linspace(min_lat, max_lat, small_size))

# Create and empty matrix with zeros
ssl_matrix = np.zeros((small_size, total_size))

In [18]:
# Find the nearest point and and the ssl value

for pos_i in range(total_size):
    lon_i = lon[pos_i]
    lat_i = lat[pos_i]
    ssl_i = ssl[pos_i]
    
    aux_min = 500000
    for i in range(small_size):
        for j in range(total_size):
            lon_ij = grid[0][i, j]
            lat_ij = grid[1][i, j]

            dist = math.dist((lon_ij, lat_ij), (lon_i, lat_i))

            if dist < aux_min:
                aux_min = dist
                aux_min_i = (i, j)
    
    ssl_matrix[aux_min_i[0], aux_min_i[1]] = ssl_i

0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
27

In [11]:
# Save the data 
ssl_matrix_name = os.path.join(inputfile_path, "ssl_matrix_rp{}.csv".format(rp))

if "ssl_matrix_rp{}.csv".format(rp) in os.listdir(inputfile_path):
    ssl_matrix = np.genfromtxt(ssl_matrix_name, delimiter=';')
else:
    np.savetxt(ssl_matrix_name, ssl_matrix, delimiter=";")   

In [12]:
width = ssl_matrix.shape[1]
height = ssl_matrix.shape[0]
shape = (height, width)
crs = str(CRS.from_epsg(3035))

In [13]:
proj = pyproj.Transformer.from_crs(4326, 3035, always_xy=True, authority='EPSG')
longitudes = grid[0]
latitudes = grid[1]
xs, ys = proj.transform(np.array(longitudes),  np.array(latitudes))


In [14]:
min_xs = xs.min()
max_xs = xs.max()
min_ys = ys.min()
max_ys = ys.max()

bounds = (min_xs, min_ys, max_xs, max_ys)
size = shape

# Compute the parameters of the georeference
A = (bounds[2] - bounds[0]) / size[0] # pixel size in the x-direction in map units/pixel
B = 0 # rotation about y-axis
C = 0 # rotation about x-axis
D = -(bounds[3] - bounds[1]) / size[1] # pixel size in the y-direction in map units, almost always negative
E = bounds[0] # x-coordinate of the center of the upper left pixel
F = bounds[3] # y-coordinate of the center of the upper left pixel

transform = Affine(A, B, C, D, E, F)


In [15]:
transform

Affine(361389.5488175517, 0.0, 0.0,
       -2354.730564157604, 528610.746339059, 5745690.37633531)

In [16]:
# Create data file inside zarr group with name dataset_name

# Name standard is: hazard_type + _ + hazard_subtype (if exists) + '_' + hist or scenario + '_' RP (return period) or event/ emulated + '_' + data_provider
dataset_name = 'flood_coast_hist_RP_JRC'
group_path_array = os.path.join(group_path, dataset_name)
oscZ._zarr_create(path=group_path_array,
                  shape = shape,
                  transform = transform,
                  crs = str(crs),
                  overwrite=False,
                  return_periods=return_periods)

<zarr.core.Array '/physrisk-hazard-indicators-dev01/hazard/hazard.zarr/flood_coast_hist_RP_JRC' (8, 20, 2242) float32>

In [17]:
z = oscZ.root[group_path_array]
z.info

0,1
Name,/physrisk-hazard-indicators-dev01/hazard/hazard.zarr/flood_coast_hist_RP_JRC
Type,zarr.core.Array
Data type,float32
Shape,"(8, 20, 2242)"
Chunk shape,"(8, 1000, 1000)"
Order,C
Read-only,False
Compressor,"Blosc(cname='lz4', clevel=5, shuffle=SHUFFLE, blocksize=0)"
Store type,zarr.storage.FSStore
No. bytes,1434880 (1.4M)


## Steps to populate hazard.zarr/flood_costal_hist_RP_JRC

### Step 2: Populate the raster file for every return period

In [42]:
chunck_size = 1000

data_filename = 'floodmap_EFAS_RP{}_C.tif'.format(rt, rt)
inputfile = os.path.join(inputfile_path, data_filename)

src = rasterio.open(inputfile)

#da.data[rt_i,:,:] = fld_depth
for height_pos in range(0, height, chunck_size):
    for width_pos in range(0, width, chunck_size):

        band = read_window(src, height_pos, width_pos, chunck_size)

        z[rt_i,height_pos:height_pos+chunck_size, width_pos:width_pos+chunck_size] = band

In [36]:
rt_i = 0
rt = '010'

data_filename = 'floodmap_EFAS_RP{}_C.tif'.format(rt, rt)
inputfile = os.path.join(inputfile_path, data_filename)

src = rasterio.open(inputfile)

#da.data[rt_i,:,:] = fld_depth
for height_pos in range(0, height, chunck_size):
    for width_pos in range(0, width, chunck_size):

        band = read_window(src, height_pos, width_pos, chunck_size)

        #z[rt_i,height_pos:height_pos+chunck_size, width_pos:width_pos+chunck_size] = fld_depth[0,height_pos:height_pos+chunck_size,width_pos:width_pos+chunck_size]

        z[rt_i,height_pos:height_pos+chunck_size, width_pos:width_pos+chunck_size] = band

In [38]:
z[0,435,0]

-3.402823e+38

In [31]:
size = 3
a = list(range(23))
for pos in range(0, len(a), size):
    print(pos, size, a[pos:pos+size])

0 3 [0, 1, 2]
3 3 [3, 4, 5]
6 3 [6, 7, 8]
9 3 [9, 10, 11]
12 3 [12, 13, 14]
15 3 [15, 16, 17]
18 3 [18, 19, 20]
21 3 [21, 22]


In [33]:
a[21:25]

[21, 22]

In [29]:
z[0,0,0] == src.nodata

True

In [48]:
fld_depth.shape

(1, 45242, 63976)

In [27]:
oscZ.write(path = group_path,
           da = da)

In [None]:
# Example using root object. Better to use oscZ object

"""
create_dataset(name, **kwargs) method of zarr.hierarchy.Group instance
    Create an array.
    
    Arrays are known as "datasets" in HDF5 terminology. For compatibility
    with h5py, Zarr groups also implement the require_dataset() method.
    
    Parameters
    ----------
    name : string
        Array name.
    data : array-like, optional
        Initial data.
    shape : int or tuple of ints
        Array shape.
    chunks : int or tuple of ints, optional
        Chunk shape. If not provided, will be guessed from `shape` and
        `dtype`.
    dtype : string or dtype, optional
        NumPy dtype.
    compressor : Codec, optional
        Primary compressor.
    fill_value : object
        Default value to use for uninitialized portions of the array.



root.create_dataset(name='prueba',
                    data = np.array([[0,1], [1,6]]),
                    shape = (2,2),
                    chunks = (1000, 1000),
                    dtype = 'f4')

trans_members = [
    transform.a,
    transform.b,
    transform.c,
    transform.d,
    transform.e,
    transform.f,
]
mat3x3 = [x * 1.0 for x in trans_members] + [0.0, 0.0, 1.0] # Why adding this ??
root.attrs["crs"] = str(crs)
root.attrs["transform_mat3x3"] = mat3x3 
if return_periods is not None:
    root.attrs["index_values"] = return_periods
    root.attrs["index_name"] = "return period (years)"

# Read the file
root['prueba']
"""

In [None]:
# Code to remove a file inside a bucket

""""
import boto3
boto_c = boto3.client('s3', aws_access_key_id=os.environ["OSC_S3_ACCESS_KEY"], aws_secret_access_key=os.environ["OSC_S3_SECRET_KEY"])

to_remove = boto_c.list_objects_v2(Bucket=default_staging_bucket, Prefix='hazard/hazard_MV_prueba.zarr')['Contents']

keys = [item['Key'] for item in to_remove]

for key_ in keys:
    boto_c.delete_object(Bucket=default_staging_bucket, Key=key_)
"""