# 01 - Storm Surge Dataset JRC

This script performs the following tasks:
1. [AUTh] writes data to Zarr files (cloud-native file format)
2. [Deltares] checks and creates a geoJSON from Zarr data (required for the Front-End)
3. [Deltares] uploads the Zarr to a Google Cloud Storage (GCS) bucket
4. [Deltares] uploads the geoJSON to Mapbox
5. [Deltares] updates the STAC

TODO: 
- make consistent with cf conventions (AUTh?)
- bold names in Zarr due to a dimension index (maybe this should be changed..)
- come up with checks for the Zarr file before continuing to create a geoJSON

In [1]:
# Optional; code formatter, installed as jupyter lab extension
#%load_ext lab_black
# Optional; code formatter, installed as jupyter notebook extension
%load_ext nb_black

# imports
import geojson
import netCDF4 as nc
import os
import pathlib
import sys
import platform
import xarray as xr
import pandas as pd
import zarr
import subprocess
import warnings
from google.cloud import storage

warnings.filterwarnings("ignore")

# Append the parent of the current working directory to sys.path so that
# modules located one level above this notebook can be imported.
cwd = pathlib.Path().resolve()
sys.path.append(os.path.dirname(cwd))

# The project drive is addressed differently on Windows than on linux/other.
root = pathlib.Path("P:/") if platform.system() == "Windows" else pathlib.Path("/p/")
# root = pathlib.Path().home().root
coclico_data_dir = root.joinpath("11205479-coclico", "data")



<IPython.core.display.Javascript object>

In [96]:
# paths to the dataset, manual input
eessl_dir = coclico_data_dir.joinpath("01_storm_surge_jrc")
eessl_historical_path = eessl_dir.joinpath("CoastAlRisk_Europe_EESSL_Historical.nc")
eessl_rcp45_path = eessl_dir.joinpath("CoastAlRisk_Europe_EESSL_RCP45.nc")
eessl_rcp85_path = eessl_dir.joinpath("CoastAlRisk_Europe_EESSL_RCP85.nc")
# base name shared by the Zarr store and the geoJSON files written later on
eessl_out_file = "CoastAlRisk_Europe_EESSL"

# GCS and Mapbox private access keys
# path to the Google credentials file (including the json file name)
GCS_token = coclico_data_dir.joinpath("google_credentials.json")

# SECURITY: a secret Mapbox token must not be stored in the notebook itself.
# It is read from the MAPBOX_ACCESS_TOKEN environment variable instead; the
# token that used to be hard-coded on this line should be treated as leaked
# and revoked in the Mapbox account.
mapbox_token = os.environ.get("MAPBOX_ACCESS_TOKEN", "")
if not mapbox_token:
    print("MAPBOX_ACCESS_TOKEN is not set; the Mapbox upload will fail until it is")

<IPython.core.display.Javascript object>

# 1. write data to Zarr files

In [3]:
# read the historical, RCP4.5 and RCP8.5 netCDF files into xarray datasets
eessl_historical, eessl_45rcp, eessl_85rcp = (
    xr.open_dataset(nc_path)
    for nc_path in (eessl_historical_path, eessl_rcp45_path, eessl_rcp85_path)
)

# check original dataset
# eessl_historical

<IPython.core.display.Javascript object>

In [4]:
# rename variables, if necessary

# Promote "longitude", "latitude" and "RP" from data variables to coordinates
# in every dataset, to avoid duplication of dimensions in a later stage.
coord_names = ["longitude", "latitude", "RP"]
eessl_historical, eessl_45rcp, eessl_85rcp = (
    ds.set_coords(coord_names) for ds in (eessl_historical, eessl_45rcp, eessl_85rcp)
)

<IPython.core.display.Javascript object>

In [5]:
# Combine the three datasets along a new dimension; the pandas index provides
# both the name of that dimension ("scenario") and its index values.
scenario_index = pd.Index(["historical", "rcp45", "rcp85"], name="scenario")
eessl = xr.concat([eessl_historical, eessl_45rcp, eessl_85rcp], dim=scenario_index)

<IPython.core.display.Javascript object>

In [6]:
# rename the "row"/"col" dimensions, then re-order every data variable to
# (scenario, stations, rp)
eessl = eessl.rename_dims({"row": "stations", "col": "rp"}).transpose(
    "scenario", "stations", "rp"
)

<IPython.core.display.Javascript object>

In [7]:
# check the xarray dataset (a bare expression on the last line of a notebook
# cell is displayed as the cell output)
eessl

<IPython.core.display.Javascript object>

In [9]:
# write the combined dataset to a local Zarr store; mode="w" overwrites the
# store if it already exists
zarr_store_path = eessl_dir / f"{eessl_out_file}.zarr"
eessl.to_zarr(zarr_store_path, mode="w")

<xarray.backends.zarr.ZarrStore at 0x1c6a2e5c200>

<IPython.core.display.Javascript object>

# 2. check and create geoJSON from Zarr data

In [None]:
# locally stored Zarr
# The engine is named explicitly so that opening the store does not depend on
# xarray's automatic backend detection.
eessl = xr.open_dataset(eessl_dir.joinpath("%s.zarr" % eessl_out_file), engine="zarr")

In [None]:
# do checks (to be decided upon)

In [58]:
# write geoJSON

# one output name per scenario: "<eessl_out_file>_<scenario>"
ds_list_name = [
    eessl_out_file + "_" + scenario for scenario in eessl["scenario"].values
]

# Read the arrays once up front; indexing the xarray object for every station
# inside the loops below is needlessly slow.
rps = eessl["RP"].values
lons = eessl["longitude"].values
lats = eessl["latitude"].values
ssl_values = eessl["ssl"].values  # indexed below as [scenario, station, rp]

# make sure the output folder exists before any file is written into it
platform_dir = os.path.join(eessl_dir, "platform")
os.makedirs(platform_dir, exist_ok=True)

# write data to files (multiple value files - arrays) - Mapbox styling could be done with arrays
for idx, name in enumerate(ds_list_name):
    features = []
    for j, (lon, lat) in enumerate(zip(lons, lats)):
        point = geojson.Point((float(lon), float(lat)))
        feature = geojson.Feature(geometry=point)
        feature["properties"]["locationId"] = j
        # one "rp_<RP>" property per RP value, stored as a string
        for rp, value in zip(rps, ssl_values[idx, j, :]):
            feature["properties"]["rp_%s" % int(rp)] = str(value)
        features.append(feature)

    # store the features
    collection = geojson.FeatureCollection(features)
    with open(os.path.join(platform_dir, "%s.geojson" % name), "w") as f:
        geojson.dump(collection, f)

<IPython.core.display.Javascript object>

In [67]:
# check geojson: read back the file of the third entry in ds_list_name and
# print its third feature

check_path = os.path.join(eessl_dir, "platform", f"{ds_list_name[2]}.geojson")
with open(check_path) as f:
    check = geojson.load(f)

print(check["features"][2])

# check the minima and maxima for the colormap boundaries
#scenario = 0
#for idx, i in enumerate(rps):
#    print(
#        i,
#        round(min(eessl["ssl"][scenario, :, idx].values), 2),
#        round(max(eessl["ssl"][scenario, :, idx].values), 2),
#    )

{"geometry": {"coordinates": [-0.1, 49.7], "type": "Point"}, "properties": {"locationId": 2, "rp_10": "2.12493", "rp_100": "2.44322", "rp_1000": "2.62478", "rp_20": "2.24118", "rp_200": "2.50843", "rp_5": "1.985", "rp_50": "2.36607", "rp_500": "2.57984"}, "type": "Feature"}
5.0 0.23 4.19
10.0 0.24 4.21
20.0 0.24 4.32
50.0 0.24 4.71
100.0 0.25 4.97
200.0 0.25 5.19
500.0 0.26 5.44
1000.0 0.26 5.61


<IPython.core.display.Javascript object>

# 3. upload Zarr to GCS bucket

In [198]:
# upload zarr folder to GCS
# expose the credentials file through GOOGLE_APPLICATION_CREDENTIALS before
# the client is created
credentials_file = str(GCS_token)
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = credentials_file

# client used by upload_from_directory
storage_client = storage.Client()
def upload_from_directory(directory_path, dest_bucket_name, dest_blob_name):
    """Upload every file below a local directory to a GCS bucket.

    The folder layout below ``directory_path`` is mirrored underneath
    ``dest_blob_name`` in the bucket. The module-level ``storage_client`` is
    used to reach the bucket.

    Args:
        directory_path: Local folder to upload (str or path-like).
        dest_bucket_name: Name of the destination bucket.
        dest_blob_name: Blob prefix under which the folder contents are stored.
    """
    directory_path = pathlib.Path(directory_path)
    bucket = storage_client.bucket(dest_bucket_name)
    for local_file in directory_path.glob("**/*"):
        if not local_file.is_file():
            continue  # the glob also yields sub-directories; only files are uploaded
        # Derive the remote name from the path relative to the uploaded folder.
        # The former str.split(os.sep)[5:] only gave this result for one
        # specific folder depth; as_posix() yields "/" separators on any OS.
        relative_path = local_file.relative_to(directory_path).as_posix()
        blob = bucket.blob(f"{dest_blob_name}/{relative_path}")
        blob.upload_from_filename(str(local_file))

    # print status
    print("Folder uploaded to GCS")

# arguments for the upload function: local Zarr folder, bucket name and blob name
directory_path = eessl_dir / f"{eessl_out_file}.zarr"
dest_bucket_name = "dgds-data-public"
dest_blob_name = f"coclico/{eessl_out_file}.zarr"
folder_upload = upload_from_directory(
    directory_path=directory_path,
    dest_bucket_name=dest_bucket_name,
    dest_blob_name=dest_blob_name,
)

Folder uploaded to GCS


<IPython.core.display.Javascript object>

# 4. upload geoJSON to Mapbox

In [71]:
# ingest geoJSON into mapbox tilesets

# python way of running CLI
for name in ds_list_name:
    # The cap on the name is 32 characters, so longer names are shortened.
    # A plain if/else also covers len(name) == 32; the former pair of strict
    # comparisons left out_name unset (or stale from the previous iteration)
    # in that case.
    if len(name) > 32:
        out_name = name.replace("historical", "hist")
    else:
        out_name = name  # continue normally

    # automated CLI mapbox upload
    # The argument list is passed without shell=True: on POSIX, shell=True
    # combined with a list executes only the first item ("mapbox") and hands
    # the remaining items to the shell itself instead of to the command.
    subprocess.run(
        [
            "mapbox",
            "--access-token",
            mapbox_token,
            "upload",
            "global-data-viewer.%s" % out_name.split(".")[0],
            os.path.join(eessl_dir, "platform", "%s.geojson" % name.split(".")[0]),
        ],
        check=True,
    )

# notebook version of CLI
#!mapbox --access-token {mapbox_token} upload {filename} {source}

# CLI command (the secret access token is deliberately not written out here)
# mapbox --access-token <MAPBOX_ACCESS_TOKEN> upload global-data-viewer.test_cli_upload p:\11205479-coclico\data\01_storm_surge_jrc\platform\CoastAlRisk_Europe_EESSL_Historical.geojson

<IPython.core.display.Javascript object>