# 01 - Storm Surge Dataset JRC

This script performs the following tasks:
1. [Auth] writes data to Zarr files (cloud-native file format) (AUTh)
3. [Deltares] checks and creates a geoJSON from Zarr data (required for the Front-End)
2. [Deltares] uploads the Zarr to a Google Cloud Storage (GCS) bucket 
4. [Deltares] uploads the geoJSON to Mapbox 
5. [Deltares] update the STAC

TODO: 
- make consistent with cf conventions (AUTh?)
- bold names in Zarr due to a dimension index (maybe this should be changed..)
- come up with checks for Zarr file before continueing to create a geoJSON
- multiple variables in generation (and the arbitrary order of it?!)

In [10]:
# Optional; code formatter, installed as jupyter lab extension
#%load_ext lab_black
# Optional; code formatter, installed as jupyter notebook extension
%load_ext nb_black

# imports
import geojson
import netCDF4 as nc
import os
import pathlib
import sys
import platform
import xarray as xr
import pandas as pd
import zarr
import subprocess
import warnings
import pystac
from google.cloud import storage
from dotenv import dotenv_values

warnings.filterwarnings("ignore")

# make root directories importable by appending root to path
cwd = pathlib.Path().resolve()
sys.path.append(os.path.dirname(cwd))

# OS independent path configurations
if platform.system() == "Windows":
    root = pathlib.Path("P:/")
else:  # linux or other
    root = pathlib.Path("/p/")
# root = pathlib.Path().home().root
coclico_data_dir = pathlib.Path(root, "11205479-coclico", "data")

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

In [11]:
# paths to the dataset, manual input
dataset_dir = coclico_data_dir.joinpath("01_storm_surge_jrc")
dataset_historical_path = dataset_dir.joinpath("CoastAlRisk_Europe_EESSL_Historical.nc")
dataset_rcp45_path = dataset_dir.joinpath("CoastAlRisk_Europe_EESSL_RCP45.nc")
dataset_rcp85_path = dataset_dir.joinpath("CoastAlRisk_Europe_EESSL_RCP85.nc")
dataset_out_file = "CoastAlRisk_Europe_EESSL"

# GCS and mapbox private access keys
GCS_token = coclico_data_dir.joinpath(
    "google_credentials.json"
)  # path name (including json file name)
config = dotenv_values(".env")
mapbox_token = config["MAPBOX_TOKEN"]  # mapbox private key

<IPython.core.display.Javascript object>

# 1. write data to Zarr files

In [104]:
# open datasets
dataset_historical = xr.open_dataset(dataset_historical_path)
dataset_45rcp = xr.open_dataset(dataset_rcp45_path)
dataset_85rcp = xr.open_dataset(dataset_rcp85_path)

# check original dataset
# dataset_historical

<IPython.core.display.Javascript object>

In [105]:
# rename dimension names
dataset_historical = dataset_historical.rename_dims({"row": "stations", "col": "rp"})
dataset_45rcp = dataset_45rcp.rename_dims({"row": "stations", "col": "rp"})
dataset_85rcp = dataset_85rcp.rename_dims({"row": "stations", "col": "rp"})

# rename variables, if necessary
dataset_historical = dataset_historical.rename_vars({"RP": "rp"})
dataset_45rcp = dataset_45rcp.rename_vars({"RP": "rp"})
dataset_85rcp = dataset_85rcp.rename_vars({"RP": "rp"})

# set some data variables to coordinates to avoid duplication of dimensions in later stage
dataset_historical = dataset_historical.set_coords(["longitude", "latitude", "rp"])
dataset_45rcp = dataset_45rcp.set_coords(["longitude", "latitude", "rp"])
dataset_85rcp = dataset_85rcp.set_coords(["longitude", "latitude", "rp"])

<IPython.core.display.Javascript object>

In [106]:
# concat datasets along new dimension with index values and name derived from pandas index object, if necessary
dataset = xr.concat(
    [dataset_historical, dataset_45rcp, dataset_85rcp],
    pd.Index(["historical", "rcp45", "rcp85"], name="scenario"),
)

<IPython.core.display.Javascript object>

In [108]:
# re-order shape of the data variables
dataset = dataset.transpose("scenario", "stations", "rp")

<IPython.core.display.Javascript object>

In [110]:
# check the xarray dataset
dataset

<IPython.core.display.Javascript object>

In [111]:
# export to zarr in write mode (to overwrite iff exists)
dataset.to_zarr(dataset_dir.joinpath("%s.zarr" % dataset_out_file), mode="w")

<xarray.backends.zarr.ZarrStore at 0x210cb728580>

<IPython.core.display.Javascript object>

# 2. check and create geoJSON from Zarr data

In [112]:
# locally stored Zarr
dataset = xr.open_dataset(dataset_dir.joinpath("%s.zarr" % dataset_out_file))

<IPython.core.display.Javascript object>

In [64]:
# do checks (to be decided upon)

<IPython.core.display.Javascript object>

In [120]:
dimvals = {k: v.values for k, v in cube_dimensions.items() if v.values}

# Add children
for values in product(*dimvals.values()):
    # TODO Improve key gen and align with geojson generation
    key = "-".join(
        map(lambda x: "-".join(x), zip(dimvals.keys(), map(str, values)))
    )

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

<IPython.core.display.Javascript object>

In [138]:
var_dims_idx
var_dims_ids
var_dims_val

[array(['historical', 'rcp45', 'rcp85'], dtype=object),
 array([   5.,   10.,   20.,   50.,  100.,  200.,  500., 1000.],
       dtype=float32)]

<IPython.core.display.Javascript object>

In [176]:
from itertools import product

list3 = [f"{a}_{b}" for a, b in product(var_dims_val[0], var_dims_val[1])]
print(list3)

['historical_5.0', 'historical_10.0', 'historical_20.0', 'historical_50.0', 'historical_100.0', 'historical_200.0', 'historical_500.0', 'historical_1000.0', 'rcp45_5.0', 'rcp45_10.0', 'rcp45_20.0', 'rcp45_50.0', 'rcp45_100.0', 'rcp45_200.0', 'rcp45_500.0', 'rcp45_1000.0', 'rcp85_5.0', 'rcp85_10.0', 'rcp85_20.0', 'rcp85_50.0', 'rcp85_100.0', 'rcp85_200.0', 'rcp85_500.0', 'rcp85_1000.0']


<IPython.core.display.Javascript object>

In [154]:
# write geoJSON (generic)

# list(dataset.dims)
# write data to single flattened GeoJSON - Mapbox styling could use this
for var in dataset.keys():  # loop over variable
    dims = list(dataset["%s" % var].dims)
    var_dims_idx = []
    var_dims_ids = []
    var_dims_val = []
    for idv, name in enumerate(dims):  # loop over dimensions of variable
        if name != "stations":  # assumes stations is present and independent
            var_dims_idx.append(idv)
            var_dims_ids.append(name)
            var_dims_val.append(dataset["%s" % name][:].values)
            
    # dot product of var_dims_val
    

    features = []
    for j, (lon, lat) in enumerate(
        zip(dataset["longitude"][:].values, dataset["latitude"][:].values)
    ):  # assumes longitude and latitude are present and independent
        point = geojson.Point((float(lon), float(lat)))
        feature = geojson.Feature(geometry=point)
        feature["properties"]["locationId"] = j

        for idv, (a, b, c) in enumerate(zip(var_dims_idx, var_dims_ids, var_dims_val)):
            print(idv, a, b, c)
        break
#            print(idv, a)
#            feature["properties"]["%s_%s_%s_%s" % int(a)] = str(b)
#            # for b in a:
#
#            break
#        break
#
#        for a, b in zip(rps, dataset["%s"%var][idx, j, :].values):
#    #        feature["properties"]["rp_%s" % int(a)] = str(b)
#    #    features.append(feature)

0 0 scenario ['historical' 'rcp45' 'rcp85']
1 2 rp [   5.   10.   20.   50.  100.  200.  500. 1000.]


<IPython.core.display.Javascript object>

In [129]:
# write geoJSON

# filter out scenarios
ds_list_name = []
for idx, i in enumerate(dataset["scenario"][:].values):
    ds_list_name.append(dataset_out_file + "_" + i)

# write data to files (multiple value files - arrays) - Mapbox styling could be done with arrays
for idx, name in enumerate(ds_list_name):
    rps = dataset["rp"][:].values

    features = []
    for j, (lon, lat) in enumerate(
        zip(dataset["longitude"][:].values, dataset["latitude"][:].values)
    ):
        point = geojson.Point((float(lon), float(lat)))
        feature = geojson.Feature(geometry=point)
        feature["properties"]["locationId"] = j
        for a, b in zip(rps, dataset["ssl"][idx, j, :].values):
            feature["properties"]["rp_%s" % int(a)] = str(b)
        features.append(feature)

    # store the features
    collection = geojson.FeatureCollection(features)
    with open(os.path.join(dataset_dir, "platform", r"%s.geojson" % name), "w") as f:
        geojson.dump(collection, f)

<IPython.core.display.Javascript object>

In [None]:

# copy current folder and generate.py..

In [None]:
# geojson config ()

    zarr_fn = "gcs://dgds-data-public/coclico/CoastAlRisk_Europe_EESSL.zarr"
    mapbox_url, mapbox_source = "https://", "adsasd"
    template = "deltares-coclico-ssl"
    variable = "elevation"
    datasetid = f"deltares-coclico-{variable}"
    dimensions = ["RP", "scenario"]  # could be automatic

# this needs to be aligned!! 
for values in product(*dimvals.values()):
        # TODO Improve key gen and align with geojson generation
        key = "-".join(
            map(lambda x: "-".join(x), zip(dimvals.keys(), map(str, values)))
        )
        feature = gen_default_item(f"{variable}-mapbox-{key}")
        feature.add_asset("mapbox", gen_mapbox_asset(mapbox_url, mapbox_source))
        feature.properties = gen_default_props(key=key)
        for (k, v) in zip(dimvals.keys(), values):
            feature.properties[k] = v
        dataset.add_item(feature, strategy=layout)
        feature.set_self_href(f"../{variable}-mapbox")

In [67]:
# check geojson

with open(os.path.join(dataset_dir, "platform", "%s.geojson" % ds_list_name[2])) as f:
    check = geojson.load(f)

print(check["features"][2])

# check the minima and maxima for the colormap boundaries
#scenario = 0
#for idx, i in enumerate(rps):
#    print(
#        i,
#        round(min(dataset["ssl"][scenario, :, idx].values), 2),
#        round(max(dataset["ssl"][scenario, :, idx].values), 2),
#    )

{"geometry": {"coordinates": [-0.1, 49.7], "type": "Point"}, "properties": {"locationId": 2, "rp_10": "2.12493", "rp_100": "2.44322", "rp_1000": "2.62478", "rp_20": "2.24118", "rp_200": "2.50843", "rp_5": "1.985", "rp_50": "2.36607", "rp_500": "2.57984"}, "type": "Feature"}
5.0 0.23 4.19
10.0 0.24 4.21
20.0 0.24 4.32
50.0 0.24 4.71
100.0 0.25 4.97
200.0 0.25 5.19
500.0 0.26 5.44
1000.0 0.26 5.61


<IPython.core.display.Javascript object>

# 3. upload Zarr to GCS bucket

In [198]:
# upload zarr folder to GCS
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = str(GCS_token)

# function to upload zarr folder to GCS
storage_client = storage.Client()
def upload_from_directory(directory_path, dest_bucket_name, dest_blob_name):
    rel_paths = directory_path.glob("**/*")
    bucket = storage_client.bucket(dest_bucket_name)
    for local_file in rel_paths:
        remote_path = f'{dest_blob_name}/{"/".join(str(local_file).split(os.sep)[5:])}' # note 5: is hardcoded and might lead to problems
        if os.path.isfile(local_file):
            blob = bucket.blob(remote_path)
            blob.upload_from_filename(local_file)

    # print status
    print("Folder uploaded to GCS")

# specification of directory, bucket and file name to feed into the function
directory_path = dataset_dir.joinpath("%s.zarr" % dataset_out_file)
dest_bucket_name = "dgds-data-public"
dest_blob_name = "coclico/" + dataset_out_file + ".zarr"
folder_upload = upload_from_directory(directory_path, dest_bucket_name, dest_blob_name)

Folder uploaded to GCS


<IPython.core.display.Javascript object>

# 4. upload geoJSON to Mapbox

In [71]:
# ingest geoJSON into mapbox tilesets

# python way of running CLI
for idx, i in enumerate(ds_list_name):
    if len(i) > 32:
        out_name = ds_list_name[idx].replace(
            "historical", "hist"
        )  # cap is at 32 digits
    if len(i) < 32:
        out_name = i  # continue normally

    # automated CLI mapbox upload
    subprocess.run(
        [
            "mapbox",
            "--access-token",
            mapbox_token,
            "upload",
            r"global-data-viewer.%s" % out_name.split(".")[0],
            os.path.join(dataset_dir, "platform", r"%s.geojson" % i.split(".")[0]),
        ],
        shell=True,
        check=True,
    )

# notebook version of CLI
#!mapbox --access-token {mapbox_token} upload {filename} {source}

# CLI command
# mapbox --access-token sk.eyJ1IjoiZ2xvYmFsLWRhdGEtdmlld2VyIiwiYSI6ImNsMWx1azIyejA5cmwzanBueTNwdDB0djQifQ.hkbA5TGIiOcve4mZpi44Uw upload global-data-viewer.test_cli_upload p:\11205479-coclico\data\01_storm_surge_jrc\platform\CoastAlRisk_Europe_EESSL_Historical.geojson

<IPython.core.display.Javascript object>

# 5. Update the STAC