# Storm surge level

Notebook environment to migrate NetCDF files to Zarr and GeoJSON.

In [1]:
# Optional: code formatter for notebook cells.
# Variant installed as a JupyterLab extension (left disabled here):
#%load_ext lab_black
# Variant installed as a classic Jupyter Notebook extension:
%load_ext nb_black

<IPython.core.display.Javascript object>

### Configure OS-independent paths

In [2]:
# Import standard packages
import os
import pathlib
import sys
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr

# Put the parent of the working directory on the import path so that
# packages living next to this notebook's folder can be imported
cwd = pathlib.Path().resolve()
sys.path.append(str(cwd.parent))

# Home directory of the current user and the root of its path
home = pathlib.Path.home()
root = home.root

# Import custom functionality
from etl import p_drive, rel_root
from etl.extract import clear_zarr_information, get_geojson
from etl.keys import load_env_variables, load_google_credentials

# Data directory of the project on the shared drive
coclico_data_dir = p_drive / "11205479-coclico" / "data"



<IPython.core.display.Javascript object>

In [14]:
# Project paths & files (manual input)
dataset_dir = coclico_data_dir / "01_storm_surge_jrc"

# one NetCDF input file per scenario
dataset_historical_path = dataset_dir / "CoastAlRisk_Europe_EESSL_Historical.nc"
dataset_rcp45_path = dataset_dir / "CoastAlRisk_Europe_EESSL_RCP45.nc"
dataset_rcp85_path = dataset_dir / "CoastAlRisk_Europe_EESSL_RCP85.nc"

# base name (without extension) of the exported store
dataset_out_file = "CoastAlRisk_Europe_EESSL"

<IPython.core.display.Javascript object>

### Load environment variables and set credentials

In [4]:
# Load the requested environment variable and the Google credentials file
# stored in the project data directory (both via the project's etl.keys helpers)
load_env_variables(env_var_keys=["MAPBOX_ACCESS_TOKEN"])
load_google_credentials(google_token=coclico_data_dir / "google_credentials.json")

Environmental variables loaded.
Google Application Credentials load into environment.




<IPython.core.display.Javascript object>

### Write data to Zarr files (dataset-dependent)

In [5]:
# open the three scenario files (historical, RCP4.5, RCP8.5) as xarray datasets
dataset_historical, dataset_45rcp, dataset_85rcp = (
    xr.open_dataset(nc_path)
    for nc_path in (dataset_historical_path, dataset_rcp45_path, dataset_rcp85_path)
)

# to check an original dataset, display it:
# dataset_historical

Cannot find the ecCodes library


<IPython.core.display.Javascript object>

In [6]:
# Apply the same three steps to every scenario dataset:
#   1. rename the "row" dimension to "stations"
#   2. swap the "col" dimension for "RP" (swap instead of rename, because the
#      name already exists in the dataset)
#   3. promote longitude, latitude and RP to coordinates, to avoid duplication
#      of dimensions in a later stage
# Variables can be renamed too if necessary, e.g. .rename_vars({"RP": "rp"}).
dataset_historical = (
    dataset_historical.rename_dims({"row": "stations"})
    .swap_dims({"col": "RP"})
    .set_coords(["longitude", "latitude", "RP"])
)
dataset_45rcp = (
    dataset_45rcp.rename_dims({"row": "stations"})
    .swap_dims({"col": "RP"})
    .set_coords(["longitude", "latitude", "RP"])
)
dataset_85rcp = (
    dataset_85rcp.rename_dims({"row": "stations"})
    .swap_dims({"col": "RP"})
    .set_coords(["longitude", "latitude", "RP"])
)

<IPython.core.display.Javascript object>

In [7]:
# Stack the scenario datasets along a new "scenario" dimension; the pandas
# index supplies both the dimension name and its labels
dataset = xr.concat(
    [dataset_historical, dataset_45rcp, dataset_85rcp],
    dim=pd.Index(["Historical", "RCP45", "RCP85"], name="scenario"),
)

<IPython.core.display.Javascript object>

In [8]:
# re-order the dimensions of the data variables: scenario, stations, RP
dim_order = ("scenario", "stations", "RP")
dataset = dataset.transpose(*dim_order)

<IPython.core.display.Javascript object>

In [34]:
# add attributes to variables and coordinates, if necessary

# Custom (CF convention) attributes, keyed by variable / coordinate name.
# NOTE(review): none of these keys ("geometry", "layer", "nuts", "name",
# "country") is referenced anywhere else in this notebook, which works with
# "stations", "RP", "scenario", "longitude", "latitude" and "ssl". Presumably
# this mapping was copied from another dataset's notebook -- confirm and replace
# it with attributes for the storm surge variables.
# NOTE(review): "epgsg:4326" below looks like a typo for "epsg:4326" -- confirm.
dataset_attributes = {
    "geometry": {
        "long_name": "nuts 2 polygons in wkt string (degrees; epgsg:4326)",
        "geometry_type": "polygon",
        "units": "degrees",
        "comment": "Describe nuts regions (2010 version), which are available from Eurostat.",
        "crs_wkt": "..",
    },
    "layer": {"long_name": "socio-economic layer of the dataset"},
    "nuts": {
        "long_name": "nuts region acronym",
        "source": "Nuts regions (2010 version), available from Eurostat.",
    },
    "name": {
        "long_name": "nuts region name",
        "source": "Nuts regions (2010 version), available from Eurostat.",
    },
    "country": {"long_name": "country acronym"},
}

# Add / overwrite the attributes of every listed name that exists in the dataset.
# Only a missing name is skipped; any other error is raised instead of being
# silently swallowed.
for var_name, var_attrs in dataset_attributes.items():
    try:
        dataset[var_name].attrs = var_attrs
    except KeyError:
        # the dataset has no variable or coordinate with this name
        continue

<IPython.core.display.Javascript object>

In [36]:
# display the dataset to check the attributes
dataset

<IPython.core.display.Javascript object>

In [9]:
# Check the xarray dataset. Best practice is to have as many bold dimensions as
# possible in the repr, i.e. dimensions whose name is also a coordinate name.
# That way the front end can access a variable directly without having to index it first.

dataset

<IPython.core.display.Javascript object>

In [15]:
# write the dataset to "<dataset_out_file>.zarr" in the dataset directory;
# mode "w" overwrites the store if it already exists
dataset.to_zarr(dataset_dir / f"{dataset_out_file}.zarr", mode="w")

<xarray.backends.zarr.ZarrStore at 0x1a1f427ff20>

<IPython.core.display.Javascript object>

In [24]:
# display the "ssl" variable (presumably the storm surge level of the notebook title -- confirm)
dataset["ssl"]

<IPython.core.display.Javascript object>

In [23]:
# display the "scenario" coordinate created by the concat step
dataset["scenario"]

<IPython.core.display.Javascript object>