## Total Water Level
Notebook environment to migrate NetCDF files to CF-compliant Zarr stores

In [1]:
# Optional: load a code-formatter extension for this session.
# JupyterLab variant of the formatter extension (disabled here):
#%load_ext lab_black
# Classic Jupyter Notebook variant of the formatter extension (enabled):
%load_ext nb_black

<IPython.core.display.Javascript object>

### Set up modules and paths

In [2]:
# Import standard packages
import os
import pathlib
import sys
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import netCDF4 as nc
import numpy.ma as ma

# Import custom functionality
from coclicodata.drive_config import p_drive
from coclicodata.etl.cf_compliancy_checker import check_compliancy, save_compliancy
from coastmonitor.io.utils import name_block

# Put the parent of the current working directory on the import path
cwd = pathlib.Path().resolve()
sys.path.append(str(cwd.parent))

# Home directory of the current user and the root it lives on
home = pathlib.Path.home()
root = home.root

# Project data directory on the (remote) project drive
coclico_data_dir = p_drive / "11207608-coclico" / "FULLTRACK_DATA"

# Workaround to the Windows OS (10) udunits error after installation of cfchecker: https://github.com/SciTools/iris/issues/404
# NOTE: change the path below to the udunits2.xml file in your own Python installation.
udunits2_xml = home.joinpath(
    r"Anaconda3\pkgs\udunits2-2.2.28-h892ecd3_0\Library\share\udunits\udunits2.xml"
)

# Only override UDUNITS2_XML_PATH when the hard-coded file really exists, so an
# already-correct setting is not replaced by a dangling path on machines with a
# different installation layout.
if udunits2_xml.is_file():
    os.environ["UDUNITS2_XML_PATH"] = str(udunits2_xml)


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


<IPython.core.display.Javascript object>

In [86]:
# Project paths & files (manual input)
# Directory layout below the work-package folder
WP_dir = coclico_data_dir / "WP3"
data_dir = WP_dir / "data"
ds_dir = data_dir / "NetCDF"
ds_dirfut = data_dir / "NetCDF_MarineDynamicsChanges_TWL"

# Input NetCDF files
ds_filename = ds_dir / "CTP_ReturnPeriods.nc"
ds_filename_fut = ds_dirfut / "CTP_ReturnPeriods_SLR.nc"

# Output path without extension (the extension is appended when writing)
ds_out_filename = ds_dirfut / "CTP_ReturnPeriods_SLR"

# Working directory handed to the CF compliance checker
CF_dir = data_dir / "CF"

<IPython.core.display.Javascript object>

### Check CF compliance of the original NetCDF files

In [4]:
# Load the return-period dataset and record its CRS code as a global attribute
# (3035 is presumably an EPSG code; confirm with the data provider)
ds = xr.open_dataset(ds_filename)
ds.attrs.update({"crs": 3035})
ds

<IPython.core.display.Javascript object>

In [5]:
# Load the sea-level-rise return-period dataset and record its CRS code as a
# global attribute (3035 is presumably an EPSG code; confirm with the data provider)
ds_fut = xr.open_dataset(ds_filename_fut)
ds_fut.attrs.update({"crs": 3035})
ds_fut

<IPython.core.display.Javascript object>

In [8]:
%%capture cap --no-stderr
# Run the CF compliance check on the original NetCDF file (ds_filename).
# The cell output is captured in `cap` so that a later cell can save it.

check_compliancy(testfile=ds_filename, working_dir=CF_dir)

<IPython.core.display.Javascript object>

In [9]:
# Save the captured CF compliance output (`cap`) for the original NetCDF file
# (ds_filename), using CF_dir as the working directory.
save_compliancy(cap, testfile=ds_filename, working_dir=CF_dir)

# NOTE: File is already CF-compliant :)



<IPython.core.display.Javascript object>

In [10]:
%%capture cap --no-stderr
# Run the CF compliance check on the original future NetCDF file (ds_filename_fut).
# The cell output is captured in `cap` so that a later cell can save it.

check_compliancy(testfile=ds_filename_fut, working_dir=CF_dir)

<IPython.core.display.Javascript object>

In [11]:
# Save the captured CF compliance output (`cap`) for the original future NetCDF
# file (ds_filename_fut), using CF_dir as the working directory.
save_compliancy(cap, testfile=ds_filename_fut, working_dir=CF_dir)

# NOTE: File is already CF-compliant :)



<IPython.core.display.Javascript object>

### Make CF-compliant alterations to the NetCDF files (dataset dependent)

In [6]:
# Rename the 'locs' dimension of the dataset to 'stations'.
# NOTE(review): `ds` is rebound here, so re-running this cell without reloading
# `ds` will presumably fail because 'locs' no longer exists.
ds = ds.rename_dims({'locs': 'stations'})

<IPython.core.display.Javascript object>

In [7]:
# Return periods whose RP<n> variables are combined into one 'twl' variable
return_periods = [1, 100, 1000]
rp_var_names = [f"RP{period}" for period in return_periods]

# Concatenate the per-return-period variables along a new 'rp' dimension
rp_data = xr.concat([ds[name] for name in rp_var_names], dim="rp")

# Label the new dimension with the return periods as floats
rp_data = rp_data.assign_coords({"rp": [float(period) for period in return_periods]})

# Store the stacked array in the dataset under the name 'twl'
ds["twl"] = rp_data

# Drop the original per-return-period variables
ds = ds.drop_vars(rp_var_names)

ds

<IPython.core.display.Javascript object>

In [8]:
# Add length-one 'time' (2010) and 'scenarios' ("None") dimensions, then rename
# the 'scenarios' dimension to 'nscenarios'
new_dims = {"time": [float(2010)], "scenarios": np.array(["None"], dtype="S")}
ds = ds.expand_dims(dim=new_dims).rename_dims({"scenarios": "nscenarios"})
ds

<IPython.core.display.Javascript object>

In [9]:
# Sanity check: shape of 'twl' after adding the 'time' and 'nscenarios' dimensions
ds.twl.shape

(1, 1, 3, 51010)

<IPython.core.display.Javascript object>

In [10]:
# Rename the 'locs' dimension of the future dataset to 'stations'.
# NOTE(review): `ds_fut` is rebound here, so re-running this cell without
# reloading `ds_fut` will presumably fail because 'locs' no longer exists.
ds_fut = ds_fut.rename_dims({'locs': 'stations'})

<IPython.core.display.Javascript object>

In [11]:
# Display the future dataset after the dimension rename
ds_fut

<IPython.core.display.Javascript object>

In [12]:
def _parse_twl_var_name(var_name):
    """Split a future TWL variable name into its components.

    Names are expected to look like ``RP100_SLR_SSP245_2050`` or
    ``RP1_SLR_High_End_2100``.

    Parameters
    ----------
    var_name : str
        Name of a data variable in the future dataset.

    Returns
    -------
    tuple of (float, str, float)
        Return period, scenario label and year.

    Raises
    ------
    ValueError
        If the name contains neither an ``SSP`` nor a ``High_End`` tag. Without
        this check the scenario of the previously processed variable would be
        silently reused.
    """
    rp = float(var_name.split("RP")[1].split("_")[0])
    # "High_End" takes precedence over "SSP" when both tags are present
    if "High_End" in var_name:
        scenario = "High_End"
    elif "SSP" in var_name:
        scenario = "SSP%s" % var_name.split("SSP")[1].split("_")[0]
    else:
        raise ValueError(
            "Cannot determine the scenario from variable name %r" % var_name
        )
    year = float(var_name.split("_")[-1])
    return rp, scenario, year


# make a separate xarray dataset for each variable in the future dataset
separate_datasets = {}
for var_name in ds_fut.data_vars:
    separate_datasets[var_name] = ds_fut[[var_name]]

# loop, reshape and rename
for var_name, dataset in separate_datasets.items():
    print(f"Dataset for {var_name}")

    rpvar, scenvar, timevar = _parse_twl_var_name(var_name)

    # add rp / time / scenarios as length-one dimensions carrying the parsed labels
    dataset = dataset.expand_dims(
        dim={
            "rp": [rpvar],
            "time": [timevar],
            "scenarios": np.array([scenvar], dtype="S"),
        }
    )
    dataset = dataset.rename_dims({"scenarios": "nscenarios"})
    dataset = dataset.rename_vars({var_name: "twl"})
    # only existing keys are reassigned, so the dict size is stable while iterating
    separate_datasets[var_name] = dataset

# merge the datasets together, expanding the shape
ds_futmerge = xr.merge(separate_datasets.values())  # this puts the matrix in correctly
# ds_futmerge.twl[:,:,:,1] # check values

Dataset for RP1_SLR_SSP585_2030
Dataset for RP1_SLR_SSP245_2050
Dataset for RP1_SLR_SSP585_2050
Dataset for RP1_SLR_SSP126_2100
Dataset for RP1_SLR_SSP245_2100
Dataset for RP1_SLR_SSP585_2100
Dataset for RP1_SLR_High_End_2100
Dataset for RP1_SLR_High_End_2150
Dataset for RP100_SLR_SSP585_2030
Dataset for RP100_SLR_SSP245_2050
Dataset for RP100_SLR_SSP585_2050
Dataset for RP100_SLR_SSP126_2100
Dataset for RP100_SLR_SSP245_2100
Dataset for RP100_SLR_SSP585_2100
Dataset for RP100_SLR_High_End_2100
Dataset for RP100_SLR_High_End_2150
Dataset for RP1000_SLR_SSP585_2030
Dataset for RP1000_SLR_SSP245_2050
Dataset for RP1000_SLR_SSP585_2050
Dataset for RP1000_SLR_SSP126_2100
Dataset for RP1000_SLR_SSP245_2100
Dataset for RP1000_SLR_SSP585_2100
Dataset for RP1000_SLR_High_End_2100
Dataset for RP1000_SLR_High_End_2150


<IPython.core.display.Javascript object>

In [37]:
def _relabel_slice(source, year, scenario_index, scenario_label):
    """Copy one (year, scenario) slice of `source` under a new scenario label.

    Parameters
    ----------
    source : xarray.Dataset
        Dataset to take the slice from.
    year : int
        Value of the 'time' coordinate to select; also used (as float) for the
        'time' coordinate of the result.
    scenario_index : int
        Selector passed for 'nscenarios' (used positionally in this notebook).
    scenario_label : str
        Scenario label stored in the 'scenarios' coordinate of the result.

    Returns
    -------
    xarray.Dataset
        The selected slice with length-one 'time' and 'nscenarios' dimensions.
    """
    return (
        source.sel(time=year, nscenarios=scenario_index)
        .expand_dims(
            dim={
                "time": [float(year)],
                "scenarios": np.array([scenario_label], dtype="S"),
            }
        )
        .rename_dims({"scenarios": "nscenarios"})
    )


# merge together two datasets (hist and future)
dsmerged = xr.merge([ds, ds_futmerge])
# dsmerged.twl[:,4,0,0] # check values

# fill up the matrix with dummy values (see excel WP2; this is manual work)
# Note, we do not fill up the 2150 column (we exclude these from the F/E by means of summaries)

# "None" scenario: reuse the 2010 values for 2030, 2050 and 2100
ds_dummies = [ds.assign_coords(time=[float(year)]) for year in (2030, 2050, 2100)]

# (source dataset, year, positional scenario index in source, new scenario label)
# NOTE(review): indices 3 and 2 in ds_futmerge presumably point at SSP585 and
# SSP245 respectively; verify against ds_futmerge.scenarios.values.
fill_specs = [
    (ds, 2010, 0, "SSP126"),  # SSP126 2010
    (ds, 2010, 0, "SSP245"),  # SSP245 2010
    (ds, 2010, 0, "SSP585"),  # SSP585 2010
    (ds, 2010, 0, "High_End"),  # High-End 2010
    (ds_futmerge, 2030, 3, "SSP126"),  # SSP126 2030
    (ds_futmerge, 2030, 3, "SSP245"),  # SSP245 2030
    (ds_futmerge, 2030, 3, "High_End"),  # High-End 2030
    (ds_futmerge, 2050, 2, "SSP126"),  # SSP126 2050
    (ds_futmerge, 2050, 3, "High_End"),  # High-End 2050
]
ds_dummies.extend(_relabel_slice(*spec) for spec in fill_specs)

# merge in the same order as before: merged data first, then the dummy fills
dsmergeddum = xr.merge([dsmerged, *ds_dummies])
# dsmergeddum.twl[:,4,0,0] # check values

<IPython.core.display.Javascript object>

In [76]:
# adjust global dataset attributes
dsmergeddum.attrs["title"] = "EU Coastal Target Points - TWL Return Periods combined with sea level rise and subsidence"
dsmergeddum.attrs["summary"] = "Location of Coastal Target Points and their respective TWL return period values for RP1, RP100, and RP1000, combined with different sea level rise scenarios and considering the effect of subsidence."
dsmergeddum.attrs["keywords"] = "Extreme Event, Hindcast, EU scale, Sea Level Rise, Subsidence"
dsmergeddum.attrs["date_created"] = "23rd of September 2024"

# add or change certain variable / coordinate attributes
dataset_attributes = {
    "rp": {"long_name": "return period", "units": "yr"},
    "time": {"long_name": "time", "units": "yr"},
    "scenarios": {"long_name": "climate scenarios"},
}  # specify custom (CF convention) attributes

# add / overwrite attributes; names that are not in the dataset are skipped
# explicitly (and reported) instead of hiding every possible error
for var_name, var_attrs in dataset_attributes.items():
    if var_name in dsmergeddum.variables:
        dsmergeddum[var_name].attrs = var_attrs
    else:
        print(f"Skipping attributes for '{var_name}': not present in the dataset")

<IPython.core.display.Javascript object>

In [80]:
# Save the merged dataset as NetCDF under the path of the future input file,
# with ".nc" replaced by "_CF.nc".
# NOTE: str.replace substitutes every ".nc" occurrence in the path string, not
# only the file extension.
dsmergeddum.to_netcdf(path=str(ds_filename_fut).replace(".nc", "_CF.nc"))

<IPython.core.display.Javascript object>

### Check CF compliance of the altered NetCDF files

In [81]:
%%capture cap --no-stderr
# Run the CF compliance check on the altered NetCDF file (the "_CF.nc" file).
# The cell output is captured in `cap` so that a later cell can save it.

check_compliancy(testfile=str(ds_filename_fut).replace(".nc", "_CF.nc"), working_dir=CF_dir)

<IPython.core.display.Javascript object>

In [82]:
# Save the captured CF compliance output (`cap`) for the altered NetCDF file
# (the "_CF.nc" file), using CF_dir as the working directory.
save_compliancy(cap, testfile=str(ds_filename_fut).replace(".nc", "_CF.nc"), working_dir=CF_dir)

# NOTE(review): the remark below appears copied from the original-file cells;
# confirm it against the saved compliance report of the altered file.
# NOTE: File is already CF-compliant :)



<IPython.core.display.Javascript object>

### Write data to Zarr files

In [88]:
# export to zarr in write mode (to overwrite if exists)
# Build the store path from ds_dirfut, the same base directory that is used when
# the store is opened again. ds_out_filename already lives in ds_dirfut, so the
# previous ds_dir base only worked because the absolute argument overrode it.
zarr_out_path = ds_dirfut.joinpath("%s.zarr" % ds_out_filename)
dsmergeddum.to_zarr(zarr_out_path, mode="w")

<xarray.backends.zarr.ZarrStore at 0x111ab9e8350>

<IPython.core.display.Javascript object>

### Open written dataset

In [89]:
# Re-open the Zarr store from disk to inspect what was written
zarr_store_path = ds_dirfut.joinpath(f"{ds_out_filename}.zarr")
ds_new = xr.open_zarr(zarr_store_path)
ds_new

Unnamed: 0,Array,Chunk
Bytes,398.52 kiB,199.26 kiB
Shape,"(51010,)","(25505,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 398.52 kiB 199.26 kiB Shape (51010,) (25505,) Dask graph 2 chunks in 2 graph layers Data type float64 numpy.ndarray",51010  1,

Unnamed: 0,Array,Chunk
Bytes,398.52 kiB,199.26 kiB
Shape,"(51010,)","(25505,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,398.52 kiB,199.26 kiB
Shape,"(51010,)","(25505,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 398.52 kiB 199.26 kiB Shape (51010,) (25505,) Dask graph 2 chunks in 2 graph layers Data type float64 numpy.ndarray",51010  1,

Unnamed: 0,Array,Chunk
Bytes,398.52 kiB,199.26 kiB
Shape,"(51010,)","(25505,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,40 B,40 B
Shape,"(5,)","(5,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,|S8 numpy.ndarray,|S8 numpy.ndarray
"Array Chunk Bytes 40 B 40 B Shape (5,) (5,) Dask graph 1 chunks in 2 graph layers Data type |S8 numpy.ndarray",5  1,

Unnamed: 0,Array,Chunk
Bytes,40 B,40 B
Shape,"(5,)","(5,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,|S8 numpy.ndarray,|S8 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,29.19 MiB,797.03 kiB
Shape,"(5, 5, 3, 51010)","(2, 2, 1, 25505)"
Dask graph,54 chunks in 2 graph layers,54 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 29.19 MiB 797.03 kiB Shape (5, 5, 3, 51010) (2, 2, 1, 25505) Dask graph 54 chunks in 2 graph layers Data type float64 numpy.ndarray",5  1  51010  3  5,

Unnamed: 0,Array,Chunk
Bytes,29.19 MiB,797.03 kiB
Shape,"(5, 5, 3, 51010)","(2, 2, 1, 25505)"
Dask graph,54 chunks in 2 graph layers,54 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


<IPython.core.display.Javascript object>