# Shoreline Change
Notebook environment to migrate NetCDF files to CF-compliant Zarr

In [None]:
# Optional: code formatter extension. Two variants are listed, one per front-end;
# only the classic-notebook variant is active in this cell.
# Variant installed as a JupyterLab extension (disabled):
#%load_ext lab_black
# Variant installed as a Jupyter Notebook extension (enabled):
%load_ext nb_black

### Configure OS-independent paths

In [None]:
# Import standard packages
import os
import pathlib
import sys
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import netCDF4 as nc
import numpy.ma as ma

# Make root directories importable by appending the parent of the current working
# directory to sys.path. Guarded so that re-running this cell does not keep adding
# duplicate entries.
cwd = pathlib.Path().resolve()
repo_root = os.path.dirname(cwd)
if repo_root not in sys.path:
    sys.path.append(repo_root)

# Get root paths
home = pathlib.Path().home()
root = home.root

# Import custom functionality (must stay below the sys.path change above,
# because `etl` is resolved through the path that was just appended)
from etl import p_drive
from etl.CF_compliancy_checker import check_compliancy, save_compliancy

# Define (local and) remote drives
coclico_data_dir = p_drive.joinpath("11205479-coclico", "data")

# Workaround to the Windows OS (10) udunits error after installation of cfchecker: https://github.com/SciTools/iris/issues/404
# Change the path below to the udunits2.xml file in your own Python installation.
udunits_xml = home.joinpath(
    r"Anaconda3\pkgs\udunits2-2.2.28-h892ecd3_0\Library\share\udunits\udunits2.xml"
)
if udunits_xml.is_file():
    os.environ["UDUNITS2_XML_PATH"] = str(udunits_xml)
else:
    # Do not point UDUNITS2_XML_PATH at a file that does not exist (e.g. on another
    # OS or a different installation); that would override a valid existing setting.
    print(f"UDUNITS2_XML_PATH left unchanged: {udunits_xml} not found")

In [None]:
# Project paths & files (manual input)
ds_dir = coclico_data_dir / "05_erosion_projections_jrc"

# Input NetCDF files, one per RCP scenario (45 / 85) and target year (2050 / 2100)
ds_rcp45_2050path = ds_dir / "globalErosionProjections_Long_Term_Change_RCP45_2050.nc"
ds_rcp45_2100path = ds_dir / "globalErosionProjections_Long_Term_Change_RCP45_2100.nc"
ds_rcp85_2050path = ds_dir / "globalErosionProjections_Long_Term_Change_RCP85_2050.nc"
ds_rcp85_2100path = ds_dir / "globalErosionProjections_Long_Term_Change_RCP85_2100.nc"

# Base name (no extension) used for the combined output files
ds_out_file = "globalErosionProjections_Long_Term_Change"

# Directory to save output CF check files
CF_dir = coclico_data_dir / "CF"

### Check CF compliancy of the original NetCDF files

In [None]:
# Open the four source files; order is RCP4.5 (2050, 2100) then RCP8.5 (2050, 2100)
ds_45rcp2050, ds_45rcp2100, ds_85rcp2050, ds_85rcp2100 = map(
    xr.open_dataset,
    (ds_rcp45_2050path, ds_rcp45_2100path, ds_rcp85_2050path, ds_rcp85_2100path),
)

# Display one of the original datasets for inspection
ds_45rcp2050

In [None]:
%%capture cap --no-stderr
# Check CF compliancy of the original RCP4.5 / 2050 file.
# The cell magic above stores this cell's stdout in `cap` (stderr is not captured).

check_compliancy(testfile=ds_rcp45_2050path, working_dir=CF_dir)

In [None]:
# Save the captured check output for the original RCP4.5 / 2050 file.
# NOTE(review): presumably writes a report into working_dir - confirm in etl.CF_compliancy_checker.
save_compliancy(cap, testfile=ds_rcp45_2050path, working_dir=CF_dir)

In [None]:
%%capture cap --no-stderr
# Check CF compliancy of the original RCP4.5 / 2100 file (stdout captured in `cap`).

check_compliancy(testfile=ds_rcp45_2100path, working_dir=CF_dir)

In [None]:
# Save the captured check output for the original RCP4.5 / 2100 file.
save_compliancy(cap, testfile=ds_rcp45_2100path, working_dir=CF_dir)

In [None]:
%%capture cap --no-stderr
# Check CF compliancy of the original RCP8.5 / 2050 file (stdout captured in `cap`).

check_compliancy(testfile=ds_rcp85_2050path, working_dir=CF_dir)

In [None]:
# Save the captured check output for the original RCP8.5 / 2050 file.
save_compliancy(cap, testfile=ds_rcp85_2050path, working_dir=CF_dir)

In [None]:
%%capture cap --no-stderr
# Check CF compliancy of the original RCP8.5 / 2100 file (stdout captured in `cap`).

check_compliancy(testfile=ds_rcp85_2100path, working_dir=CF_dir)

In [None]:
# Save the captured check output for the original RCP8.5 / 2100 file.
save_compliancy(cap, testfile=ds_rcp85_2100path, working_dir=CF_dir)

### Make CF compliant alterations to the NetCDF files (dataset dependent)

In [None]:
# NetCDF attribute alterations: set the units attribute of lat / lon on every input dataset

for ds_in in (ds_45rcp2050, ds_45rcp2100, ds_85rcp2050, ds_85rcp2100):
    ds_in["lat"].attrs["units"] = "degrees_north"
    ds_in["lon"].attrs["units"] = "degrees_east"

In [None]:
# NetCDF variable and dimension alterations

# Percentile levels; each input dataset is indexed with one variable "perc<level>" per level
PERCENTILE_LEVELS = (1, 5, 17, 50, 83, 95, 99)


def stack_percentiles(ds):
    """Stack the ``perc<level>`` variables of ``ds`` along a new "nensemble" dimension.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset providing a variable ``perc<level>`` for every level in
        ``PERCENTILE_LEVELS``.

    Returns
    -------
    xarray.DataArray
        The variables concatenated in ``PERCENTILE_LEVELS`` order along
        "nensemble", with a byte-string coordinate "ensemble" ("1%", "5%", ...)
        attached to that dimension.
    """
    stacked = xr.concat(
        [ds[f"perc{level}"] for level in PERCENTILE_LEVELS], dim="nensemble"
    )
    labels = np.array([f"{level}%" for level in PERCENTILE_LEVELS], dtype="S")
    return stacked.assign_coords({"ensemble": ("nensemble", labels)})


def to_shoreline_change_dataset(stacked):
    """Wrap a stacked percentile array in a dataset with variable "shoreline_change".

    Parameters
    ----------
    stacked : xarray.DataArray
        Array carrying an "ensemble" coordinate, as returned by ``stack_percentiles``.

    Returns
    -------
    xarray.Dataset
        Dataset with the single data variable "shoreline_change"; ``long_name``
        attributes are set on that variable and on the "ensemble" coordinate.
    """
    ds_new = stacked.to_dataset(name="shoreline_change")
    ds_new.shoreline_change.attrs["long_name"] = "shoreline_change"
    ds_new.ensemble.attrs["long_name"] = "ensemble"
    return ds_new


# Mark lon / lat as coordinates instead of data variables
ds_45rcp2050 = ds_45rcp2050.set_coords(["lon", "lat"])
ds_45rcp2100 = ds_45rcp2100.set_coords(["lon", "lat"])
ds_85rcp2050 = ds_85rcp2050.set_coords(["lon", "lat"])
ds_85rcp2100 = ds_85rcp2100.set_coords(["lon", "lat"])

# One stacked array per input dataset
ds_45rcp2050arr = stack_percentiles(ds_45rcp2050)
ds_45rcp2100arr = stack_percentiles(ds_45rcp2100)
ds_85rcp2050arr = stack_percentiles(ds_85rcp2050)
ds_85rcp2100arr = stack_percentiles(ds_85rcp2100)

# One restructured dataset per input dataset
ds_45rcp2050_new = to_shoreline_change_dataset(ds_45rcp2050arr)
ds_45rcp2100_new = to_shoreline_change_dataset(ds_45rcp2100arr)
ds_85rcp2050_new = to_shoreline_change_dataset(ds_85rcp2050arr)
ds_85rcp2100_new = to_shoreline_change_dataset(ds_85rcp2100arr)

In [None]:
# add or change certain variable / coordinate attributes
# The title depends on target year and scenario, so it is filled in per dataset below.
title_template = (
    "Global shoreline change projections for the year {year} under {scenario}"
)
dataset_attributes = {
    "title": title_template.format(year=2050, scenario="RCP4.5"),
    "description": "Projections of global shoreline change in view of climate change considering the combined effects of ambient change, sea level rise and storm driven erosion",
    # Journal corrected: the cited article (s41558-020-0697-0) appeared in Nature Climate Change
    "source": 'The procedure to produce the dataset and the findings are discussed in the paper: "Vousdoukas, M.I., Ranasinghe, R., Mentaschi, L., Plomaritis, T.P., Athanasiou, P., Luijendijk, A., and Feyen, L. (2020). Sandy coastlines under threat of erosion. Nature Climate Change. https://www.nature.com/articles/s41558-020-0697-0,"',
    "Conventions": "CF-1.8",
}  # specify custom (CF convention) attributes

# add / overwrite attributes
# Each dataset gets its own dict with a title matching its year and scenario.
# Errors are no longer swallowed, so a missing dataset is reported immediately.
ds_45rcp2050_new.attrs = {
    **dataset_attributes,
    "title": title_template.format(year=2050, scenario="RCP4.5"),
}
ds_45rcp2100_new.attrs = {
    **dataset_attributes,
    "title": title_template.format(year=2100, scenario="RCP4.5"),
}
ds_85rcp2050_new.attrs = {
    **dataset_attributes,
    "title": title_template.format(year=2050, scenario="RCP8.5"),
}
ds_85rcp2100_new.attrs = {
    **dataset_attributes,
    "title": title_template.format(year=2100, scenario="RCP8.5"),
}

# rename or swap dimension names, the latter in case the name already exists as coordinate
ds_45rcp2050_new = ds_45rcp2050_new.rename_dims({"row": "stations"})
ds_45rcp2100_new = ds_45rcp2100_new.rename_dims({"row": "stations"})
ds_85rcp2050_new = ds_85rcp2050_new.rename_dims({"row": "stations"})
ds_85rcp2100_new = ds_85rcp2100_new.rename_dims({"row": "stations"})

In [None]:
# Combine the per-year datasets of each scenario along a new "time" dimension and
# label that dimension with the target years.
ds_45rcp = xr.concat([ds_45rcp2050_new, ds_45rcp2100_new], dim="time")
ds_45rcp = ds_45rcp.assign_coords(time=("time", np.array([2050, 2100])))
ds_45rcp.time.attrs["long_name"] = "time"
ds_45rcp.time.attrs["units"] = "yr"

ds_85rcp = xr.concat([ds_85rcp2050_new, ds_85rcp2100_new], dim="time")
ds_85rcp = ds_85rcp.assign_coords(time=("time", np.array([2050, 2100])))
ds_85rcp.time.attrs["long_name"] = "time"
ds_85rcp.time.attrs["units"] = "yr"

# Combine both scenarios along a new "nscenarios" dimension; the scenario names are
# stored as byte strings in the "scenarios" coordinate.
dataset = xr.concat([ds_45rcp, ds_85rcp], dim="nscenarios")
dataset = dataset.assign_coords(
    scenarios=("nscenarios", np.array(["RCP45", "RCP85"], dtype="S"))
)
dataset.scenarios.attrs["long_name"] = "climate scenarios"

# The concatenated result takes its global attributes from the first input dataset,
# whose title names a single year and scenario. Set a title that covers the whole
# combined dataset instead.
dataset.attrs["title"] = (
    "Global shoreline change projections for the years 2050 and 2100 "
    "under RCP4.5 and RCP8.5"
)

# add epsg
ds_45rcp2050_new.attrs["crs"] = 4326
ds_45rcp2100_new.attrs["crs"] = 4326
ds_85rcp2050_new.attrs["crs"] = 4326
ds_85rcp2100_new.attrs["crs"] = 4326
dataset.attrs["crs"] = 4326

# To retrieve the scenarios as str instead of bytes: dataset["scenarios"].values.astype("U")

In [None]:
# Re-order the dimensions: per-file datasets become (stations, nensemble)
ds_45rcp2050_new, ds_45rcp2100_new, ds_85rcp2050_new, ds_85rcp2100_new = (
    ds_item.transpose("stations", "nensemble")
    for ds_item in (
        ds_45rcp2050_new,
        ds_45rcp2100_new,
        ds_85rcp2050_new,
        ds_85rcp2100_new,
    )
)

# The combined dataset becomes (nscenarios, stations, time, nensemble)
dataset = dataset.transpose("nscenarios", "stations", "time", "nensemble")

In [None]:
# Inspect the combined xarray dataset. Best practice is to have as many bold dimensions
# as possible in the repr, i.e. dimensions whose name equals a coordinate name.
# In this way, the Front-End can access the variable directly without having to index it first.

dataset
# Alternative, to inspect a single coordinate: dataset["scenarios"]

In [None]:
# Save new .nc files next to the originals, with "_CF" inserted before the extension.
# The output name is derived from the file name's stem and suffix. Replacing ".nc" in the
# whole path string would also alter directory names containing ".nc", and would
# return the input path unchanged (overwriting the source) for a name without ".nc".
for ds_new, src_path in (
    (ds_45rcp2050_new, ds_rcp45_2050path),
    (ds_45rcp2100_new, ds_rcp45_2100path),
    (ds_85rcp2050_new, ds_rcp85_2050path),
    (ds_85rcp2100_new, ds_rcp85_2100path),
):
    cf_path = src_path.with_name(f"{src_path.stem}_CF{src_path.suffix}")
    ds_new.to_netcdf(path=str(cf_path))

# Combined dataset (all scenarios and years)
dataset.to_netcdf(path=ds_dir.joinpath(ds_out_file + "_CF.nc"))

### Check CF compliancy of the altered NetCDF files

In [None]:
%%capture cap --no-stderr
# Check CF compliancy of the altered RCP4.5 / 2050 file.
# The cell magic above stores this cell's stdout in `cap` (stderr is not captured).
# The "_CF.nc" file name is derived from the original path by string replacement.

check_compliancy(testfile=str(ds_rcp45_2050path).replace(".nc", "_CF.nc"), working_dir=CF_dir)

In [None]:
# Save the captured check output for the altered RCP4.5 / 2050 file.
# NOTE(review): presumably writes a report into working_dir - confirm in etl.CF_compliancy_checker.
save_compliancy(
    cap, testfile=str(ds_rcp45_2050path).replace(".nc", "_CF.nc"), working_dir=CF_dir,
)

In [None]:
%%capture cap --no-stderr
# Check CF compliancy of the altered RCP4.5 / 2100 file (stdout captured in `cap`).

check_compliancy(testfile=str(ds_rcp45_2100path).replace(".nc", "_CF.nc"), working_dir=CF_dir)

In [None]:
# Save the captured check output for the altered RCP4.5 / 2100 file.
save_compliancy(
    cap, testfile=str(ds_rcp45_2100path).replace(".nc", "_CF.nc"), working_dir=CF_dir,
)

In [None]:
%%capture cap --no-stderr
# Check CF compliancy of the altered RCP8.5 / 2050 file (stdout captured in `cap`).

check_compliancy(testfile=str(ds_rcp85_2050path).replace(".nc", "_CF.nc"), working_dir=CF_dir)

In [None]:
# Save the captured check output for the altered RCP8.5 / 2050 file.
save_compliancy(
    cap, testfile=str(ds_rcp85_2050path).replace(".nc", "_CF.nc"), working_dir=CF_dir,
)

In [None]:
%%capture cap --no-stderr
# Check CF compliancy of the altered RCP8.5 / 2100 file (stdout captured in `cap`).

check_compliancy(testfile=str(ds_rcp85_2100path).replace(".nc", "_CF.nc"), working_dir=CF_dir)

In [None]:
# Save the captured check output for the altered RCP8.5 / 2100 file.
save_compliancy(
    cap, testfile=str(ds_rcp85_2100path).replace(".nc", "_CF.nc"), working_dir=CF_dir,
)

In [None]:
%%capture cap --no-stderr
# Check CF compliancy of the combined file (stdout captured in `cap`).

check_compliancy(testfile=ds_dir.joinpath(ds_out_file + "_CF.nc"), working_dir=CF_dir)

In [None]:
# Save the captured check output for the combined file.
save_compliancy(
    cap, testfile=ds_dir.joinpath(ds_out_file + "_CF.nc"), working_dir=CF_dir,
)

### Write data to Zarr files

In [None]:
# Write the combined dataset to a Zarr store; mode="w" overwrites an existing store
dataset.to_zarr(ds_dir / f"{ds_out_file}.zarr", mode="w")