# Shoreline monitor (annual)

Notebook environment to migrate NetCDF files to CF-compliant Zarr

In [33]:
# Optional: code formatter, when installed as a JupyterLab extension
#%load_ext lab_black
# Optional: code formatter, when installed as a Jupyter Notebook extension
%load_ext nb_black

The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

### Configure OS independent paths

In [34]:
# Import standard packages
import json
import os
import pathlib
import sys

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr

# Extend the import path with the parent of the working directory and with the
# coclicodata directory on the P: drive, so that `etl` can be imported below
cwd = pathlib.Path().resolve()
sys.path.append(str(cwd.parent))
sys.path.append(r'P:\1000545-054-globalbeaches\15_GlobalCoastalAtlas\coclicodata')

# Home directory of the current user and the root component of that path
home = pathlib.Path().home()
root = home.root

# Custom functionality (imported only after the sys.path additions above)
from etl import p_drive
from etl.CF_compliancy_checker import check_compliancy, save_compliancy

# Dataset directory on the (local and) remote drive
gca_data_dir = p_drive.joinpath(
    "1000545-054-globalbeaches",
    "15_GlobalCoastalAtlas",
    "datasets",
)

# Workaround to the Windows OS (10) udunits error after installation of cfchecker:
# https://github.com/SciTools/iris/issues/404
# Change the relative path below to the udunits2.xml file in your own Python installation.
os.environ["UDUNITS2_XML_PATH"] = str(
    home / r"Anaconda3\pkgs\udunits2-2.2.28-h892ecd3_0\Library\share\udunits\udunits2.xml"
)

<IPython.core.display.Javascript object>

In [35]:
# Project paths & files (manual input)
dataset_dir = gca_data_dir.joinpath("01. Shorelinemonitor_annual")
# path of the input NetCDF file (a file, despite the `_dir_` in the variable name)
dataset_dir_shorelinemonitor = dataset_dir.joinpath("shorelinemonitor_annual.nc")

# base name of the output files (<name>_CF.nc and <name>.zarr); named after this
# dataset instead of the "CoastAlRisk_Europe_EESSL" name left over from another dataset
dataset_out_file = "shorelinemonitor_annual"
CF_dir = gca_data_dir.joinpath(r"CF")  # directory to save output CF check files

<IPython.core.display.Javascript object>

### Check CF compliancy of the original NetCDF files

In [36]:
# open the original (not yet altered) NetCDF file as an xarray dataset
dataset_shorelinemonitor = xr.open_dataset(dataset_dir_shorelinemonitor)


# display the original dataset for inspection before anything is changed
dataset_shorelinemonitor

<IPython.core.display.Javascript object>

In [37]:
%%capture cap --no-stderr
# check CF compliancy of the original NetCDF file;
# the output of this cell is captured in `cap` by the %%capture magic above
check_compliancy(testfile=dataset_dir_shorelinemonitor, working_dir=CF_dir)


<IPython.core.display.Javascript object>

In [38]:
# save original CF compliancy: pass the output captured in `cap` by the previous
# cell, together with the checked file and the CF output directory
save_compliancy(cap, testfile=dataset_dir_shorelinemonitor, working_dir=CF_dir)



<IPython.core.display.Javascript object>

### Make CF compliant alterations to the NetCDF files (dataset dependent)

In [39]:
# NetCDF attribute alterations

# read the global attributes from the metadata JSON stored in the dataset directory;
# the context manager closes the file handle again after loading
with open(dataset_dir.joinpath("metadata_shorelinemonitor.json")) as f_global:
    meta_global = json.load(f_global)

# copy every key/value pair of the JSON onto the dataset as a global attribute
dataset_shorelinemonitor.attrs.update(meta_global)

# "Conventions" is a global attribute in CF, so it is set through .attrs;
# item assignment on the dataset would create a data variable instead
dataset_shorelinemonitor.attrs["Conventions"] = "CF-1.8"

<IPython.core.display.Javascript object>

In [40]:
# display the dataset again to inspect it after the attribute alterations
dataset_shorelinemonitor

<IPython.core.display.Javascript object>

In [41]:
# NetCDF variable and dimension alterations

# rename or swap dimension names, the latter in case the name already exists as coordinate
dataset_shorelinemonitor = dataset_shorelinemonitor.rename_dims({"index": "stations"})

# per-variable metadata, read as {current name: {"name": new name, <attribute>: <value>, ...}};
# the context manager closes the file handle again after loading
with open(dataset_dir.joinpath("vars_shorelinemonitor2.json")) as f_vars:
    meta_vars = json.load(f_vars)

for var_name, var_dict in meta_vars.items():
    new_name = var_dict["name"]
    dataset_shorelinemonitor = dataset_shorelinemonitor.rename_vars({var_name: new_name})
    # every entry except "name" becomes an attribute of the renamed variable
    dataset_shorelinemonitor[new_name].attrs.update(
        {key: value for key, value in var_dict.items() if key != "name"}
    )

# combine start and end coordinates into a transect
# the stacked array has shape (n, 2, 2): axis 1 is (start point, end point) and
# axis 2 is (lon, lat)
# NOTE(review): the last two dimension names do not describe those axes; they are kept
# unchanged because renaming them would change the output schema - confirm with consumers
transect_dims = ('stations', 'start_coord', 'end_coord')

start_points = np.stack(
    [dataset_shorelinemonitor['Start_lon'].values, dataset_shorelinemonitor['Start_lat'].values],
    axis=-1,
)
end_points = np.stack(
    [dataset_shorelinemonitor['End_lon'].values, dataset_shorelinemonitor['End_lat'].values],
    axis=-1,
)
dataset_shorelinemonitor['transect_geom'] = (
    transect_dims,
    np.stack([start_points, end_points], axis=1),
)
dataset_shorelinemonitor['transect_geom'].attrs['long_name'] = 'Transect Geometry'

# the separate start/end variables are now contained in transect_geom
# (drop_vars replaces the deprecated Dataset.drop and removes all four in one call)
delete_vars = ['Start_lon', 'Start_lat', 'End_lon', 'End_lat']
dataset_shorelinemonitor = dataset_shorelinemonitor.drop_vars(delete_vars)

# change dtypes
object_vars = ['transect_id', 'country_id', 'continent', 'country', 'time', 'sp', 'outliers_1', 'outliers_2']
for ov in object_vars:
    dataset_shorelinemonitor[ov] = dataset_shorelinemonitor[ov].astype(str)

# set some data variables to coordinates to avoid duplication of dimensions in later stage
dataset_shorelinemonitor = dataset_shorelinemonitor.set_coords(
    ["lon", "lat", "transect_id", "country_id", "continent", "country", "changerate", "changerate_unc", "sandy"]
)

# drop the 'index' variable that is left over after renaming its dimension to 'stations'
dataset_shorelinemonitor = dataset_shorelinemonitor.drop_vars('index')

<IPython.core.display.Javascript object>

In [42]:
# display the dataset again to inspect it after the variable and dimension alterations
dataset_shorelinemonitor

<IPython.core.display.Javascript object>

In [43]:
# `stop` is not defined anywhere, so this line raises a NameError; presumably an
# intentional halt so that "Run All" does not continue into the export cells below
# NOTE(review): remove this cell once the cells below are verified for this dataset
stop

NameError: name 'stop' is not defined

<IPython.core.display.Javascript object>

In [None]:
# Check the xarray dataset: best practice is to have as many bold dimensions as possible (dimension name == coordinate name).
# This way, the front end can access the variable directly without having to index it first.
# dataset["scenarios"]

In [None]:
# save the altered dataset as a new .nc file in the dataset directory
# (this notebook defines a single dataset, dataset_shorelinemonitor; the historical /
# rcp45 / rcp85 datasets and paths are not defined here)
dataset_shorelinemonitor.to_netcdf(path=dataset_dir.joinpath(dataset_out_file + "_CF.nc"))

### Check CF compliancy of the altered NetCDF files

In [None]:
%%capture cap --no-stderr
# check altered CF compliancy

check_compliancy(testfile=str(dataset_historical_path).replace(".nc", "_CF.nc"), working_dir=CF_dir)

In [None]:
# save altered CF compliancy
save_compliancy(
    cap,
    testfile=str(dataset_historical_path).replace(".nc", "_CF.nc"),
    working_dir=CF_dir,
)

In [None]:
%%capture cap --no-stderr
# check altered CF compliancy

check_compliancy(testfile=str(dataset_rcp45_path).replace(".nc", "_CF.nc"), working_dir=CF_dir)

In [None]:
# save altered CF compliancy
save_compliancy(
    cap, testfile=str(dataset_rcp45_path).replace(".nc", "_CF.nc"), working_dir=CF_dir,
)

In [None]:
%%capture cap --no-stderr
# check altered CF compliancy

check_compliancy(testfile=str(dataset_rcp85_path).replace(".nc", "_CF.nc"), working_dir=CF_dir)

In [None]:
# save altered CF compliancy
save_compliancy(
    cap, testfile=str(dataset_rcp85_path).replace(".nc", "_CF.nc"), working_dir=CF_dir,
)

In [None]:
%%capture cap --no-stderr
# check altered CF compliancy

check_compliancy(testfile=dataset_dir.joinpath(dataset_out_file + "_CF.nc"), working_dir=CF_dir)

In [None]:
# save altered CF compliancy
save_compliancy(
    cap, testfile=dataset_dir.joinpath(dataset_out_file + "_CF.nc"), working_dir=CF_dir,
)

### Write data to Zarr files

In [None]:
# export to zarr in write mode (to overwrite if exists)
# (dataset_shorelinemonitor is the dataset defined in this notebook; `dataset` is not defined)
dataset_shorelinemonitor.to_zarr(dataset_dir.joinpath("%s.zarr" % dataset_out_file), mode="w")