# Drivers of shoreline evolution

Notebook environment to migrate NetCDF files to CF-compliant Zarr stores.

In [1]:
# Optional: code formatter for JupyterLab (installed as a JupyterLab extension)
#%load_ext lab_black
# Optional: code formatter for the classic notebook (installed as a Jupyter Notebook extension)
%load_ext nb_black

<IPython.core.display.Javascript object>

### Configure OS-independent paths

In [2]:
# Import standard packages
import os
import pathlib

import sys
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import math

# Extend the module search path with the parent of the working directory and
# with the shared coclicodata checkout, so the `etl` package below can be imported
cwd = pathlib.Path().resolve()
sys.path.extend(
    [
        os.path.dirname(cwd),
        r'P:\1000545-054-globalbeaches\15_GlobalCoastalAtlas\coclicodata',
    ]
)

# User home directory and the root component of that path
home = pathlib.Path.home()
root = home.root

# Custom functionality (must come after the sys.path additions above)
from etl import p_drive
from etl.CF_compliancy_checker import check_compliancy, save_compliancy

# Define (local and) remote drives
gca_data_dir = p_drive.joinpath(
    "1000545-054-globalbeaches",
    "15_GlobalCoastalAtlas",
    "datasets",
)

# Workaround to the Windows OS (10) udunits error after installation of cfchecker: https://github.com/SciTools/iris/issues/404
# NOTE: change the path below to the udunits2.xml file dir in your Python installation
os.environ["UDUNITS2_XML_PATH"] = str(
    home / r"Anaconda3\pkgs\udunits2-2.2.28-h892ecd3_0\Library\share\udunits\udunits2.xml"
)


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In a future release, GeoPandas will switch to using Shapely by default. If you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


<IPython.core.display.Javascript object>

In [3]:
# Project paths & files (manual input)
dataset_out_file = "drivers"  # base name of the zarr store written at the end
dataset_dir = gca_data_dir.joinpath("07_Drivers")
dataset_dir_driver = dataset_dir.joinpath("drivers.nc")  # input NetCDF file (a file, despite the name)
CF_dir = gca_data_dir.joinpath("CF")  # directory to save output CF check files

<IPython.core.display.Javascript object>

In [4]:
# # Write CSV to NetCDF (disabled; presumably the steps that produced the
# # NetCDF file at `dataset_dir_driver` -- confirm before re-enabling)

# # Load data from the CSV file into a pandas dataframe
# csv_dir = r'P:\11202268-hydraulic-engineering\MSc_students\Dante_van_der_Heijden\02. Data\01. Final_data\csv_files\all_drivers.csv'
# df = pd.read_csv(csv_dir)

# # Convert the pandas dataframe to an xarray dataset
# ds = xr.Dataset.from_dataframe(df)

# # Write the xarray dataset to a netCDF file
# ds.to_netcdf(dataset_dir_driver)

<IPython.core.display.Javascript object>

### Check CF compliancy of the original NetCDF files

In [5]:
# Open the original drivers NetCDF file as an xarray Dataset
# (note: despite the `df_` prefix this is an xarray Dataset, not a DataFrame)
df_driver = xr.open_dataset(dataset_dir_driver)

# Display the original dataset for inspection
df_driver

<IPython.core.display.Javascript object>

In [6]:
%%capture cap --no-stderr
# check original CF compliancy

check_compliancy(testfile=dataset_dir_driver, 
                 working_dir=CF_dir
                 )


<IPython.core.display.Javascript object>

In [7]:
# Save the CF compliancy output of the original file, as captured in `cap`
# by the previous cell, using CF_dir as working directory
save_compliancy(cap, testfile=dataset_dir_driver, working_dir=CF_dir)



<IPython.core.display.Javascript object>

### Make CF compliant alterations to the NetCDF files (dataset dependent)

In [8]:
import json

# NetCDF attribute alterations
# Read the global metadata that sits next to the dataset. The path is derived
# from `dataset_dir` instead of a hard-coded absolute drive path, and the
# context manager guarantees the file handle is closed after parsing.
with open(
    dataset_dir.joinpath("metadata_shorelinemonitor_drivers.json"), encoding="utf-8"
) as f_global:
    meta_global = json.load(f_global)

# Copy every metadata entry onto the dataset as a global attribute
for attr_name, attr_val in meta_global.items():
    if attr_name == 'PROVIDERS':
        # stored as a JSON string (presumably because the value is a nested
        # structure that cannot be written as a NetCDF attribute -- confirm)
        attr_val = json.dumps(attr_val)
    df_driver.attrs[attr_name] = attr_val

# Declare the CF conventions version the file is meant to follow
df_driver.attrs['Conventions'] = "CF-1.8"

<IPython.core.display.Javascript object>

In [9]:
# Inspect the dataset after the global attributes have been set
df_driver

<IPython.core.display.Javascript object>

In [10]:
def _to_latitude(raw):
    """Convert one raw ``transect_origin_y`` entry to ``float``.

    The value is first converted directly. If that fails, the last two
    characters of its string form are stripped and the conversion is retried
    (presumably some entries carry two stray trailing characters -- confirm
    against the source data).

    Raises:
        ValueError: if the stripped fallback cannot be parsed either. The error
            is propagated instead of silently recording a stale value.
    """
    try:
        return float(raw)
    except (TypeError, ValueError):
        return float(str(raw)[:-2])


# Cleaned origin latitudes, one float per entry of transect_origin_y
start_lats = [_to_latitude(x) for x in df_driver['transect_origin_y'].values]

<IPython.core.display.Javascript object>

In [11]:
# combine start and end coordinates into a transect
from shapely.geometry import LineString

start_lons = df_driver["transect_origin_x"].values
end_lons = df_driver["transect_end_x"].values
end_lats = df_driver["transect_end_y"].values
coords = zip(zip(start_lons, start_lats), zip(end_lons, end_lats))

df_driver["transect_geom"] = (
    ["index"],
    [str(LineString(line)) for line in coords],
)
df_driver["transect_geom"].attrs["long_name"] = "Transect Geometry"

<IPython.core.display.Javascript object>

In [12]:
# Replace the raw transect_origin_y variable with the cleaned float latitudes.
# `Dataset.drop` is deprecated in xarray; `drop_vars` is its supported replacement.
df_driver = df_driver.drop_vars('transect_origin_y')
df_driver["transect_origin_y"] = ("index", start_lats)

<IPython.core.display.Javascript object>

In [13]:
# Remove variables that are not carried over to the CF-compliant dataset
# (the transect end points are already encoded in `transect_geom`).
# `Dataset.drop` is deprecated in xarray; `drop_vars` is its supported replacement.
df_driver = df_driver.drop_vars(['hotspot_id', 'transect_end_x', 'transect_end_y', 'P_mis'])

<IPython.core.display.Javascript object>

In [14]:
# NetCDF variable and dimension alterations

# rename or swap dimension names, the latter in case the name already exists as coordinate
df_driver = df_driver.rename_dims({"index": "stations"})

# Read the per-variable metadata that sits next to the dataset. The path is
# derived from `dataset_dir` instead of a hard-coded absolute drive path, and
# the context manager guarantees the file handle is closed after parsing.
with open(dataset_dir.joinpath("vars_drivers.json"), encoding="utf-8") as f_vars:
    meta_vars = json.load(f_vars)

# Each entry maps an existing variable name to a dict holding its new 'name'
# plus attributes; rename the variable, then attach every other key as attribute
for var_name, var_dict in meta_vars.items():
    df_driver = df_driver.rename_vars({var_name: var_dict['name']})
    for key, value in var_dict.items():
        if key != 'name':
            df_driver[var_dict['name']].attrs[key] = value

<IPython.core.display.Javascript object>

In [15]:
# change dtypes: cast the text-like variables to byte strings ('S')
# NOTE(review): 'hotspot_id' is dropped in an earlier cell; presumably the
# rename from vars_drivers.json re-introduces a variable with that name -- confirm
object_vars = [
    # identifiers and location descriptors
    'transect_id',
    'country_id',
    'country',
    'continent',
    'transect_geom',
    'hotspot_id',
    # confidence labels of the driver identification
    'seasonal_id_conf',
    'reclamation_id_conf',
    'nourishment_id_conf',
    'littoraldb_id_conf',
    # remaining text variables
    'date_nourishment',
    'ldb_type',
]
for ov in object_vars:
    df_driver[ov] = df_driver[ov].astype('S')

<IPython.core.display.Javascript object>

In [16]:
# change dtypes: cast the numeric variables to float
# (the name `object_vars` is reused here for the list of float variables)
object_vars = [
    'lon',
    'lat',
    'changerate',
    'seasonal_displacement',
    't_min_seasonal_sp',
    't_max_seasonal_sp',
    't_recl_construction',
]
for ov in object_vars:
    df_driver[ov] = df_driver[ov].astype('float')

<IPython.core.display.Javascript object>

In [17]:
# Names that must remain data variables; every other variable currently in the
# dataset is promoted to a coordinate below
all_vars = list(df_driver.keys())
data_vars = ['seasonal_displacement', 't_min_seasonal_sp', 't_max_seasonal_sp',
             't_recl_construction', 'date_nourishment', 'label_littoraldb', 'ldb_type', 'changerate',
             'seasonal_id_conf', 'reclamation_id_conf', 'nourishment_id_conf', 'littoraldb_id_conf',]
# set some data variables to coordinates to avoid duplication of dimensions in later stage
df_driver = df_driver.set_coords([v for v in all_vars if v not in data_vars])

# drop index xarray
# `Dataset.drop` is deprecated in xarray; `drop_vars` is its supported replacement.
df_driver = df_driver.drop_vars('index')

<IPython.core.display.Javascript object>

In [18]:
# Check the xarray dataset. Best practice is to have as many bold dimensions as
# possible (dimension name == coordinate name). In this way, the front-end can
# access the variable directly without having to index the variable first,
# e.g. dataset["scenarios"]
df_driver

<IPython.core.display.Javascript object>

In [19]:
# save new .nc files
# The output name "<stem>_CF.nc" is built from the file name only, so a ".nc"
# occurring elsewhere in the path can never be rewritten (unlike str.replace).
df_driver.to_netcdf(
    path=str(dataset_dir_driver.with_name(dataset_dir_driver.stem + "_CF.nc"))
)

<IPython.core.display.Javascript object>

### Check CF compliancy of the altered NetCDF files

In [20]:
%%capture cap --no-stderr
# check altered CF compliancy

check_compliancy(testfile=str(dataset_dir_driver).replace(".nc", "_CF.nc"), working_dir=CF_dir)

<IPython.core.display.Javascript object>

In [21]:
# save altered CF compliancy (output captured in `cap` by the previous cell)
# The "<stem>_CF.nc" name is built from the file name only, so a ".nc"
# occurring elsewhere in the path can never be rewritten (unlike str.replace).
save_compliancy(
    cap,
    testfile=str(dataset_dir_driver.with_name(dataset_dir_driver.stem + "_CF.nc")),
    working_dir=CF_dir,
)



<IPython.core.display.Javascript object>

### Write data to Zarr files

In [22]:
# write the dataset to "<dataset_out_file>.zarr" inside the dataset directory;
# mode="w" overwrites the store if it already exists
df_driver.to_zarr(
    store=dataset_dir.joinpath(f"{dataset_out_file}.zarr"),
    mode="w",
)

<xarray.backends.zarr.ZarrStore at 0x18ee4d01e40>

<IPython.core.display.Javascript object>

In [23]:
# re-open the zarr store that was just written, to verify the export
test = xr.open_zarr(dataset_dir.joinpath(f"{dataset_out_file}.zarr"))

<IPython.core.display.Javascript object>

In [24]:
# Inspect the dataset as read back from the zarr store
test

Unnamed: 0,Array,Chunk
Bytes,735.36 kiB,183.84 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S13 numpy.ndarray,|S13 numpy.ndarray
"Array Chunk Bytes 735.36 kiB 183.84 kiB Shape (57924,) (14481,) Dask graph 4 chunks in 2 graph layers Data type |S13 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,735.36 kiB,183.84 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S13 numpy.ndarray,|S13 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.49 MiB,381.82 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S27 numpy.ndarray,|S27 numpy.ndarray
"Array Chunk Bytes 1.49 MiB 381.82 kiB Shape (57924,) (14481,) Dask graph 4 chunks in 2 graph layers Data type |S27 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,1.49 MiB,381.82 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S27 numpy.ndarray,|S27 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,395.96 kiB,197.98 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,|S7 numpy.ndarray,|S7 numpy.ndarray
"Array Chunk Bytes 395.96 kiB 197.98 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type |S7 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,395.96 kiB,197.98 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,|S7 numpy.ndarray,|S7 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.71 MiB,438.39 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S31 numpy.ndarray,|S31 numpy.ndarray
"Array Chunk Bytes 1.71 MiB 438.39 kiB Shape (57924,) (14481,) Dask graph 4 chunks in 2 graph layers Data type |S31 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,1.71 MiB,438.39 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S31 numpy.ndarray,|S31 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 452.53 kiB 226.27 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type float64 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 452.53 kiB 226.27 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type float64 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,3.15 MiB,403.06 kiB
Shape,"(57924,)","(7241,)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,|S57 numpy.ndarray,|S57 numpy.ndarray
"Array Chunk Bytes 3.15 MiB 403.06 kiB Shape (57924,) (7241,) Dask graph 8 chunks in 2 graph layers Data type |S57 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,3.15 MiB,403.06 kiB
Shape,"(57924,)","(7241,)"
Dask graph,8 chunks in 2 graph layers,8 chunks in 2 graph layers
Data type,|S57 numpy.ndarray,|S57 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,848.50 kiB,212.12 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S15 numpy.ndarray,|S15 numpy.ndarray
"Array Chunk Bytes 848.50 kiB 212.12 kiB Shape (57924,) (14481,) Dask graph 4 chunks in 2 graph layers Data type |S15 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,848.50 kiB,212.12 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S15 numpy.ndarray,|S15 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 452.53 kiB 226.27 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type float64 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.66 MiB,424.25 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S30 numpy.ndarray,|S30 numpy.ndarray
"Array Chunk Bytes 1.66 MiB 424.25 kiB Shape (57924,) (14481,) Dask graph 4 chunks in 2 graph layers Data type |S30 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,1.66 MiB,424.25 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S30 numpy.ndarray,|S30 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,791.93 kiB,197.98 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S14 numpy.ndarray,|S14 numpy.ndarray
"Array Chunk Bytes 791.93 kiB 197.98 kiB Shape (57924,) (14481,) Dask graph 4 chunks in 2 graph layers Data type |S14 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,791.93 kiB,197.98 kiB
Shape,"(57924,)","(14481,)"
Dask graph,4 chunks in 2 graph layers,4 chunks in 2 graph layers
Data type,|S14 numpy.ndarray,|S14 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,339.40 kiB,169.70 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray
"Array Chunk Bytes 339.40 kiB 169.70 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type |S6 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,339.40 kiB,169.70 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,339.40 kiB,169.70 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray
"Array Chunk Bytes 339.40 kiB 169.70 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type |S6 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,339.40 kiB,169.70 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,339.40 kiB,169.70 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray
"Array Chunk Bytes 339.40 kiB 169.70 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type |S6 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,339.40 kiB,169.70 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 452.53 kiB 226.27 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type float64 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,395.96 kiB,197.98 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,|S7 numpy.ndarray,|S7 numpy.ndarray
"Array Chunk Bytes 395.96 kiB 197.98 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type |S7 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,395.96 kiB,197.98 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,|S7 numpy.ndarray,|S7 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 452.53 kiB 226.27 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type float64 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 452.53 kiB 226.27 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type float64 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 452.53 kiB 226.27 kiB Shape (57924,) (28962,) Dask graph 2 chunks in 2 graph layers Data type float64 numpy.ndarray",57924  1,

Unnamed: 0,Array,Chunk
Bytes,452.53 kiB,226.27 kiB
Shape,"(57924,)","(28962,)"
Dask graph,2 chunks in 2 graph layers,2 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


<IPython.core.display.Javascript object>

In [25]:
# List the distinct values of seasonal_id_conf in the re-opened zarr dataset
np.unique(test['seasonal_id_conf'].values)

array([b'high', b'low', b'medium', b'unknown'], dtype='|S7')

<IPython.core.display.Javascript object>