# Sea level projection
### San Sebastian | Genova | La Rochelle | Valencia
Written by Math van Soest - Deltares 
17-08-'23

In [1]:
# Import standard packages
import os
import pathlib

import sys
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
import math
import itertools
import glob

# Import custom functionality
from coclicodata.drive_config import p_drive
from coclicodata.etl.cf_compliancy_checker import check_compliancy, save_compliancy

# Root of the WP3 data on the project drive
gca_data_dir = p_drive.joinpath("11205479-coclico", "FULLTRACK_DATA", "WP3")

# Workaround for the udunits error on Windows (10) after installing cfchecker,
# see https://github.com/SciTools/iris/issues/404: export the location of
# udunits2.xml explicitly. Change the relative path below to the udunits2.xml
# file dir in your own Python installation.
udunits_xml = pathlib.Path.home().joinpath(
    r"Anaconda3\pkgs\udunits2-2.2.28-h892ecd3_0\Library\share\udunits\udunits2.xml"
)
os.environ["UDUNITS2_XML_PATH"] = str(udunits_xml)


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [2]:
# Project paths & files (manual input)
ds_out_file = "SLP_MvS"  # base name used for the exported store
dataset_dir = gca_data_dir.joinpath("pilot")  # folder that is scanned for the input CSV files
dataset_dir_path = dataset_dir.joinpath("SLP_MvS.nc")  # merged NetCDF file
CF_dir = gca_data_dir.joinpath("CF")  # directory to save output CF check files

In [3]:
# write csv to netcdf

# Collect the CSV files that sit directly in dataset_dir, in os.listdir order.
# Entries stay plain strings (not Path objects) because the NetCDF names are
# derived from them with str.replace('.csv', '.nc').
# Only names that END in ".csv" qualify: a substring test would also accept
# entries such as "x.csv.bak" or "mycsvdata.txt", and for a name without ".csv"
# the replace() is a no-op, so the NetCDF output would overwrite the input.
all_files = [
    os.path.join(dataset_dir, entry)
    for entry in os.listdir(dataset_dir)
    if entry.endswith(".csv")
]

# fail early with a clear message instead of an opaque pd.concat error below
if not all_files:
    raise FileNotFoundError(f"no .csv files found in {dataset_dir}")

# read csv and convert to nc files
li = []
for filename in all_files:
    df = pd.read_csv(filename, index_col=None, header=0)
    li.append(df)

    # one NetCDF file per CSV, written next to its source file
    xr.Dataset.from_dataframe(df).to_netcdf(filename.replace('.csv', '.nc'))

# make one dataframe
df = pd.concat(li, axis=0, ignore_index=True)

# Convert the pandas dataframe to an xarray dataset
ds = xr.Dataset.from_dataframe(df)

# Write the xarray dataset to a netCDF file
ds.to_netcdf(dataset_dir.joinpath("SLP_MvS.nc"))

In [4]:
# Load the NetCDF counterpart of the first CSV file only (rest is the same)
first_nc = all_files[0].replace('.csv', '.nc')
ds = xr.open_dataset(first_nc)

# show the original dataset for inspection
ds

In [5]:
%%capture cap --no-stderr
# CF compliancy check of the original, unaltered NetCDF file (first file only)
first_nc_file = all_files[0].replace('.csv', '.nc')
check_compliancy(testfile=first_nc_file, working_dir=CF_dir)

In [6]:
# store the captured CF compliancy output of the original first file
save_compliancy(
    cap,
    testfile=all_files[0].replace('.csv', '.nc'),
    working_dir=CF_dir,
)



### Make CF compliant alterations to the NetCDF files (dataset dependent)


In [7]:
# open the NetCDF counterpart of every CSV file, in the order of all_files
ds_list = [
    xr.open_dataset(csv_path.replace('.csv', '.nc'))
    for csv_path in all_files
]

In [8]:
import json

# NetCDF attribute alterations by means of metadata template
# The context manager closes the template file as soon as it has been parsed;
# a bare open() would keep the handle open for the rest of the session.
with open(gca_data_dir.joinpath("metadata", "metadata_SLP.json")) as f_global:
    meta_global = json.load(f_global)

# copy every template entry onto each dataset as a global attribute
for i in ds_list:
    for attr_name, attr_val in meta_global.items():
        if attr_name == 'PROVIDERS':
            # stored as a JSON string; presumably because the raw value is a
            # nested structure that cannot be a NetCDF attribute - TODO confirm
            attr_val = json.dumps(attr_val)
        i.attrs[attr_name] = attr_val

    i.attrs['Conventions'] = "CF-1.8"

In [9]:
# NetCDF variable and dimension alterations (per dataset)
ds_list_CF = []
ensemble_vars = ["low", "median", "high"]

# custom (CF convention) attributes per variable / coordinate; identical for
# every dataset, so defined once outside the loop
dataset_attributes = {
    "slr": {"long_name": "sea level projections", "units": "m"},
    "time": {"long_name": "time projections", "units": "yr"},
    "ensemble": {"long_name": "ensemble", "units": "1"},  # set to 1 if no unit
}

for ds_src, csv_path in zip(ds_list, all_files):

    # stack the ensemble members row-wise: one row per entry of ensemble_vars,
    # one column per value along the original (dataframe) index
    data_array_r = np.stack([ds_src[var].values for var in ensemble_vars])

    # turn the "years" variable into the "time" dimension coordinate
    ds_cf = ds_src.rename_vars({"years": "time"})
    ds_cf = ds_cf.set_coords(["time"])
    ds_cf = ds_cf.assign(time=("time", ds_cf["time"].values))

    # remove the dataframe index and the separate ensemble variables; the
    # latter return below as a single 2-D variable built from data_array_r
    # (drop_vars replaces the deprecated Dataset.drop)
    ds_cf = ds_cf.drop_vars(["index"] + ensemble_vars)

    # label the ensemble members along a new "nensemble" dimension
    ds_cf = ds_cf.assign(ensemble=("nensemble", np.array(ensemble_vars, dtype="S")))
    ds_cf = ds_cf.set_coords("ensemble")

    # add the stacked data array to the xarray dataset
    ds_cf = ds_cf.assign(slr=(["nensemble", "time"], data_array_r))

    # add / overwrite attributes; names that are absent from the dataset are
    # skipped explicitly, so any other error is no longer silently swallowed
    for name, attrs in dataset_attributes.items():
        if name in ds_cf.variables:
            ds_cf[name].attrs = dict(attrs)

    # save single CF compliant files
    ds_cf.to_netcdf(path=str(csv_path).replace(".csv", "_CF.nc"))

    ds_list_CF.append(ds_cf)

In [10]:
# display the first dataset of ds_list for visual inspection
ds_list[0]

In [11]:
# display the first dataset of ds_list_CF for visual inspection
ds_list_CF[0]

In [12]:
# display the "slr" variable of the first dataset of ds_list_CF
ds_list_CF[0]["slr"]

In [13]:
# label-based selection on the "time" coordinate: the "slr" values at time 2050
ds_list_CF[0]["slr"].sel(time=2050)

In [14]:
# Pair every CF dataset with the name of the '_CF.nc' file written for it.
# The names are derived from all_files (the list ds_list_CF was built from)
# rather than from a second, separately filtered directory listing: zipping
# ds_list_CF with os.listdir() output silently mis-pairs datasets and names as
# soon as the two sequences differ in length or order.
CF_pairs = []
for csv_path, ds in zip(all_files, ds_list_CF):
    CF_file = os.path.basename(str(csv_path)).replace(".csv", "_CF.nc")
    if CF_file.startswith("MSL"):
        CF_pairs.append((CF_file, ds))

# Sort on the '_'-separated name parts so that, within one location, the
# scenarios come in the same sorted order as the ssps labels below
# (np.unique returns sorted values).
CF_pairs.sort(key=lambda pair: pair[0].split('_'))

# List all file names that end with '_CF.nc' and start with 'MSL'
CF_files = [CF_file for CF_file, _ in CF_pairs]

# Extract unique locations and climate projection scenario's (ssps)
locs = list(np.unique([f.split('_')[1] for f in CF_files]))
ssps = list(np.unique([f.split('_')[2] for f in CF_files]))

# group the datasets per location, in the order of locs
ds_list_CF_merge1 = []
for loc in locs:
    ds_ls = []
    for CF_file, ds in CF_pairs:
        # exact match on the location part of the name; a substring test
        # would also match a location whose name contains another one
        if CF_file.split('_')[1] == loc:
            print(CF_file)
            ds_ls.append(ds)
    ds_list_CF_merge1.append(ds_ls)

MSL_Genova_high_end_CF.nc
MSL_Genova_ssp126_CF.nc
MSL_Genova_ssp245_CF.nc
MSL_Genova_ssp585_CF.nc
MSL_LaRochelle_high_end_CF.nc
MSL_LaRochelle_ssp126_CF.nc
MSL_LaRochelle_ssp245_CF.nc
MSL_LaRochelle_ssp585_CF.nc
MSL_SanSebastian_high_end_CF.nc
MSL_SanSebastian_ssp126_CF.nc
MSL_SanSebastian_ssp245_CF.nc
MSL_SanSebastian_ssp585_CF.nc
MSL_Valencia_high_end_CF.nc
MSL_Valencia_ssp126_CF.nc
MSL_Valencia_ssp245_CF.nc
MSL_Valencia_ssp585_CF.nc


In [15]:
ds_list_CF_merge2 = []

# custom (CF convention) attributes for the scenario coordinate
dataset_attributes = {
    "scenarios": {"long_name": "shared socioeconomic pathways", "units": "1"},  # set to 1 if no unit
}

for ds_lists in ds_list_CF_merge1:
    # stack the scenario datasets of one location along a new dimension and
    # label them with the scenario names
    ds = xr.concat(ds_lists, dim='nscenarios')
    ds = ds.assign(scenarios=(["nscenarios"], np.array(ssps, dtype="S")))
    ds = ds.set_coords("scenarios")

    # add / overwrite attributes; absent names are skipped explicitly instead
    # of swallowing every exception
    for k, v in dataset_attributes.items():
        if k in ds.variables:
            ds[k].attrs = dict(v)

    ds_list_CF_merge2.append(ds)

# merge areas
ds_CF_merge = xr.concat(ds_list_CF_merge2, dim="nstations")
ds_CF_merge = ds_CF_merge.assign(stations=(["nstations"], np.array(locs, dtype="S")))
ds_CF_merge = ds_CF_merge.set_coords("stations")

# Add coordinates of location (lat,lon)
# Keyed by station name so every station receives its own coordinates whatever
# the order of locs; positional lists are only right for one fixed station
# order. An unknown station name raises a KeyError instead of being mislabelled.
station_coords = {
    "Genova": (44.0, 9.0),
    "LaRochelle": (46.0, -1.5),
    "SanSebastian": (43.5, -2.0),
    "Valencia": (39.5, 0.0),
}
lat = [station_coords[loc][0] for loc in locs]
lon = [station_coords[loc][1] for loc in locs]

ds_CF_merge = ds_CF_merge.assign_coords(lat=("nstations", lat))
ds_CF_merge = ds_CF_merge.assign_coords(lon=("nstations", lon))

# custom (CF convention) attributes for the station coordinates
dataset_attributes = {
    "stations": {"long_name": "stations", "units": "1"},
    "lat": {"standard_name": "latitude", "units": "degrees_north"},
    "lon": {"standard_name": "longitude", "units": "degrees_east"},
}

for k, v in dataset_attributes.items():
    if k in ds_CF_merge.variables:
        ds_CF_merge[k].attrs = dict(v)

In [17]:
# display the merged dataset for visual inspection
ds_CF_merge

In [18]:
# Persist the merged dataset as NetCDF so its compliancy can be checked.

# file locking is switched off first, see: https://github.com/pydata/xarray/issues/2376
import os
os.environ['HDF5_USE_FILE_LOCKING'] = 'FALSE'

merged_cf_path = str(dataset_dir_path).replace(".nc", "_CF.nc")
ds_CF_merge.to_netcdf(path=merged_cf_path)

In [19]:
%%capture cap --no-stderr

# CF compliancy check of the altered (merged) NetCDF file
altered_nc_file = str(dataset_dir_path).replace(".nc", "_CF.nc")
check_compliancy(testfile=altered_nc_file, working_dir=CF_dir)

In [20]:
# store the captured CF compliancy output of the altered (merged) '_CF.nc' file
save_compliancy(
    cap,
    testfile=str(dataset_dir_path).replace(".nc", "_CF.nc"),
    working_dir=CF_dir,
)



In [21]:
# write the merged dataset to a zarr store; mode "w" overwrites an existing one
zarr_path = dataset_dir.joinpath("%s.zarr" % ds_out_file)
ds_CF_merge.to_zarr(zarr_path, mode="w")

<xarray.backends.zarr.ZarrStore at 0x2496a108c10>

In [22]:
# re-open the exported zarr store to check what was written
zarr_store_path = dataset_dir.joinpath("%s.zarr" % ds_out_file)
check = xr.open_zarr(zarr_store_path)

In [23]:
# display the dataset read back from the zarr store
check

Unnamed: 0,Array,Chunk
Bytes,18 B,18 B
Shape,"(3,)","(3,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray
"Array Chunk Bytes 18 B 18 B Shape (3,) (3,) Dask graph 1 chunks in 2 graph layers Data type |S6 numpy.ndarray",3  1,

Unnamed: 0,Array,Chunk
Bytes,18 B,18 B
Shape,"(3,)","(3,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,32 B,32 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 32 B 32 B Shape (4,) (4,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4  1,

Unnamed: 0,Array,Chunk
Bytes,32 B,32 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,32 B,32 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 32 B 32 B Shape (4,) (4,) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4  1,

Unnamed: 0,Array,Chunk
Bytes,32 B,32 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,24 B,24 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray
"Array Chunk Bytes 24 B 24 B Shape (4,) (4,) Dask graph 1 chunks in 2 graph layers Data type |S6 numpy.ndarray",4  1,

Unnamed: 0,Array,Chunk
Bytes,24 B,24 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,48 B,48 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,|S12 numpy.ndarray,|S12 numpy.ndarray
"Array Chunk Bytes 48 B 48 B Shape (4,) (4,) Dask graph 1 chunks in 2 graph layers Data type |S12 numpy.ndarray",4  1,

Unnamed: 0,Array,Chunk
Bytes,48 B,48 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,|S12 numpy.ndarray,|S12 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,1.50 kiB,1.50 kiB
Shape,"(4, 4, 3, 4)","(4, 4, 3, 4)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 1.50 kiB 1.50 kiB Shape (4, 4, 3, 4) (4, 4, 3, 4) Dask graph 1 chunks in 2 graph layers Data type float64 numpy.ndarray",4  1  4  3  4,

Unnamed: 0,Array,Chunk
Bytes,1.50 kiB,1.50 kiB
Shape,"(4, 4, 3, 4)","(4, 4, 3, 4)"
Dask graph,1 chunks in 2 graph layers,1 chunks in 2 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray


In [24]:
# spot check: take a single station / scenario / ensemble member by position
tester = check.isel({"nstations": 0, "nscenarios": 1, "nensemble": 0})

# print its three labels, followed by the slr values
for name in ("stations", "scenarios", "ensemble", "slr"):
    print(tester[name].values)
tester

b'Genova'
b'ssp126'
b'low'
[0.06440561 0.14773355 0.32510475 0.41863393]


Unnamed: 0,Array,Chunk
Bytes,6 B,6 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray
Array Chunk Bytes 6 B 6 B Shape () () Dask graph 1 chunks in 3 graph layers Data type |S6 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,6 B,6 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
Array Chunk Bytes 8 B 8 B Shape () () Dask graph 1 chunks in 3 graph layers Data type float64 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
Array Chunk Bytes 8 B 8 B Shape () () Dask graph 1 chunks in 3 graph layers Data type float64 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,8 B,8 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,6 B,6 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray
Array Chunk Bytes 6 B 6 B Shape () () Dask graph 1 chunks in 3 graph layers Data type |S6 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,6 B,6 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,|S6 numpy.ndarray,|S6 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,12 B,12 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,|S12 numpy.ndarray,|S12 numpy.ndarray
Array Chunk Bytes 12 B 12 B Shape () () Dask graph 1 chunks in 3 graph layers Data type |S12 numpy.ndarray,,

Unnamed: 0,Array,Chunk
Bytes,12 B,12 B
Shape,(),()
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,|S12 numpy.ndarray,|S12 numpy.ndarray

Unnamed: 0,Array,Chunk
Bytes,32 B,32 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
"Array Chunk Bytes 32 B 32 B Shape (4,) (4,) Dask graph 1 chunks in 3 graph layers Data type float64 numpy.ndarray",4  1,

Unnamed: 0,Array,Chunk
Bytes,32 B,32 B
Shape,"(4,)","(4,)"
Dask graph,1 chunks in 3 graph layers,1 chunks in 3 graph layers
Data type,float64 numpy.ndarray,float64 numpy.ndarray
