# Cost and Benefit Coastal Adaptation

Notebook environment to migrate netcdf files to zarr and geojson

In [None]:
# Use the black code formatter
%load_ext lab_black

### Configure OS independent paths

In [None]:
import os
import pathlib
import sys

# Put the parent of the current working directory on the import path so that
# modules located one level above the working directory can be imported.
cwd = pathlib.Path().resolve()
sys.path.append(os.path.dirname(cwd))


# Anchors for all other paths: the user's home directory and its filesystem root
home = pathlib.Path.home()
root = home.root

# Directory name of this dataset, shared by the local and the remote layout
ds_dirname = "06_adaptation_jrc"

# Local drive
local_data_dir = home / "ddata"
local_temp_dir = local_data_dir / "tmp"

# Remote drive (p drive)
p_dir = pathlib.Path(root) / "p"
coclico_data_dir = p_dir / "11205479-coclico" / "data"
coclico_cf_dir = coclico_data_dir / "CF"

# Project paths: authoritative files on both drives, plus the relative
# sub directories of the netcdf and json variants of the dataset
local_auth_dir = local_data_dir / "AUTH_files"
remote_auth_dir = coclico_data_dir / "AUTH_files"
netcdf_dir = pathlib.Path("netcdf_files") / "06.Coast and benefits of coastal adaptation"
json_dir = pathlib.Path("json_files") / "06.Coast and benefits of coastal adaptation"

In [None]:
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr

In [None]:
def get_fp(fn, suffix, remote_drive=True):
    """Build the path of a dataset file inside the AUTH_files directory tree.

    Args:
        fn: File name without extension.
        suffix: File extension including the leading dot. Only ".json" and
            ".nc" are known; any other value raises a KeyError.
        remote_drive: If truthy (default), the path is rooted at
            ``coclico_data_dir/temp/AUTH_files``; otherwise it is rooted at
            ``local_data_dir/tmp/AUTH_files``.

    Returns:
        pathlib.Path: ``<auth dir>/<sub directory of the suffix>/<fn><suffix>``.
    """
    dataset_label = "06.Coast and benefits of coastal adaptation"
    subdir_by_suffix = {
        ".json": pathlib.Path("json_files", dataset_label),
        ".nc": pathlib.Path("netcdf_files", dataset_label),
    }
    # Both roots are built up front; the flag only selects which one is used.
    auth_dir_by_drive = {
        False: local_data_dir / "tmp" / "AUTH_files",
        True: coclico_data_dir / "temp" / "AUTH_files",
    }

    auth_dir = auth_dir_by_drive[bool(remote_drive)]
    return auth_dir / subdir_by_suffix[suffix] / (fn + suffix)

In [None]:
# Base names (without extension) of the four datasets handled in this notebook
files = [
    "benefitNoDiscounting",
    "costNoDiscounting",
    "cbr",
    "dZprotectionMean",
]

# Individual handles on the same names, in the order of the list above
fn_benefit, fn_cost, fn_cbr, fn_protection = files

In [None]:
# Load the netcdf variant of every dataset into memory from the local drive
ds_benefit, ds_cost, ds_cbr, ds_protection = (
    xr.load_dataset(get_fp(name, suffix=".nc", remote_drive=False)) for name in files
)

In [None]:
# Read the json variant of every dataset into a dataframe from the local drive
df_benefit, df_cost, df_cbr, df_protection = (
    pd.read_json(get_fp(name, suffix=".json", remote_drive=False)) for name in files
)

### Load in raw data from p drive (excel sheets)

The NUTS regions are not included as attributes in the netCDF files. The regions listed in the Excel sheets are not present in the recent NUTS regions shapefile published by the EU. Therefore, the coordinates from the data are projected onto the current NUTS regions.

In [None]:
# Read the excel sheet of every dataset from the local temporary directory
xlsx_benefit, xlsx_cost, xlsx_cbr, xlsx_protection = map(
    pd.read_excel,
    (local_temp_dir / "06_adaptation_jrc" / f"{name}.xlsx" for name in files),
)

In [None]:
from functools import reduce

# Outer-join the four excel tables on the NUTS2 identifier, folding them
# pairwise from left to right.
# NOTE(review): non-key columns shared by several sheets receive pandas'
# default "_x"/"_y" suffixes -- verify the merged columns before relying on them.
xlsx_dfs = xlsx_benefit, xlsx_cost, xlsx_cbr, xlsx_protection
xlsx_merged = reduce(
    lambda left, right: left.merge(right, on=["NUTS2 ID"], how="outer"),
    xlsx_dfs,
)

### Add nuts region

NUTS regions are obtained from Eurostat, but the most recent NUTS region files do not
match the regions used in the datasets. The files describing the 2010 NUTS regions
seem to match the regions used in the studies.

In [None]:
# Read the 2010 NUTS regions shapefile and reproject it to EPSG:4326, the
# coordinate reference system used for the dataset points
nuts_regions = gpd.read_file(
    local_data_dir / "tmp" / "NUTS_RG_20M_2010_3857.shp"
).to_crs("EPSG:4326")

In [None]:
# use one of the datasets to create a geodataframe
df_cost = df_cost.rename(
    {
        "latitude(degrees north of the NUTS2 regions centroid)": "latitude",
        "longitude(degrees east of the NUTS2 regions centroid)": "longitude",
    },
    axis="columns",
)

gdf_cost = gpd.GeoDataFrame(
    df_cost,
    geometry=gpd.points_from_xy(df_cost.longitude, df_cost.latitude),
    crs="EPSG:4326",
)

# Add nuts column from excel data. The assignment aligns on the index, so it
# assumes the excel sheet and the json data list the regions in the same row
# order -- TODO confirm.
gdf_cost["NUTS_ID"] = xlsx_cost["NUTS2 ID"]

# Inner join to keep only nuts regions used in dataset. The dataset is the LEFT
# frame on purpose: an inner merge preserves the order of the left keys, so the
# resulting regions follow the row order of the dataset instead of the row
# order of the shapefile. The geometries are later attached to the dataset
# positionally, which is only correct when both orders agree.
nuts_regions = gdf_cost[["NUTS_ID"]].merge(
    nuts_regions[["NUTS_ID", "NAME_LATN", "CNTR_CODE", "geometry"]],
    on="NUTS_ID",
    how="inner",
)

# format dataframe
nuts_regions["instance"] = nuts_regions.index.values
nuts_regions = nuts_regions[
    ["instance", "NUTS_ID", "NAME_LATN", "CNTR_CODE", "geometry"]
]
nuts_regions = nuts_regions.rename(
    {
        "NUTS_ID": "acronym",
        "NAME_LATN": "name",
        "CNTR_CODE": "country",
    },
    axis="columns",
)
# the merge above returns a plain dataframe; turn it back into a geodataframe
nuts_regions = gpd.GeoDataFrame(nuts_regions, geometry="geometry", crs="EPSG:4326")
nuts_regions.head()

# write result to geojson
# nuts_regions.to_file(
#     coclico_data_dir.joinpath("06_adaptation_jrc", "nuts_regions.geojson"),
#     driver="GeoJSON",
# )

## Make datasets CF compliant 

In [None]:
# Promote the lon/lat variables of every dataset to coordinates
ds_benefit = ds_benefit.set_coords(["lon", "lat"])
ds_cost = ds_cost.set_coords(["lon", "lat"])
ds_cbr = ds_cbr.set_coords(["lon", "lat"])
ds_protection = ds_protection.set_coords(["lon", "lat"])

In [None]:
def reshape_dataset(
    ds: xr.Dataset,
    var_name: str,
    scenarios=("RCP45-SSP1", "RCP85-SSP5"),
) -> xr.Dataset:
    """Store variables in dimension scenarios.

    Dataset contains multiple variables which each represent a combined RCP-SSP
    scenario. These are stacked along a new ``nscenarios`` dimension into one
    variable named ``var_name`` and labelled with a ``scenarios`` coordinate.
    The ``row`` dimension is renamed to ``stations`` and the result is ordered
    as (``nscenarios``, ``stations``).

    Args:
        ds (xr.Dataset): Dataset with one data variable per scenario, defined
            along the ``row`` dimension. It is not modified.
        var_name (str): Name of the stacked variable in the returned dataset.
        scenarios: Scenario labels, one per data variable of ``ds`` and in the
            same order as those variables. They are stored as byte strings.
            Defaults to ``("RCP45-SSP1", "RCP85-SSP5")``.

    Returns:
        xr.Dataset: Dataset holding the variable ``var_name`` with dimensions
            (``nscenarios``, ``stations``) and a ``scenarios`` coordinate along
            ``nscenarios``.
    """
    labels = np.array(list(scenarios), dtype="S")

    stacked = (
        # to_array builds a new object, so no defensive copy of ds is needed
        ds.to_array("nscenarios", var_name)
        .to_dataset()
        # drop the index that to_array creates from the variable names; the
        # scenario labels assigned below take its place
        .reset_index("nscenarios", drop=True)
        .assign_coords(scenarios=("nscenarios", labels))
        .rename_dims({"row": "stations"})
    )
    return stacked.transpose("nscenarios", "stations")


# Reshape every dataset into a single variable with a scenario dimension, then
# combine the four variables into one dataset
ds_benefit_, ds_cost_, ds_cbr_, ds_protection_ = (
    reshape_dataset(dataset, name)
    for dataset, name in (
        (ds_benefit, "benefit"),
        (ds_cost, "cost"),
        (ds_cbr, "cbr"),
        (ds_protection, "eb"),
    )
)
ds = xr.merge([ds_benefit_, ds_cost_, ds_cbr_, ds_protection_])

### Add geometries from NUTS regions as coordinates

In [None]:
from shapely import wkb

# serialise the geometry of every NUTS2 region to well-known binary (wkb)
geoms = nuts_regions["geometry"].apply(wkb.dumps)

# attach the serialised geometries to the dataset as a coordinate along the
# stations dimension
ds = ds.assign_coords(geometry=("stations", geoms))

In [None]:
# add global attrs
ds.attrs["Conventions"] = "CF-1.8"
ds.attrs["crs"] = 4326

# add coordinates attributes
ds["lon"].attrs["standard_name"] = "longitude"
ds["lon"].attrs["units"] = "degrees_east"
ds["lon"].attrs[
    "long_name"
] = "Longitude of the centroid of the NUTS2 region (2010 version)."
# pop with a default (instead of del) does not fail when the attribute is
# already gone, e.g. when this cell is run a second time
ds["lon"].attrs.pop("_CoordinateAxisType", None)

ds["lat"].attrs["standard_name"] = "latitude"
ds["lat"].attrs["units"] = "degrees_north"
ds["lat"].attrs[
    "long_name"
] = "Latitude of the centroid of the NUTS2 region (2010 version)."
ds["lat"].attrs.pop("_CoordinateAxisType", None)

# NOTE(review): "crs_wkt" holds the EPSG code as a string, not a WKT
# definition -- confirm which of the two the consumers of this file expect.
ds["geometry"].attrs = {
    "long_name": "NUTS2 regions (polygons) in well-known binary format (wkb).",
    "geometry_type": "polygon",
    "units": "degree",
    "comment": "These NUTS2 regions (2010 version) are available at Eurostat.",
    "crs_wkt": f"{nuts_regions.crs.to_epsg()}",
}

ds["scenarios"].attrs = {"long_name": "Combined RCP and SSP climate scenarios."}


# add variable attributes; every long_name is taken from the title of the
# dataset the variable originates from
ds["benefit"].attrs = {"long_name": ds_benefit.attrs["title"], "units": "EUR 1 000 000"}
ds["cost"].attrs = {"long_name": ds_cost.attrs["title"], "units": "EUR 1 000 000"}
# the cost-benefit ratio previously reused the title of the benefit dataset;
# assumes ds_cbr carries a "title" attribute like the other datasets -- TODO confirm
ds["cbr"].attrs = {"long_name": ds_cbr.attrs["title"]}
ds["eb"].attrs = {"long_name": ds_protection.attrs["title"]}

### Run CF checker

In [None]:
# write the dataset to a netcdf file in the local temporary directory
ds_outpath = local_temp_dir / "cbca_CF.nc"
ds.to_netcdf(ds_outpath)

In [None]:
# display the path of the netcdf file for inspection
ds_outpath

In [None]:
# Check the netcdf file with the cfchecker python library, using its default
# settings (hence the most current variable, region, etc. names)
from cfchecker.cfchecks import CFChecker

CFChecker().checker(str(ds_outpath))

### Write CF logs to p_drive as backlog

In [None]:
# define paths to save log files

# directory that receives the CF checker logs of this dataset
cf_dir = coclico_cf_dir.joinpath(ds_dirname)
# exist_ok=True replaces the separate exists() check, so the cell can be re-run
# safely; parents is left at its default so that a missing parent directory
# still raises instead of being created silently
cf_dir.mkdir(exist_ok=True)

In [None]:
from contextlib import redirect_stdout

# write CF logs to p_drive: the checker output on stdout is redirected into
# <cf_dir>/<name of the netcdf file without extension>.check
check_fp = cf_dir.joinpath(ds_outpath.stem).with_suffix(".check")
# explicit encoding keeps the log independent of the platform default
with open(check_fp, "w", encoding="utf-8") as f, redirect_stdout(f):
    CFChecker().checker(str(ds_outpath))

### Copy files from local to p_drive

In [None]:
import shutil

# TODO: fix permission error when copying to p_drive
# shutil.copy(ds_outpath, coclico_data_dir.joinpath(ds_dirname, ds_outpath.name))

# workaround: print the equivalent cp command so the copy can be run in a shell
remote_copy = coclico_data_dir / ds_dirname / ds_outpath.name
print(f"cp '{ds_outpath}' '{remote_copy}'")

# Save to zarr store

In [None]:
# write the dataset to a zarr store in the local temporary directory;
# mode="w" replaces a store that already exists at that location
zarr_store_fp = local_temp_dir / "eu_coastal_adaptation.zarr"
ds.to_zarr(store=zarr_store_fp, mode="w")

In [None]:
# Check whether the CF-compliant netcdf file is present on the p drive. The
# path is built from the configured directories instead of a hard-coded string
# so that it stays in line with the OS independent paths defined above.
coclico_data_dir.joinpath(ds_dirname, ds_outpath.name).exists()