# Cost and Benefit Coastal Adaptation

Notebook environment to migrate netcdf files to zarr and geojson

In [1]:
# Use the black code formatter
%load_ext lab_black

### Configure OS independent paths

In [2]:
import os
import pathlib
import sys

# Make root directories importable by appending root to path
cwd = pathlib.Path().resolve()
sys.path.append(os.path.dirname(cwd))


# Get root paths
home = pathlib.Path().home()
root = home.root

# Define both local and remote drives
local_data_dir = home.joinpath("ddata")
p_dir = pathlib.Path(root, "p")
coclico_data_dir = p_dir.joinpath("11205479-coclico", "data")

# Project paths
local_auth_dir = local_data_dir.joinpath("AUTH_files")
remote_auth_dir = coclico_data_dir.joinpath("AUTH_files")
netcdf_dir = pathlib.Path("netcdf_files", "06.Coast and benefits of coastal adaptation")
json_dir = pathlib.Path("json_files", "06.Coast and benefits of coastal adaptation")

In [3]:
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr

In [4]:
def get_fp(fn, suffix, remote_drive=True):
    file_dirs = {
        ".json": pathlib.Path(
            "json_files", "06.Coast and benefits of coastal adaptation"
        ),
        ".nc": pathlib.Path(
            "netcdf_files", "06.Coast and benefits of coastal adaptation"
        ),
    }
    local_auth_dir = local_data_dir.joinpath("AUTH_files")
    remote_auth_dir = coclico_data_dir.joinpath("AUTH_files")

    if not remote_drive:
        return local_auth_dir.joinpath(file_dirs[suffix]).joinpath(fn + suffix)
    return remote_auth_dir.joinpath(file_dirs[suffix]).joinpath(fn + suffix)

In [5]:
fn_benefit = "benefitNoDiscounting"
fn_cost = "costNoDiscounting"
fn_cbr = "cbr"
fn_protection = "dZprotectionMean"

files = [fn_benefit, fn_cost, fn_cbr, fn_protection]

In [6]:
ds_benefit, ds_cost, ds_cbr, ds_protection = [
    xr.load_dataset(get_fp(fn, suffix=".nc", remote_drive=False)) for fn in files
]

In [7]:
df_benefit, df_cost, df_cbr, df_protection = [
    pd.read_json(get_fp(fn, suffix=".json", remote_drive=False)) for fn in files
]

In [8]:
nuts_regions = gpd.read_file(
    local_data_dir.joinpath("crap", "NUTS_RG_20M_2021_3857.shp")
)
nuts_regions = nuts_regions.to_crs("EPSG:4326")

### Load in raw data from p drive (excel sheets)

The nuts regions are not included as attributes in the netcdf files. The ones from the excel sheet are not present in recent nuts regsion shapefile by the EU. Therefore, project coordinates from data into current nuts regions. 

In [9]:
xlsx_benefit, xlsx_cost, xlsx_cbr, xlsx_protection = [
    pd.read_excel(coclico_data_dir.joinpath("06_adaptation_jrc", f"{fn}.xlsx"))
    for fn in files
]

  warn("Workbook contains no default style, apply openpyxl's default")


In [10]:
from functools import reduce

xlsx_dfs = xlsx_benefit, xlsx_cost, xlsx_cbr, xlsx_protection
xlsx_merged = reduce(
    lambda l, r: pd.merge(l, r, on=["NUTS2 ID"], how="outer"), xlsx_dfs
)

  lambda l, r: pd.merge(l, r, on=["NUTS2 ID"], how="outer"), xlsx_dfs


In [11]:
# These ones do not have matching nuts ID
xlsx_merged[~xlsx_merged["NUTS2 ID"].isin(nuts_regions["NUTS_ID"].unique())]

Unnamed: 0,NUTS2 ID,Sustainability_x,Fossil Fuel Development_x,Sustainability_y,Fossil Fuel Development_y,Sustainability_x.1,Fossil Fuel Development_x.1,Sustainability_y.1,Fossil Fuel Development_y.1
15,EL11,48.607925,112.088865,1.879231,2.448088,3.80138,6.447007,0.081854,0.107404
16,EL12,268.560961,680.248504,6.994262,10.018977,6.441825,10.952814,0.179949,0.262737
17,EL14,53.456706,178.656602,2.257437,3.417903,3.495201,7.239238,0.089658,0.137092
18,EL21,45.427992,117.64328,1.996287,2.712619,3.451028,5.939982,0.080648,0.109923
19,EL22,645.836498,1251.193676,13.44895,15.496088,20.985403,23.610005,0.159546,0.199764
20,EL23,161.618422,414.383409,5.698265,8.134689,4.389671,6.844855,0.124179,0.170159
21,EL24,54.189352,131.038513,3.081566,5.313674,2.770613,3.582319,0.047527,0.08109
22,EL25,75.58217,184.77254,2.966776,5.021141,4.288782,5.539429,0.055053,0.085561
41,FR22,128.999828,400.508227,5.368579,6.551856,7.51351,15.954175,1.136414,1.398906
42,FR23,540.927997,1861.498074,14.295079,18.748476,13.280324,27.616757,0.774165,1.007273


### Infer nuts regions

In [12]:
df_cost = df_cost.rename(
    {
        "latitude(degrees north of the NUTS2 regions centroid)": "latitude",
        "longitude(degrees east of the NUTS2 regions centroid)": "longitude",
    },
    axis="columns",
)

In [13]:
gdf_cost = gpd.GeoDataFrame(
    df_cost,
    geometry=gpd.points_from_xy(df_cost.longitude, df_cost.latitude),
    crs="EPSG:4326",
)

In [14]:
result = gpd.sjoin(nuts_regions[nuts_regions["LEVL_CODE"] == 2], gdf_cost)

In [15]:
result = result[["index_right", "NUTS_ID", "NAME_LATN", "CNTR_CODE", "geometry"]]
result = result.rename(
    {
        "index_right": "ID",
        "NUTS_ID": "Acronym",
        "NAME_LATN": "Name",
        "CNTR_CODE": "Country",
    },
    axis="columns",
)
result.head()

Unnamed: 0,ID,Acronym,Name,Country,geometry
125,8,DEF0,Schleswig-Holstein,DE,"MULTIPOLYGON (((11.27238 54.41471, 11.12805 54..."
149,4,CY00,Kýpros,CY,"POLYGON ((32.27382 35.06649, 32.28884 35.10503..."
172,11,DK03,Syddanmark,DK,"MULTIPOLYGON (((10.73503 54.74956, 10.69169 54..."
173,12,DK04,Midtjylland,DK,"MULTIPOLYGON (((11.59468 56.74416, 11.59850 56..."
174,13,DK05,Nordjylland,DK,"MULTIPOLYGON (((11.22294 57.27150, 11.16377 57..."


### Missing regions

For some nuts regions there is no data, as the NUTS2 identifiers do not match while the data is presented at centroids which are occasionally situated in the sea, hence, no NUTS2 region can be derived. 

In [16]:
# gdf_cost[~gdf_cost.index.isin(result["ID"])].explore()

In [17]:
# nuts_regions[nuts_regions["LEVL_CODE"] == 2].explore()

### Check older NUTS files

In [18]:
nuts_regions = gpd.read_file(
    local_data_dir.joinpath("crap", "NUTS_RG_20M_2010_3857.shp")
)
nuts_regions = nuts_regions.to_crs("EPSG:4326")

In [19]:
# Add nuts column from excel data
gdf_cost["NUTS_ID"] = xlsx_cost["NUTS2 ID"]

In [20]:
nuts_regions.columns

Index(['LEVL_CODE', 'NUTS_ID', 'CNTR_CODE', 'NUTS_NAME', 'NAME_LATN', 'FID',
       'geometry'],
      dtype='object')

In [21]:
result = nuts_regions.merge(gdf_cost, on=["NUTS_ID"], how="inner")

In [25]:
result["id"] = result.index.values
result = result[["id", "NUTS_ID", "NAME_LATN", "CNTR_CODE", "geometry_x"]]
result = result.rename(
    {
        "NUTS_ID": "acronym",
        "NAME_LATN": "name",
        "CNTR_CODE": "country",
        "geometry_x": "geometry",
    },
    axis="columns",
)
result = gpd.GeoDataFrame(result, crs="EPSG:4326")
print(result.shape)
print(f"Type: {type(result)}")
result.head()

(124, 5)
Type: <class 'geopandas.geodataframe.GeoDataFrame'>


Unnamed: 0,id,acronym,name,country,geometry
0,0,BE23,Prov. Oost-Vlaanderen,BE,"POLYGON ((4.31117 51.12615, 4.17579 51.10121, ..."
1,1,BE25,Prov. West-Vlaanderen,BE,"POLYGON ((3.45973 50.76597, 3.45535 50.76456, ..."
2,2,BG33,Severoiztochen,BG,"POLYGON ((28.57888 43.73874, 28.60746 43.53937..."
3,3,BG34,Yugoiztochen,BG,"POLYGON ((26.98672 42.95232, 27.09443 42.95391..."
4,4,CY00,Kýpros,CY,"POLYGON ((33.00572 34.61297, 33.02382 34.58560..."


In [26]:
# result.to_file(
#     coclico_data_dir.joinpath("06_adaptation_jrc", "nuts_regions.geojson"),
#     driver="GeoJSON",
# )

### Zarr data

In [27]:
ds_benefit["lat"].attrs

{'units': 'Degrees north',
 'long_name': 'latitude',
 '_CoordinateAxisType': 'Lat',
 'standard_name': 'latitude of the centroid of NUTS2 regions'}

In [28]:
ds_benefit, ds_cost, ds_cbr, ds_protection

(<xarray.Dataset>
 Dimensions:  (row: 124)
 Dimensions without coordinates: row
 Data variables:
     lat      (row) float64 51.04 51.01 43.37 42.47 ... 55.35 57.24 57.5 54.61
     lon      (row) float64 3.816 3.057 27.33 26.54 ... -2.618 -4.784 -6.694
     sustain  (row) float64 485.1 598.8 4.136 9.219 ... 634.7 8.425 770.7 347.2
     ffd      (row) float64 878.8 1.19e+03 8.261 ... 26.72 2.503e+03 1.403e+03
 Attributes:
     title:        Benefits of raising coastal defences along the European coa...
     description:  Benefit to Cost ratio (after discounting), costs and benefi...
     source:       The data comes from: Vousdoukas, M., Mentaschi, L., Hinkel,...,
 <xarray.Dataset>
 Dimensions:  (row: 124)
 Dimensions without coordinates: row
 Data variables:
     lat      (row) float64 51.04 51.01 43.37 42.47 ... 55.35 57.24 57.5 54.61
     lon      (row) float64 3.816 3.057 27.33 26.54 ... -2.618 -4.784 -6.694
     sustain  (row) float64 9.33 23.56 0.3062 0.8214 ... 28.41 0.8283 39.87

In [29]:
ds_benefit, ds_cost, ds_cbr, ds_protection = [
    ds.set_coords(["lon", "lat"]) for ds in [ds_benefit, ds_cost, ds_cbr, ds_protection]
]

In [36]:
ds = xr.concat(
    [ds_benefit, ds_cost, ds_cbr, ds_protection],
    pd.Index(["benefit", "cost", "cbr", "protection"], name="field"),
)

In [37]:
from shapely import wkt
geoms = result["geometry"].apply(lambda x: wkt.dumps(x))

ds = ds.rename({"row": "id"})
ds = ds.assign_coords(id=("id", range(len(ds["id"]))))
ds = ds.assign_coords(geometry=("id", geoms))
ds = ds.assign_coords(nuts=("id", result["Acronym"]))
ds = ds.assign_coords(name=("id", result["Name"]))
ds = ds.assign_coords(country=("id", result["Country"]))
ds = ds.drop(["lon", "lat"])

KeyError: 'Acronym'

In [35]:
ds

In [None]:
descriptions = {
    "geometry": {
        "long_name": "nuts2 polygons in degrees (wgs-84)",
        "units": "degree",
        "comment": "Describe nuts regions (2010 version), which are available from Eurostat.",
        "crs_wkt": f"{result.crs}",
    },
    "field": {"long_name": "fields of the dataset"},
    "nuts": {
        "long_name": "nuts region acronym",
        "source": "Nuts regions (2010 version), available from Eurostat.",
    },
    "name": {
        "long_name": "nuts region name",
        "source": "Nuts regions (2010 version), available from Eurostat.",
    },
    "country": {"long_name": "country acronym"},
}

In [23]:
for k, v in descriptions.items():
    ds[k].attrs = descriptions[k]

NameError: name 'descriptions' is not defined

In [24]:
ds

NameError: name 'ds' is not defined

### Write to zarr

In [49]:
import zarr

In [50]:
adaptation_dir = coclico_data_dir.joinpath("06_adaptation_jrc")

In [51]:
# ds.to_zarr(
#     adaptation_dir.joinpath("cost_and_benefits_of_coastal_adaptation.zarr"), mode="w"
# )

<xarray.backends.zarr.ZarrStore at 0x7f10e8980430>