## Notebook to process CBA data from CSV files to Parquet

In [1]:
# Load software
import os
import pathlib
import sys
import shapely
import pystac_client
import pandas as pd
from shapely import Polygon, geometry
from affine import Affine
from rasterio.features import shapes
import json
import itertools
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
from dotenv import load_dotenv
import math
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
import rioxarray as rio

# Import custom functionality
from coclicodata.drive_config import p_drive

# Root folder of the CoCliCo full-track data, located below the project drive
coclico_data_dir = p_drive / "11207608-coclico" / "FULLTRACK_DATA"


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [3]:
# Folder that holds the CBA csv files
CBA_dir = coclico_data_dir / 'WP6' / 'data' / 'CBA'

# Lazily list the csv files; only the country-level files are handled for now
CBA_files = CBA_dir.glob('*country*.csv')

In [4]:
# Read the NUTS regions from the STAC catalog

# Open the STAC catalog hosted on Google Cloud
catalog = pystac_client.Client.open(
    "https://storage.googleapis.com/coclico-data-public/coclico/coclico-stac-14nov/catalog.json"
)

# The NUTS dataset is a child collection of the catalog
NUTS = catalog.get_child('NUTS')

# Get the href to the NUTS data
cloud_NUTS_path = NUTS.assets['geoparquet-stac-items'].href

# Retrieve the table with regular pandas; loading it with geopandas is very slow
NUTS_data = pd.read_parquet(cloud_NUTS_path)

# Because the table was read with plain pandas, the geometry column still holds
# WKB (Well-Known Binary). Decode the whole column in one vectorized call instead
# of a per-row Python apply through the implicitly-imported shapely.wkb submodule.
NUTS_data['geometry'] = gpd.GeoSeries.from_wkb(NUTS_data['geometry'], crs='EPSG:3035')

# Now convert to geopandas
NUTS_data = gpd.GeoDataFrame(NUTS_data, geometry='geometry', crs='EPSG:3035')

NUTS_data

c:\Users\kras\AppData\Local\mambaforge\envs\coclico\Lib\site-packages\pystac_client\client.py:187: NoConformsTo: Server does not advertise any conformance classes.


Unnamed: 0,NUTS_ID,LEVL_CODE,CNTR_CODE,NAME_LATN,NUTS_NAME,MOUNT_TYPE,URBN_TYPE,COAST_TYPE,geometry
0,AL,0,AL,b'5368716970eb726961',b'5368716970eb726961',0.0,0,0,"MULTIPOLYGON (((5121233.536 2221719.441, 51208..."
1,CZ,0,CZ,b'?esko',b'?esko',0.0,0,0,"POLYGON ((4624843.654 3112209.741, 4625546.618..."
2,DE,0,DE,b'Deutschland',b'Deutschland',0.0,0,0,"MULTIPOLYGON (((4355225.365 2715902.993, 43541..."
3,DK,0,DK,b'Danmark',b'Danmark',0.0,0,0,"MULTIPOLYGON (((4650502.736 3591342.844, 46503..."
4,CY,0,CY,b'4bfd70726f73',b'??????',0.0,0,0,"MULTIPOLYGON (((6527040.718 1762367.593, 65267..."
...,...,...,...,...,...,...,...,...,...
2005,NO0B1,3,NO,b'Jan Mayen',b'Jan Mayen',3.0,3,1,"POLYGON ((3623747.621 5400386.841, 3624031.138..."
2006,EE009,3,EE,b'Kesk-Eesti',b'Kesk-Eesti',4.0,3,1,"MULTIPOLYGON (((5216227.688 4159212.769, 52172..."
2007,NO0,1,NO,b'Norge',b'Norge',0.0,0,0,"MULTIPOLYGON (((4961367.759 5413266.131, 49622..."
2008,NO0B,2,NO,b'Jan Mayen and Svalbard',b'Jan Mayen and Svalbard',,0,0,"MULTIPOLYGON (((4744650.828 6379141.635, 47446..."


In [10]:
# Keep only the rows whose NUTS level code is 0
is_level0 = NUTS_data['LEVL_CODE'] == 0
NUTS_data = NUTS_data.loc[is_level0]
NUTS_data.shape

(37, 9)

In [18]:
# Combine the per-scenario country csv files into one GeoDataFrame and store it as parquet.

# List all csv files (first focus on country files); sorted so the row order is reproducible
CBA_files = sorted(CBA_dir.glob('*country*.csv'))
if not CBA_files:
    # Fail early with a clear message instead of an opaque pd.concat error further down
    raise FileNotFoundError("No '*country*.csv' files found in %s" % CBA_dir)

# Scenario tags that are expected to appear in the file names
known_scenarios = ("SSP126", "SSP245", "SSP585")

# Collect one GeoDataFrame per csv file
CBA = []

for CBA_file in CBA_files:
    print(CBA_file)

    # Open csv file
    cur_CBA = pd.read_csv(CBA_file)

    # Retrieve the geometry from NUTS data. This is a left join, so rows whose
    # 'nuts' value has no match in NUTS_ID are kept with an empty geometry.
    cur_CBA = cur_CBA.merge(
        NUTS_data[['NUTS_ID', 'geometry']],
        left_on='nuts',
        right_on='NUTS_ID',
        how='left',
    )

    # Convert to a geodataframe; pass the NUTS CRS explicitly so the result does
    # not depend on the CRS being carried through the merge
    cur_CBA = gpd.GeoDataFrame(cur_CBA, geometry='geometry', crs=NUTS_data.crs)

    # Extract the scenario (SSP126, SSP245 or SSP585) from the file name.
    # An unrecognised file is an error rather than being silently labelled SSP585.
    scenario = next((tag for tag in known_scenarios if tag in CBA_file.name), None)
    if scenario is None:
        raise ValueError(
            "Cannot determine scenario for %s; expected one of %s in the file name"
            % (CBA_file, ", ".join(known_scenarios))
        )

    # Add the scenario as a new column
    cur_CBA['scenarios'] = scenario

    # Append to the list
    CBA.append(cur_CBA)

# Stack all scenarios; the merge key 'nuts' duplicates NUTS_ID and is dropped
CBA = pd.concat(CBA, ignore_index=True)
CBA = CBA.drop(columns="nuts")

# Write as parquet next to the csv files (CBA_dir rather than the leaked loop variable)
CBA.to_parquet(str(CBA_dir.joinpath('GCF.open.CBA_country.all.parquet')))

P:\11207608-coclico\FULLTRACK_DATA\WP6\data\CBA\GCF.open.CBA_country.SSP126.csv
P:\11207608-coclico\FULLTRACK_DATA\WP6\data\CBA\GCF.open.CBA_country.SSP245.csv
P:\11207608-coclico\FULLTRACK_DATA\WP6\data\CBA\GCF.open.CBA_country.SSP585.csv


In [19]:
# Show the path of the combined parquet file (built from CBA_dir so it does not
# depend on the loop variable left over from the cell that wrote the file)
str(CBA_dir.joinpath('GCF.open.CBA_country.all.parquet'))

'P:\\11207608-coclico\\FULLTRACK_DATA\\WP6\\data\\CBA\\GCF.open.CBA_country.all.parquet'

In [20]:
# Read the combined parquet file back as a check on what was written
# (path built from CBA_dir instead of the leftover loop variable CBA_file)
test_CBA = gpd.read_parquet(str(CBA_dir.joinpath('GCF.open.CBA_country.all.parquet')))
test_CBA

Unnamed: 0,country,coast_length,protection_2050,retreat_2050,acc_2050,protect_retreat_2050,no_adaptation_2050,protection_2100,retreat_2100,acc_2100,...,retreat_2150,acc_2150,protect_retreat_2150,no_adaptation_2150,costs_adaptation,costs_damage,costs,NUTS_ID,geometry,scenarios
0,Aaland,3886.0,0.0,3.8,1.5,0.0,94.7,0.0,3.8,1.5,...,3.8,1.5,0.0,94.7,0.2,0.0,0.2,,,SSP126
1,Albania,753.0,10.7,3.8,40.0,0.0,45.5,27.6,3.8,23.1,...,9.3,1.0,0.0,45.1,1.3,25.5,26.8,AL,"MULTIPOLYGON (((5121233.536 2221719.441, 51208...",SSP126
2,Belgium,319.0,97.3,2.0,0.0,0.0,0.7,97.3,2.0,0.0,...,2.0,0.0,0.0,0.7,16.3,10.7,27.0,BE,"MULTIPOLYGON (((3962902.889 3162436.894, 39626...",SSP126
3,Bulgaria,466.0,0.4,0.0,23.1,0.0,76.5,0.4,0.0,24.4,...,0.0,18.5,0.0,75.1,0.1,0.9,1.0,BG,"POLYGON ((5330611.947 2430822.479, 5332044.063...",SSP126
4,Croatia,5309.0,3.5,7.7,1.2,0.0,87.6,4.6,7.7,0.0,...,7.7,0.0,0.1,87.7,1.2,0.7,1.8,HR,"MULTIPOLYGON (((4809428.353 2624702.723, 48094...",SSP126
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
100,Sweden,24115.0,2.9,20.4,1.1,0.4,75.2,2.9,19.6,1.2,...,19.5,1.2,1.3,75.1,14.8,17.5,32.4,SE,"MULTIPOLYGON (((4972382.336 4789635.007, 49721...",SSP585
101,Turkey,827.0,0.3,6.7,4.4,0.0,88.5,0.3,6.7,4.4,...,6.7,0.2,0.3,88.5,0.1,2.0,2.0,TR,"MULTIPOLYGON (((6349101.036 2450900.951, 63518...",SSP585
102,Ukraine,1732.0,0.0,22.7,0.9,0.0,76.4,0.0,22.7,0.6,...,22.7,0.0,0.1,77.2,0.3,0.1,0.4,RO,"MULTIPOLYGON (((5550222.999 2933295.763, 55512...",SSP585
103,United Kingdom,19867.0,10.4,24.8,0.6,2.5,61.7,10.4,24.8,0.8,...,24.8,0.7,2.5,61.5,145.6,116.9,262.5,UK,"MULTIPOLYGON (((3546135.140 4022028.934, 35466...",SSP585


### Split the combined file above into one file per adaptation strategy, scenario and year for the F/E map

In [44]:
# Write one parquet and one GeoPackage file per adaptation strategy, scenario and
# year, each holding the shared base columns plus the single '<strategy>_<year>' column.
# NOTE(review): 'time' and 'scenario' are generic names (the latter overwrites the
# string set in the combine cell); kept unchanged in case other cells rely on them.
adap_strategy = ["protection", "retreat", "acc", "protect_retreat", "no_adaptation"]
scenario = ["SSP126", "SSP245", "SSP585"]
time = ["2050", "2100", "2150"]
# Columns that go into every output file
keepcols = ["country", "coast_length", "costs_adaptation", "costs_damage", "costs", "NUTS_ID", "geometry", "scenarios"]

# create folder structure & split files
for adap in adap_strategy:
    for scen in scenario:

        # One folder per strategy/scenario pair; it only has to be created once
        out_dir = CBA_dir.joinpath("maps", adap, scen)
        out_dir.mkdir(parents=True, exist_ok=True)

        # Row mask for this scenario; it does not depend on the year
        scen_rows = CBA['scenarios'] == scen

        for t in time:

            # combine the variables to get the column name, e.g. 'protection_2050'
            comb_col = adap + "_" + t

            # Select rows and columns in one step and drop the now-constant scenario
            # column. keepcols itself is never modified, and nothing is changed
            # in place on a slice of CBA.
            fil_DF = CBA.loc[scen_rows, keepcols + [comb_col]].drop(columns="scenarios")

            # write as parquet & gpkg
            fil_DF.to_parquet(str(out_dir.joinpath("GCF_open_CBA_country_%s_%s_%s.parquet"%(adap, scen, t))))
            fil_DF.to_file(str(out_dir.joinpath("GCF_open_CBA_country_%s_%s_%s.gpkg"%(adap, scen, t))), layer='CBA_%s_%s_%s'%(adap, scen, t), driver='GPKG')