## Notebook to convert CBA csv files to parquet and attach floodplain geometry

In [2]:
# Load software
import os
import pathlib
import sys
import shapely
import pystac_client
import pandas as pd
from shapely import Polygon, geometry
from affine import Affine
from rasterio.features import shapes
import json
import itertools
import numpy as np
import geopandas as gpd
import pandas as pd
import matplotlib.pyplot as plt
import xarray as xr
from dotenv import load_dotenv
import math
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Union
import rioxarray as rio

# Import custom functionality
from coclicodata.drive_config import p_drive

# Define (local and) remote drives
# Root folder of the CoCliCo FULLTRACK_DATA on the project drive; the CBA and
# floodplain paths used further down are all built relative to this directory.
coclico_data_dir = p_drive.joinpath("11207608-coclico", "FULLTRACK_DATA")


import os
os.environ['USE_PYGEOS'] = '0'
import geopandas

In the next release, GeoPandas will switch to using Shapely by default, even if PyGEOS is installed. If you only have PyGEOS installed to get speed-ups, this switch should be smooth. However, if you are using PyGEOS directly (calling PyGEOS functions on geometries from GeoPandas), this will then stop working and you are encouraged to migrate from PyGEOS to Shapely 2.0 (https://shapely.readthedocs.io/en/latest/migration_pygeos.html).
  import geopandas as gpd


In [3]:
# Folder that holds the CBA csv exports
CBA_dir = coclico_data_dir.joinpath('WP6', 'data', 'CBA_D6.4')

# Collect the csv files to convert (first focus on country files)
CBA_csv_files = [csv_path for csv_path in CBA_dir.glob('*CBA.SSP*.csv')]

# Convert every csv into a parquet file with the same base name, next to the csv
for CBA_csv_file in CBA_csv_files:
    # Target path: same folder and name, '.csv' swapped for '.parquet'
    parquet_file = CBA_csv_file.with_suffix('.parquet')

    # Load the csv and store it as parquet without writing the index
    df = pd.read_csv(CBA_csv_file)
    df.to_parquet(parquet_file, index=False)

    # Report what was converted
    print(f"Converted {CBA_csv_file} to {parquet_file}")

Converted P:\11207608-coclico\FULLTRACK_DATA\WP6\data\CBA_D6.4\GCF.open.CBA.SSP126.csv to P:\11207608-coclico\FULLTRACK_DATA\WP6\data\CBA_D6.4\GCF.open.CBA.SSP126.parquet
Converted P:\11207608-coclico\FULLTRACK_DATA\WP6\data\CBA_D6.4\GCF.open.CBA.SSP245.csv to P:\11207608-coclico\FULLTRACK_DATA\WP6\data\CBA_D6.4\GCF.open.CBA.SSP245.parquet
Converted P:\11207608-coclico\FULLTRACK_DATA\WP6\data\CBA_D6.4\GCF.open.CBA.SSP585.csv to P:\11207608-coclico\FULLTRACK_DATA\WP6\data\CBA_D6.4\GCF.open.CBA.SSP585.parquet


In [4]:
# List all CBA parquet files (first focus on country files).
# The glob pattern also matches the '*.floodplain.parquet' files that the
# geometry-merge cell writes into this same directory, so those derived files are
# filtered out here; otherwise a second run of the notebook would treat its own
# outputs as inputs. The result is sorted because glob order is not guaranteed and
# the cells below pick files by position.
CBA_files = sorted(
    parquet_path
    for parquet_path in CBA_dir.glob('*CBA.SSP*.parquet')
    if not parquet_path.name.endswith('.floodplain.parquet')
)

In [5]:
# Inspect the first CBA parquet file: read it into a pandas DataFrame and display it
CBA_file = CBA_files[0]
CBA_df = pd.read_parquet(path=CBA_file)
CBA_df


Unnamed: 0,fpid,year,protection_height,protection_level,retreat_height,accommodation,total_costs,total_adaptation_costs,total_flood_damages,current_adaptation_cost,annual_flood_damage
0,15654,2020,0.0,1.0,0.0,0,672686,122598,550087,122598,55009
1,15654,2030,0.0,1.0,0.7,0,0,0,0,0,0
2,15654,2040,0.0,1.0,0.7,0,0,0,0,0,0
3,15654,2050,0.0,1.0,0.7,0,0,0,0,0,0
4,15654,2060,0.0,1.0,0.7,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...
295577,34865,2110,0.0,1.0,5.0,0,3116,0,3116,0,20
295578,34865,2120,0.0,1.0,5.0,0,3920,0,3920,0,70
295579,34865,2130,0.0,1.0,5.0,0,4326,0,4326,0,86
295580,34865,2140,0.0,1.0,5.0,0,4662,0,4662,0,466


In [6]:
# Inspect the second CBA parquet file (index 1): read it into a pandas DataFrame and display it
CBA_file = CBA_files[1]
CBA_df = pd.read_parquet(path=CBA_file)
CBA_df


Unnamed: 0,fpid,year,protection_height,protection_level,retreat_height,accommodation,total_costs,total_adaptation_costs,total_flood_damages,current_adaptation_cost,annual_flood_damage
0,17597,2020,1.2616,33.0,0.0,0,2143342,2134212,9129,95856,908
1,17597,2030,0.0000,1.0,1.4,0,63,0,63,0,1
2,17597,2040,0.0000,1.0,1.4,0,76,0,76,0,1
3,17597,2050,0.0000,1.0,1.4,0,94,0,94,0,1
4,17597,2060,0.0000,1.0,1.4,0,112,0,112,0,1
...,...,...,...,...,...,...,...,...,...,...,...
295577,34865,2110,0.0000,1.0,5.0,0,31927,0,31927,0,78
295578,34865,2120,0.0000,1.0,5.0,0,41860,0,41860,0,102
295579,34865,2130,0.0000,1.0,5.0,0,54892,0,54892,0,552
295580,34865,2140,0.0000,1.0,5.0,0,66349,0,66349,0,6635


In [7]:
# Path to the floodplain geometry parquet, located under WP6/data
flood_plain_name = 'CoCliCo_copernicusDEM_EU_v11_coastplain_elecz_H100+2m_GADM1_partioned.parquet'
flood_plain_fp = coclico_data_dir.joinpath('WP6', 'data', flood_plain_name)

# Read the floodplains as a GeoDataFrame and display them
flood_plain = gpd.read_parquet(flood_plain_fp)

flood_plain

Unnamed: 0,fpid,id_gadm1,id_old,coastal,coast_length,number_segments,longitude,latitude,countryid,geometry
0,0,2243,264,1,2.175413,2.0,25.672255,71.170236,NOR,"POLYGON ((25.67602 71.18543, 25.67739 71.18543..."
1,1,2243,122,1,2.269129,1.0,25.558042,71.160413,NOR,"POLYGON ((25.55404 71.16784, 25.55473 71.16784..."
2,2,2243,3,1,0.911049,1.0,25.351318,71.139855,NOR,"POLYGON ((25.34595 71.14614, 25.34664 71.14614..."
3,3,2243,6,1,0.835197,3.0,25.313856,71.144652,NOR,"POLYGON ((25.30735 71.14545, 25.30803 71.14545..."
4,4,2243,2,1,0.428803,1.0,25.372105,71.135287,NOR,"POLYGON ((25.36514 71.13723, 25.36697 71.13723..."
...,...,...,...,...,...,...,...,...,...,...
41322,41322,880,31259,1,0.477311,1.0,-15.570250,27.746277,ESP,"POLYGON ((-15.56991 27.74936, -15.56968 27.749..."
41323,41323,880,31255,1,0.798565,2.0,-15.572534,27.741023,ESP,"POLYGON ((-15.57265 27.74502, -15.57242 27.745..."
41324,41324,880,31256,1,0.701258,1.0,-15.587153,27.736227,ESP,"POLYGON ((-15.58270 27.74274, -15.58201 27.742..."
41325,41325,880,31253,1,0.374697,1.0,-15.591265,27.737140,ESP,"POLYGON ((-15.59206 27.73794, -15.59206 27.737..."


In [8]:
# Add flood_plain 'longitude', 'latitude', 'countryid', 'geometry' to every CBA file
# and write the result next to the input as '<name>.floodplain.parquet'.

# Read the floodplain file once: it is identical for every CBA file, so reloading
# it inside the loop only costs time.
flood_plain = gpd.read_parquet(flood_plain_fp)

# Only the join key and the attributes that are attached to the CBA rows
flood_plain_attrs = flood_plain[['fpid', 'longitude', 'latitude', 'countryid', 'geometry']]

for CBA_file in CBA_files:
    # Skip outputs of an earlier run. They also match the '*CBA.SSP*.parquet' pattern
    # and merging them again would add duplicated '_x'/'_y' columns and write
    # '.floodplain.floodplain.parquet' files.
    if CBA_file.name.endswith('.floodplain.parquet'):
        continue

    # Read CBA file
    CBA = pd.read_parquet(CBA_file)

    # Merge 'longitude', 'latitude', 'countryid', 'geometry' to CBA on fpid.
    # how='left' keeps every CBA row (rows without a matching floodplain get nulls);
    # validate='many_to_one' raises if an fpid occurs more than once in the floodplain
    # table, which would otherwise silently duplicate CBA rows.
    CBA_fp = CBA.merge(flood_plain_attrs, on='fpid', how='left', validate='many_to_one')

    # Set geometry and convert to GeoDataFrame
    CBA_fp = gpd.GeoDataFrame(CBA_fp, geometry='geometry', crs='EPSG:4326')

    # Save to parquet; with_suffix swaps the trailing '.parquet' for '.floodplain.parquet'
    CBA_fp.to_parquet(CBA_file.with_suffix('.floodplain.parquet'), index=False)

In [34]:
# Display every row of a single floodplain id.
# NOTE(review): CBA_fp is whatever the last iteration of the merge loop left behind,
# so this shows the last processed CBA file only.
fpid = 32298
CBA_fp[CBA_fp['fpid'].eq(fpid)]

Unnamed: 0,fpid,year,protection_height,protection_level,retreat_height,accommodation,total_costs,total_adaptation_costs,total_flood_damages,current_adaptation_cost,annual_flood_damage,longitude,latitude,countryid,geometry
229209,32298,2020,0.0,1,0.0,0,623736767188,30475955625,593260811563,22546661704,58253952033,8.452015,53.218581,DEU,"POLYGON ((7.91373 53.71780, 7.91442 53.71780, ..."
229210,32298,2030,7.9,1579,0.0,0,22734959236,13069194192,9665765044,0,85243070,8.452015,53.218581,DEU,"POLYGON ((7.91373 53.71780, 7.91442 53.71780, ..."
229211,32298,2040,7.9,1481,0.0,0,23926254701,10743405388,13182849313,0,102801501,8.452015,53.218581,DEU,"POLYGON ((7.91373 53.71780, 7.91442 53.71780, ..."
229212,32298,2050,7.9,1373,0.0,0,25164400927,14816675187,10347725739,0,130640296,8.452015,53.218581,DEU,"POLYGON ((7.91373 53.71780, 7.91442 53.71780, ..."
229213,32298,2060,7.9,1266,0.0,0,26334340915,11021322020,15313018896,0,173212689,8.452015,53.218581,DEU,"POLYGON ((7.91373 53.71780, 7.91442 53.71780, ..."
229214,32298,2070,7.9,1172,0.0,0,27241536133,16555031571,10686504562,6574498267,234158710,8.452015,53.218581,DEU,"POLYGON ((7.91373 53.71780, 7.91442 53.71780, ..."
229215,32298,2080,9.9,5034,0.0,0,19590196751,10172407826,9417788926,0,82022136,8.452015,53.218581,DEU,"POLYGON ((7.91373 53.71780, 7.91442 53.71780, ..."
229216,32298,2090,9.9,4594,0.0,0,19472070533,10480386894,8991683639,0,112676547,8.452015,53.218581,DEU,"POLYGON ((7.91373 53.71780, 7.91442 53.71780, ..."
229217,32298,2100,9.9,4167,0.0,0,18954399765,9587291361,9367108404,0,155221331,8.452015,53.218581,DEU,"POLYGON ((7.91373 53.71780, 7.91442 53.71780, ..."
229218,32298,2110,9.9,3886,0.0,0,17804743450,9526313517,8278429933,0,205760459,8.452015,53.218581,DEU,"POLYGON ((7.91373 53.71780, 7.91442 53.71780, ..."
