## Proof of Concept for flood maps

This notebook is an attempt at translating the CoCliCo User Story into code. 

The data used is Coastal Flood Hazard Projections and can be found here: p:\11207608-coclico\FULLTRACK_DATA\WP4\

In [None]:
# Import modules

import warnings

# import holoviews as hv
import cartopy.crs as crs
import cartopy.feature as cf
import matplotlib.pyplot as plt
import matplotlib.ticker as tck
import numpy as np
import shapely
import pandas as pd
import pystac_client
import xarray as xr
import rioxarray as rio
import pathlib
from pathlib import Path
import pystac
import pystac_client
import geopandas as gpd
from shapely.geometry import shape
from pystac.extensions.projection import ProjectionExtension


#import colormaps as cmaps
import pyam # https://pyam-iamc.readthedocs.io/en/latest/index.html
import matplotlib.colors as mcolors

from copy import deepcopy
from typing import List, Dict

# Import custom functionality
from coclicodata.drive_config import p_drive

# ignore warnings
warnings.filterwarnings("ignore")

## Open STAC data

1. Connect to the catalog.
2. Retrieve the collection of interest.
3. Retrieve one item to inspect its contents.

In [None]:
# Setup the URL to STAC catalog in Google Cloud
# NOTE(review): `P` is not defined anywhere in this notebook, so this call raises
# NameError on a fresh kernel. It is presumably a placeholder for the catalog
# URL - TODO confirm and replace with the real URL (ideally a config constant).
catalog = pystac_client.Client.open(
    P
)

# Retrieve collection from catalog, in this case Coastal Flood Hazard Projections (cfhp)
collection = catalog.get_child(id = 'cfhp_all')

# Show collection
collection

In [None]:
# Retrieve a single item for testing.
# The item id is a Windows-style relative path (backslash separated), hence the raw string.
test_item = collection.get_item(r'UNDEFENDED_MAPS\1000\None\2010.tif')

# Show test item
test_item

## Open LAUs from STAC

In [None]:
# Retrieve the LAU collection ('LAU_CM') from the catalog
LAU = catalog.get_child('LAU_CM')

# Get the href to the lau data
cloud_lau_path = LAU.assets['geoparquet-stac-items'].href

# Retrieve actual data using regular pandas, loading with geopandas is very slow
lau_data = pd.read_parquet(cloud_lau_path)

# Because we load with regular pandas the geometry column holds WKB (Well-Known Binary).
# Decode the whole column in one vectorised call instead of a per-row `apply`;
# the index of the column is preserved.
lau_data['geometry'] = gpd.GeoSeries.from_wkb(lau_data['geometry'])

# Now convert to geopandas and attach the coordinate reference system
lau_data = gpd.GeoDataFrame(lau_data, geometry='geometry', crs='EPSG:3035')

lau_data

In [None]:
# Filter on the Netherlands as an example
# NOTE(review): `lau_NL` is not used anywhere else in this notebook.
lau_NL = lau_data.loc[lau_data["CNTR_CODE"] == "NL"] 

# Select one for testing
# Note: the selection is made by name on the full `lau_data`, not on the `lau_NL` subset above.
lau = lau_data.loc[lau_data["LAU_NAME"] == "Varel, Stadt"] 

lau.plot()

In [None]:
# Retrieve a single chunk (asset) of the test item by its asset key
test_chunk = test_item.assets.get(r'UNDEFENDED_MAPS\1000\None\2010\B01_epsg=3035_x=4005362_y=2958337.tif')

# Access the Projection extension on the asset
projection = ProjectionExtension.ext(test_chunk)

# The geometry is expected to hold exactly one coordinate ring; the single-element
# unpacking raises ValueError otherwise
[chunk_bbox] = projection.geometry['coordinates']

# Turn the coordinate ring into a shapely polygon (the footprint of the chunk)
chunk_geom = shapely.Polygon(chunk_bbox)

chunk_geom

In [None]:
# Add the chunk geometry to a geodataframe
chunk_geom_df = gpd.GeoDataFrame(geometry = [chunk_geom])
# Plain shapely polygon of the chunk footprint, used for the intersection test
chunk_polygon = chunk_geom_df.geometry.iloc[0]

# Generate figure
f, ax = plt.subplots(figsize=(6, 4))
ax.axis('equal')

# Plot extent of the chunk
chunk_geom_df.plot(ax=ax, color='lightgray', edgecolor='black')
# Plot the LAU
lau.plot(ax=ax, color='gray')

# Stays None when no LAU intersects the chunk, so the plot below can be skipped
ds_clip = None

# Iterate over LAU's, for testing this is just one LAU.
# NOTE: `cur_lau` and `ds_clip` are reused by later cells, so their names are kept.
for _, cur_lau in lau.iterrows():

    # Check if the LAU intersects with the chunk geometry
    if not cur_lau.geometry.intersects(chunk_polygon):
        continue

    print('Working on: ' + str(cur_lau['LAU_NAME']))

    # Load raw band_data dataset
    ds = rio.open_rasterio(test_chunk.href, masked = True)

    # First clip to bounding box (cheap, reduces the raster before the polygon mask)
    ds_clip = ds.rio.clip_box(*cur_lau.geometry.bounds)

    # Then, clip dataset to match AOI polygon. `rio.clip` expects an iterable of
    # geometries, so pass the polygon in a list rather than the whole LAU row.
    ds_clip = ds_clip.rio.clip([cur_lau.geometry])

# Plot data
if ds_clip is not None:
    ds_clip.plot(ax=ax, cbar_kwargs={'label': 'Flood depth [m]'})
ax.set_title('Clipped LAU with flood depth')


In [None]:
# Compute statistics

# Helper class that counts NaN (noData), < 0.5 m and >= 0.5 m pixels in a dataset clipped to a polygon
class flood_stats:
    """Pixel-count statistics of a flood-depth raster.

    `compute` classifies every pixel of a raster as NaN, shallower than 0.5
    or at least 0.5 and stores the counts on the instance. `add_new_values`
    appends the latest counts to the `all_*` lists and `convert2arrays` turns
    those lists into numpy arrays.

    Attributes set by `compute`:
        n_nans:   number of NaN pixels
        n_less05: number of pixels with a value < 0.5
        n_more05: number of pixels with a value >= 0.5
        total:    n_nans + n_less05 + n_more05 (equals the number of pixels)
        flooded:  fraction of non-NaN pixels, (n_less05 + n_more05) / total
    """

    def __init__(self):

        # Initialize empty lists that collect the results of successive `compute` calls
        self.all_nans = []
        self.all_less05 = []
        self.all_more05 = []
        self.all_total = []
        self.all_flooded = []

    def compute(self, ds):
        """Count NaN, < 0.5 and >= 0.5 pixels of `ds`.

        Args:
            ds: object exposing the pixel values as an array through `.values`
                (in this notebook a clipped raster).
        """

        self.ds = ds
        depth = np.asarray(ds.values)

        # Comparisons with NaN are False, so the three classes are disjoint
        self.n_nans = np.isnan(depth).sum()
        self.n_less05 = (depth < 0.5).sum()
        # `>=` so that pixels of exactly 0.5 are counted; with a strict `>` they
        # fell in neither class and `total` no longer matched the pixel count
        self.n_more05 = (depth >= 0.5).sum()

        # Sum all to find total number of pixels
        self.total = self.n_nans + self.n_less05 + self.n_more05

        # Determine fraction of pixels that are flooded (NaN for an empty raster)
        n_flooded = np.add(self.n_less05, self.n_more05)
        if self.total:
            self.flooded = np.divide(n_flooded, self.total)
        else:
            self.flooded = np.float64('nan')

    def add_new_values(self):
        """Append the results of the latest `compute` call to the `all_*` lists.

        Must be called before `convert2arrays`; afterwards the `all_*`
        attributes are numpy arrays, which have no `append`.
        """

        self.all_nans.append(self.n_nans)
        self.all_less05.append(self.n_less05)
        self.all_more05.append(self.n_more05)
        self.all_total.append(self.total)
        self.all_flooded.append(self.flooded)

    def convert2arrays(self):
        """Convert all `all_*` lists to numpy arrays."""

        self.all_nans = np.array(self.all_nans)
        self.all_less05 = np.array(self.all_less05)
        self.all_more05 = np.array(self.all_more05)
        self.all_total = np.array(self.all_total)
        self.all_flooded = np.array(self.all_flooded)

# Sanity check on the clipped test raster: the three pixel classes summed together
# should equal the number of pixels in the raster.
# NOTE: relies on `ds_clip` created by the clipping cell above.
if __name__ == '__main__':
    
    clip_stats = flood_stats()
    clip_stats.compute(ds_clip)

    print('summed pixels = ' + str(clip_stats.total))
    print('original pixels = ' + str(np.size(ds_clip.values)))

In [None]:
# Plot data
import matplotlib.pyplot as plt

# Switch to the matplotlib gallery style
plt.style.use('_mpl-gallery')

# One wedge per pixel class of the clipped raster
pixel_counts = np.array([clip_stats.n_nans, clip_stats.n_less05, clip_stats.n_more05])
labels = ('no flooding', 'flooding < 0.5m', 'flooding > 0.5m')

# Draw the pie chart
fig, ax = plt.subplots()
ax.pie(pixel_counts, labels = labels)

# Report the flooded fraction as a rounded percentage in the title
flooded_pct = round(clip_stats.flooded*100)
ax.set_title(f'Percentage flooded = {flooded_pct!s}%')

In [None]:
import os

# Set up folder structure
# Nested description of the scenario folders: a dict value is a sub-folder level,
# a list value holds the leaf folder names and an empty list marks a folder
# without sub-folders.
folder_structure = {
    "Mean_spring_tide": [],
    "RP": ["1000", "100", "1"],
    "SLR": {
        "High_end": ["2100", "2150"],
        "SSP126": ["2100"],
        "SSP245": ["2050", "2100"],
        "SSP585": ["2030", "2050", "2100"]
    }
}

def get_paths(folder_structure, base_dir=''):
    """Flatten a nested folder description into a list of relative paths.

    Args:
        folder_structure: dict mapping a folder name to either a dict (a nested
            level), a list of leaf folder names, or an empty list (the folder
            itself is a leaf). Values of any other type are ignored.
        base_dir: path prefix prepended to every generated path.

    Returns:
        List of paths joined with `os.path.join`, in dict/list order. Empty
        strings inside a list are skipped.
    """
    collected = []
    for name, children in folder_structure.items():
        # Nested level: recurse with the extended prefix
        if isinstance(children, dict):
            collected += get_paths(children, os.path.join(base_dir, name))
            continue
        # Anything that is neither dict nor list carries no path information
        if not isinstance(children, list):
            continue
        # Empty list: the folder itself is the leaf
        if not children:
            collected.append(os.path.join(base_dir, name))
            continue
        collected += [
            os.path.join(base_dir, name, leaf) for leaf in children if leaf != ""
        ]
    return collected

# Available map types. A later cell selects one by position (`map_types[1]`),
# so the order of this list matters.
map_types = ["HIGH_DEFENDED_MAPS", "LOW_DEFENDED_MAPS", "UNDEFENDED_MAPS"]

# Flatten the folder structure into a list of relative scenario paths
path_list = get_paths(folder_structure)
path_list


In [None]:
# Load all scenarios
from pathlib import Path
import time

start_time = time.time()

# Initialize flood stats class
clip_stats = flood_stats()
# Initialize empty list for collecting the ids of the processed items
all_fps = []

# NOTE: `cur_lau` is the LAU row left over from the clipping cell above.
for item in collection.get_all_items():
    # NOTE: for now only focus on low_defended map_type
    if map_types[1] not in item.id or 'SLR' not in str(item.id):
        continue

    for chunk_title, chunk in item.get_assets().items():

        # Skip the overview asset, only the chunked tifs hold data
        if chunk_title == 'visual':
            continue

        # Get chunk bounding box geometry as a shapely polygon
        [chunk_boundbox] = chunk.extra_fields.get('proj:geometry')['coordinates']
        chunk_polygon = shapely.Polygon(chunk_boundbox)

        # Check if lau intersects with chunk bounding box
        if not cur_lau.geometry.intersects(chunk_polygon):
            continue

        print('Working on: ' + str(item.id))

        # Load raw band_data dataset
        ds = rio.open_rasterio(chunk.href, masked = True)
        try:
            # Clip dataset to match AOI. `rio.clip` expects an iterable of
            # geometries, so pass the polygon in a list rather than the whole row.
            ds_clip = ds.rio.clip([cur_lau.geometry])

            # Compute flood statistics for each flood map
            clip_stats.compute(ds_clip)
        finally:
            # Actually release the file handle; `del ds` only dropped the name
            ds.close()

        clip_stats.add_new_values()

        # Collect all item ids
        all_fps.append(item.id)

clip_stats.convert2arrays()

In [None]:
# Item ids are Windows-style (backslash separated) relative paths. Parse them with
# PureWindowsPath so the parts are split the same way on every operating system.
fps = [pathlib.PureWindowsPath(fp) for fp in all_fps]

fig, ax = plt.subplots(figsize=(5,3))

# Marker transparency per scenario (same order as `col_scen`)
col_alpha = [0.1,0.35,0.65,0.9]
col_scen = 'SSP126', 'SSP245', 'SSP585', 'High_end'
scen = []
years = []

for i, fp in enumerate(fps):

    # Third path component is the scenario name, the file stem is the year
    cur_scen = fp.parts[2]
    years.append(int(fp.stem))

    ax.plot(
        years[i],
        clip_stats.all_flooded[i] * 100,  # stored as a fraction, the axis is in percent
        'o',
        color = 'C1',
        alpha = col_alpha[col_scen.index(cur_scen)],
        markeredgecolor = 'black',
        # Label only the first point of each scenario: one legend entry per scenario
        label = cur_scen if cur_scen not in scen else None,
    )
    scen.append(cur_scen)

year = np.array(years)

ax.set_xlabel('Time [years]')
ax.set_ylabel('percentage flooded [%]')
ax.legend()

In [None]:
# Do this for all polygons within the current tif file

# Add new (empty) statistics columns to the LAU dataframe
stat_columns = ['n_nans', 'n_less05', 'n_more05', 'total', 'flooded']
for column in stat_columns:
    lau_data[column] = None

# Index labels of the LAUs for which statistics were computed
track = []

# Iterate over all items
for item in collection.get_all_items():

    # Match with map type we're working on
    if r"UNDEFENDED_MAPS\RP\1000" not in item.id:
        continue

    for chunk_title, chunk in item.get_assets().items():

        if 'visual' in chunk_title:
            continue

        # Only process the single test chunk
        if "B01_epsg=3035_x=4005362_y=2958337.tif" not in chunk_title:
            continue

        print('Working on ' + str(item.id) + ' | chunk: ' + pathlib.Path(chunk_title).stem)
        # Retrieve chunk geometry
        [chunk_geom] = chunk.extra_fields.get('proj:geometry')['coordinates']
        # Convert to polygon (`chunk_geom_df` is reused by the plotting cell below)
        chunk_polygon = shapely.Polygon(chunk_geom)
        chunk_geom_df = gpd.GeoDataFrame(geometry = [chunk_polygon])

        # Load raw band_data dataset
        ds = rio.open_rasterio(chunk.href, masked = True)

        # Select the LAU's that lie completely within the flood map extent in one
        # vectorised test instead of testing row by row.
        # TODO: change from within to intersects and find a way to deal with bordering tif's
        lau_in_chunk = lau_data[lau_data.geometry.within(chunk_polygon)]

        # Iterate over LAU's; `lau_idx` is the index label of the LAU in `lau_data`
        for lau_idx, cur_lau in lau_in_chunk.iterrows():

            # First clip to bounding box
            ds_clip = ds.rio.clip_box(*cur_lau.geometry.bounds)

            # Check if flooded pixels exist within the bounding box
            if ds_clip.isnull().all():
                continue

            track.append(lau_idx)

            # Update user
            print('Flooded pixels detected in: ' + str(cur_lau['LAU_NAME']))
            # Then, clip dataset to match AOI polygon. `rio.clip` expects an iterable
            # of geometries, so pass the polygon in a list rather than the whole row.
            ds_clip = ds_clip.rio.clip([cur_lau.geometry])

            # Compute flood statistics for each flood map
            clip_stats.compute(ds_clip)

            # Add stats to dataframe. `.at` writes into `lau_data` itself; chained
            # assignment (`lau_data[col][i] = ...`) may write to a temporary copy.
            lau_data.at[lau_idx, 'n_nans'] = clip_stats.n_nans
            lau_data.at[lau_idx, 'n_less05'] = clip_stats.n_less05
            lau_data.at[lau_idx, 'n_more05'] = clip_stats.n_more05
            lau_data.at[lau_idx, 'total'] = clip_stats.total
            lau_data.at[lau_idx, 'flooded'] = clip_stats.flooded

        # Release the file handle of the raster
        ds.close()

In [None]:
# Keep only the LAU's whose `flooded` value has been filled in
has_flooded_value = lau_data['flooded'].notna()
altered_lau_data = lau_data[has_flooded_value]
altered_lau_data

In [None]:
f, ax = plt.subplots(figsize=(10, 6))

# Background: chunk extent and all LAU's
chunk_geom_df.plot(ax=ax, color='lightgray', edgecolor='black')
lau_data.plot(ax=ax, color='gray')

# The `flooded` column was created with None and is therefore of object dtype.
# Cast to float so it is drawn with a continuous colour scale
# (NOTE(review): geopandas appears to treat object columns as categories - confirm).
cmap = 'coolwarm'
flooded_plot_data = altered_lau_data.assign(
    flooded=altered_lau_data['flooded'].astype(float)
)
flooded_plot_data.plot(
    ax=ax,
    column='flooded',
    cmap=cmap,
    legend=True,
    legend_kwds={'label': 'Fraction flooded [-]'},
)

# Set the zoom level to the extent of the chunk plus a margin of 0.1e6 CRS units
ax.set_xlim(chunk_geom_df.total_bounds[0]-0.1e6, chunk_geom_df.total_bounds[2]+0.1e6)
ax.set_ylim(chunk_geom_df.total_bounds[1]-0.1e6, chunk_geom_df.total_bounds[3]+0.1e6)

ax.set_title('Flooded fraction per LAU')

# Show the plot
plt.show()


In [None]:
def find_n_assets(collection : pystac.Collection, exclude_asset_id : str ) -> int:
    """Count the assets of all items in a collection, skipping one asset key.

    Args:
        collection: collection whose items (from `get_all_items()`) are inspected.
        exclude_asset_id: asset key that is left out of the count.

    Returns:
        Number of assets whose key differs from `exclude_asset_id`.
    """
    return sum(
        1
        for item in collection.get_all_items()
        for asset_key, _asset in item.assets.items()
        if asset_key != exclude_asset_id
    )

In [None]:
# Make dataframe with all items and their bounding boxes

def make_gdf_from_chunks(collection : pystac.Collection) -> gpd.GeoDataFrame:
    """Build a GeoDataFrame with one row per chunked asset of a collection.

    Assets whose key contains 'visual' are skipped. The frame is built in one
    go from collected records, so the collection is traversed only once and no
    values are written into a pre-allocated frame through chained assignment.

    Args:
        collection: collection whose item assets carry a 'proj:geometry' entry
            in their extra fields.

    Returns:
        GeoDataFrame with columns:
            id:       asset key with forward slashes as separators
            href:     asset href (URL to the chunked COG)
            geometry: chunk footprint as a polygon
        NOTE(review): no CRS is set on the result, as before - confirm whether
        it should carry the CRS of the chunks.
    """
    ids, hrefs, geometries = [], [], []

    for item in collection.get_all_items():

        for chunk_title, chunk in item.get_assets().items():

            if 'visual' in chunk_title:
                continue

            # Retrieve chunk geometry (exactly one coordinate ring is expected)
            [chunk_geom] = chunk.extra_fields.get('proj:geometry')['coordinates']

            # Asset keys are Windows-style paths; PureWindowsPath converts the
            # separators to '/' on every operating system
            ids.append(pathlib.PureWindowsPath(chunk_title).as_posix())
            hrefs.append(chunk.href)
            geometries.append(shapely.Polygon(chunk_geom))

    return gpd.GeoDataFrame({'id': ids, 'href': hrefs}, geometry=geometries)

# Build the chunk overview for the whole collection (one row per chunked asset)
if __name__ == '__main__':
    tdf = make_gdf_from_chunks(collection)

In [None]:
tdf

In [None]:
import re
import time
# New method is working and verified against the old method, change within

# Trial reduced LAU dataframe
lau_data = gpd.read_parquet(r'p:\11207608-coclico\FULLTRACK_DATA\WP4\LAU_stats\LAU_2020_NUTS_2021_01M_3035_CM.parquet')

# # For testing only do one country
# lau_data = lau_data.loc[lau_data.CNTR_CODE=='PT']
# lau_data.reset_index(drop=True, inplace=True)

# Initialize flood stats class
clip_stats = flood_stats()

tifs_df = make_gdf_from_chunks(collection)

# Names of the per-scenario count columns (prefixed with the scenario folder)
stat_names = ('n_nans', 'n_less05', 'n_more05', 'total')

for map_type in map_types:
    for scen in path_list:

        scen = pathlib.Path(scen).as_posix()
        # Create a regex pattern that checks for both map_type and scen in any order.
        # `scen` must match whole path components (preceded by start or '/', followed
        # by '/'): a plain substring test lets 'RP/1' also match 'RP/100' and
        # 'RP/1000', whose chunks were then processed and summed more than once.
        pattern = rf"(?=.*{re.escape(map_type)})(?=.*(?:^|/){re.escape(scen)}/)"

        # Filter the DataFrame to only include rows where both map_type and scen are in the 'id' column
        tif_df = tifs_df[tifs_df['id'].str.contains(pattern, regex=True)]

        # Nothing to do for combinations without any tif
        if tif_df.empty:
            continue

        # Find all LAU's within a chunked tif based on the spatial join
        # (`predicate` replaces the `op` keyword, which recent geopandas no longer accepts)
        lau_tif_join = lau_data.sjoin(tif_df, how = 'left', predicate = 'intersects')

        # Sort based on the chunks
        lau_tif_join = lau_tif_join.sort_values('href')

        # Unique (href, id) pairs taken from the same rows, so that every href is
        # guaranteed to be paired with its own id; rows without a chunk are dropped
        chunk_pairs = lau_tif_join[['href', 'id']].dropna().drop_duplicates()

        # Scenario folders for which statistics were written in this iteration
        touched_col_names = []

        # Iterate over chunks
        for chunk_href, chunk_id in chunk_pairs.itertuples(index=False, name=None):

            print('Now working on: ' + str(chunk_id))
            # Load raw band_data dataset
            ds = rio.open_rasterio(chunk_href, masked = True)

            # Select only LAU's that match with chunk. Exact comparison: the href
            # is a URL, not a regular expression.
            cur_chunk_lau = lau_tif_join[lau_tif_join.href == chunk_href]

            # Initiate new columns in dataframe
            col_name = Path(chunk_id).parent
            stat_cols = {name: str(col_name.joinpath(name)) for name in stat_names}
            for column in stat_cols.values():
                if column not in lau_data:
                    lau_data[column] = None
            if col_name not in touched_col_names:
                touched_col_names.append(col_name)

            # Iterate over LAU's; `i` is the index label of the LAU in `lau_data`
            for i, cur_lau in cur_chunk_lau.iterrows():

                t0 = time.time()

                # First clip to bounding box
                try:
                    ds_clip = ds.rio.clip_box(*cur_lau.geometry.bounds)
                except Exception as err:
                    print("Skipping LAU: Clipping resulted in a one-dimensional raster")
                    print(i)
                    print(cur_lau.LAU_NAME)
                    # Show the actual cause instead of silently assuming one
                    print(repr(err))
                    continue

                # Check if flooded pixels exist within the bounding box
                if ds_clip.isnull().all():
                    continue

                # Then, clip dataset to match AOI polygon
                try:
                    ds_clip = ds_clip.rio.clip([cur_lau.geometry])
                except Exception as err:
                    print("Skipping LAU, because clipping to polygon does not work")
                    print(repr(err))
                    continue

                # Check if flooded pixels exist within polygon
                if ds_clip.isnull().all():
                    continue

                # Compute flood statistics for each flood map
                clip_stats.compute(ds_clip)

                # Update user
                print("{:.2%}".format(clip_stats.flooded) + " flooded : " + cur_lau.LAU_NAME)

                new_values = {
                    'n_nans': clip_stats.n_nans,
                    'n_less05': clip_stats.n_less05,
                    'n_more05': clip_stats.n_more05,
                    'total': clip_stats.total,
                }

                # Add stats to dataframe. `.at` writes into `lau_data` itself; chained
                # assignment (`lau_data[col][i] = ...`) may write to a temporary copy.
                if pd.isna(lau_data.at[i, stat_cols['total']]):
                    for name, value in new_values.items():
                        lau_data.at[i, stat_cols[name]] = value

                else:
                    # LAU spans several chunks: accumulate the counts
                    print('Edge case detected, new chunk data is summed')
                    for name, value in new_values.items():
                        lau_data.at[i, stat_cols[name]] = lau_data.at[i, stat_cols[name]] + value

                print(time.time() - t0)

            # Release the file handle of the raster
            ds.close()

        # Compute percentage flooded pixels per LAU, def: perc_flooded = 1-n_nans/n_total.
        # Only for the scenario folders handled above, so a combination without chunks
        # can never reuse the `col_name` of a previous iteration.
        for touched in touched_col_names:
            lau_data[str(touched.joinpath('flooded'))] = 1-lau_data[str(touched.joinpath('n_nans'))]/lau_data[str(touched.joinpath('total'))]

LAU_NUTS_match = pd.read_csv(r'p:\11207608-coclico\FASTTRACK_DATA\XX_NUTS\lau_2020_nuts_2021_concordance_by_geo.csv')
# Drop unneeded columns
LAU_NUTS_match = LAU_NUTS_match.drop(columns=['gisco_id','country','lau_id','lau_name','population','area_km2','year'])
LAU_NUTS_match = LAU_NUTS_match.rename(columns={'fid': 'FID'})

# Merge the two dataframes
LAU_NUTS_data = lau_data.merge(LAU_NUTS_match,on='FID')

# Reorder columns: move the last two columns to position 2.
# Built explicitly instead of `Index.insert([2], ...)`, which is documented for a
# single integer location and a single item only.
cols = list(LAU_NUTS_data.columns)
cols = cols[:2] + cols[-2:] + cols[2:-2]

# Store in final form
LAU_NUTS_data = LAU_NUTS_data[cols]

# Write geodataframe
coclico_data_dir = p_drive.joinpath("11207608-coclico", "FULLTRACK_DATA")
ds_dir = coclico_data_dir.joinpath('WP4','LAU_stats')
out_file= ds_dir.joinpath('LAU_NUTS_CFHP_latest.parquet')

# LAU_NUTS_data.to_parquet(out_file)

In [None]:
lau_data

In [None]:
# New method is working and verified against the old method, change within

# Trial reduced LAU dataframe
lau_data = gpd.read_parquet(r'p:\11207608-coclico\FULLTRACK_DATA\WP4\LAU_stats\LAU_2020_NUTS_2021_01M_3035_CM.parquet')

# # For testing only do one country
# lau_data = lau_data.loc[lau_data.CNTR_CODE=='PT']
# lau_data.reset_index(drop=True, inplace=True)

# Initialize flood stats class
clip_stats = flood_stats()

tifs_df = make_gdf_from_chunks(collection)

map_types = ["HIGH_DEFENDED_MAPS", "LOW_DEFENDED_MAPS", "UNDEFENDED_MAPS"]
rps = ["static", "1", "100", "1000"]  # 4 options
# NOTE(review): "High_End" is spelled "High_end" elsewhere in this notebook and the
# match below is case sensitive - confirm against the actual item ids.
scenarios = ["None", "SSP126", "SSP245", "SSP585", "High_End"]  # 5 options
times = ["2010", "2030", "2050", "2100", "2150"]  # 5 options

# Names of the per-scenario count columns (prefixed with the scenario folder)
stat_names = ('n_nans', 'n_less05', 'n_more05', 'total')

for map_type in map_types:
    for rp in rps:
        for scen in scenarios:
            for t in times:

                # Create a regex pattern that checks for map_type, rp, scen and t
                # as whole words, in any order
                pattern = rf"(?=.*\b{map_type}\b)(?=.*\b{rp}\b)(?=.*\b{scen}\b)(?=.*\b{t}\b)"

                # Filter the DataFrame to only include rows where all four are in the 'id' column
                tif_df = tifs_df[tifs_df['id'].str.contains(pattern, regex=True)]

                # Many combinations do not exist; skip them before the spatial join
                if tif_df.empty:
                    continue

                # Find all LAU's within a chunked tif based on the spatial join
                # (`predicate` replaces the `op` keyword, which recent geopandas no longer accepts)
                lau_tif_join = lau_data.sjoin(tif_df, how = 'left', predicate = 'intersects')

                # Sort based on the chunks
                lau_tif_join = lau_tif_join.sort_values('href')

                # Unique (href, id) pairs taken from the same rows, so that every href
                # is guaranteed to be paired with its own id; rows without a chunk are dropped
                chunk_pairs = lau_tif_join[['href', 'id']].dropna().drop_duplicates()

                # Scenario folders for which statistics were written in this iteration
                touched_col_names = []

                # Iterate over chunks
                for chunk_href, chunk_id in chunk_pairs.itertuples(index=False, name=None):

                    print('Now working on: ' + str(chunk_id))

                    # Load raw band_data dataset
                    ds = rio.open_rasterio(chunk_href, masked = True)

                    # Select only LAU's that match with chunk. Exact comparison: the
                    # href is a URL, not a regular expression.
                    cur_chunk_lau = lau_tif_join[lau_tif_join.href == chunk_href]

                    # Initiate new columns in dataframe
                    col_name = Path(chunk_id).parent
                    stat_cols = {name: str(col_name.joinpath(name)) for name in stat_names}
                    for column in stat_cols.values():
                        if column not in lau_data:
                            lau_data[column] = None
                    if col_name not in touched_col_names:
                        touched_col_names.append(col_name)

                    # Iterate over LAU's; `i` is the index label of the LAU in `lau_data`
                    for i, cur_lau in cur_chunk_lau.iterrows():

                        # First clip to bounding box
                        try:
                            ds_clip = ds.rio.clip_box(*cur_lau.geometry.bounds)
                        except Exception as err:
                            print("Skipping LAU: Clipping resulted in a one-dimensional raster")
                            print(i)
                            print(cur_lau.LAU_NAME)
                            # Show the actual cause instead of silently assuming one
                            print(repr(err))
                            continue

                        # Check if flooded pixels exist within the bounding box
                        if ds_clip.isnull().all():
                            continue

                        # Then, clip dataset to match AOI polygon
                        try:
                            ds_clip = ds_clip.rio.clip([cur_lau.geometry])
                        except Exception as err:
                            print("Skipping LAU, because clipping to polygon does not work")
                            print(repr(err))
                            continue

                        # Check if flooded pixels exist within polygon
                        if ds_clip.isnull().all():
                            continue

                        # Compute flood statistics for each flood map
                        clip_stats.compute(ds_clip)

                        # Update user
                        print("{:.2%}".format(clip_stats.flooded) + " flooded : " + cur_lau.LAU_NAME)

                        new_values = {
                            'n_nans': clip_stats.n_nans,
                            'n_less05': clip_stats.n_less05,
                            'n_more05': clip_stats.n_more05,
                            'total': clip_stats.total,
                        }

                        # Add stats to dataframe. `.at` writes into `lau_data` itself;
                        # chained assignment may write to a temporary copy.
                        if pd.isna(lau_data.at[i, stat_cols['total']]):
                            for name, value in new_values.items():
                                lau_data.at[i, stat_cols[name]] = value

                        else:
                            # LAU spans several chunks: accumulate the counts
                            print('Edge case detected, new chunk data is summed')
                            for name, value in new_values.items():
                                lau_data.at[i, stat_cols[name]] = lau_data.at[i, stat_cols[name]] + value

                    # Release the file handle of the raster
                    ds.close()

                # Compute percentage flooded pixels per LAU, def: perc_flooded = 1-n_nans/n_total.
                # Only for the scenario folders handled above, so a combination without
                # chunks can never reuse the `col_name` of a previous iteration.
                for touched in touched_col_names:
                    lau_data[str(touched.joinpath('flooded'))] = 1-lau_data[str(touched.joinpath('n_nans'))]/lau_data[str(touched.joinpath('total'))]

# Write geodataframe
coclico_data_dir = p_drive.joinpath("11207608-coclico", "FULLTRACK_DATA")
ds_dir = coclico_data_dir.joinpath('WP4','LAU_stats')
out_file= ds_dir.joinpath('LAU_NUTS_CFHP_all.parquet')

lau_data.to_parquet(out_file)

In [None]:
len(tif_df)

In [None]:
# Reload the LAU statistics from the parquet file 'LAU_NUTS_CFHP_all.parquet'.
# NOTE: this overwrites the in-memory `lau_data` with the contents of the file.
lau_data = gpd.read_parquet(r'p:\11207608-coclico\FULLTRACK_DATA\WP4\LAU_stats\LAU_NUTS_CFHP_all.parquet')
lau_data

In [None]:
col_name

In [None]:
lau_tif_join.total_bounds

In [None]:
# Write the current `lau_data` to 'LAU_NUTS_CM_CFHP.parquet' in the LAU_stats folder.
# NOTE: relies on `ds_dir` defined in an earlier cell.
out_file= ds_dir.joinpath('LAU_NUTS_CM_CFHP.parquet')
lau_data.to_parquet(out_file)
ds_dir

In [None]:
# Match LAU and NUTS using this work: https://edjnet.github.io/lau_centres/lau_nuts.html

LAU_NUTS_match = pd.read_csv(r'p:\11207608-coclico\FASTTRACK_DATA\XX_NUTS\lau_2020_nuts_2021_concordance_by_geo.csv')
# Drop unneeded columns
LAU_NUTS_match = LAU_NUTS_match.drop(columns=['gisco_id','country','lau_id','lau_name','population','area_km2','year'])
LAU_NUTS_match = LAU_NUTS_match.rename(columns={'fid': 'FID'})

# Merge the two dataframes.
# The original merged an undefined name `test` (NameError on a fresh kernel); the
# same merge earlier in this notebook uses `lau_data`.
# NOTE(review): confirm `lau_data` is the intended frame.
LAU_NUTS_data = lau_data.merge(LAU_NUTS_match,on='FID')

# Reorder columns: move the last two columns to position 2.
# Built explicitly instead of `Index.insert([2], ...)`, which is documented for a
# single integer location and a single item only.
cols = list(LAU_NUTS_data.columns)
cols = cols[:2] + cols[-2:] + cols[2:-2]

# Store in final form
LAU_NUTS_data = LAU_NUTS_data[cols]

# Write geodataframe to parquet
coclico_data_dir = p_drive.joinpath("11207608-coclico", "FULLTRACK_DATA")
ds_dir = coclico_data_dir.joinpath('WP4','LAU_stats')
out_file= ds_dir.joinpath('LAU_NUTS_CFHP.parquet')

# altered_lau_data.to_parquet(out_file_altered)
LAU_NUTS_data.to_parquet(out_file)

In [None]:
LAU_NUTS_data