In [1]:
import pandas as pd
import pystac
import json
import numpy as np
from pathlib import Path
import subprocess

In [2]:
# Locate the STAC folder: the "current" directory next to the notebook's working directory
CUR_CWD = Path.cwd().parents[0]
STAC_DIR = CUR_CWD.joinpath("current")

# Read the root catalog from disk with pystac
catalog = pystac.Catalog.from_file(str(STAC_DIR.joinpath("catalog.json")))

In [3]:
# Excel workbook with the front-end text per collection (read from sheet "Sheet1")
fe_text_file = Path(r"p:\11207608-coclico\docs\FE_text_titles.xlsx")
fe_text = pd.read_excel(fe_text_file, sheet_name="Sheet1")

# Example of reading a single cell: the Markdown description of the "slp" collection
is_slp = fe_text["Collection"] == "slp"
slp_description = fe_text.loc[is_slp, "Markdown"].values[0]

slp_description


"**Sea level rise projections** provide regional insights into future sea level changes, helping assess coastal flood risks and inform adaptation planning.  \n\n- **Spatial coverage:** Global Ocean and European seas  \n- **Sources:** IPCC AR6 report, Caron et al. Glacial Isostatic Adjustment (GIA), CMIP6  \n- **Common usage:** Coastal impact assessment, adaptation planning, sensitivity analysis  \n\nMore info about the dataset can be found in the <a href='https://www.openearth.nl/coclico-workbench/Datasets/#__tabbed_1_1' target='_blank' rel='noopener noreferrer'>User Handbook</a>"

In [12]:
# Front-end (FE) display labels: summary name -> {raw summary value -> label}

# Return periods in years; 1 is singular, everything else plural
_rp_text = {
    years: ("1 year" if years == 1 else f"{years} years")
    for years in (1, 5, 10, 20, 50, 100, 200, 500, 1000)
}

# SSP scenario codes; both upper- and lower-case spellings are mapped
_ssp_text = {"126": "SSP1-2.6", "245": "SSP2-4.5", "585": "SSP5-8.5"}

fe_labels = {
    "defense level": {
        "HIGH_DEFENDED_MAPS": "High Defended",
        "LOW_DEFENDED_MAPS": "Low Defended",
        "UNDEFENDED_MAPS": "No Defense",
    },
    "return period": {
        "static": "No Return Period",
        **{str(years): text for years, text in _rp_text.items()},
    },
    # "rp" values are mapped both as strings ("1") and as floats (1.0)
    "rp": {
        "static": "No Return Period",
        **{
            key: text
            for years, text in _rp_text.items()
            for key in (str(years), float(years))
        },
    },
    "scenarios": {
        "None": "No Scenario",
        **{f"SSP{code}": text for code, text in _ssp_text.items()},
        **{f"ssp{code}": text for code, text in _ssp_text.items()},
        "High_End": "High End",
        "high_end": "High End",
        "Historical": "Historical",
        "RCP45": "RCP 4.5",
        "RCP85": "RCP 8.5",
    },
    "time": {
        "2010": "2010",
        "2030": "2030",
        "2050": "2050",
        "2100": "2100",
        "2150": "2150",
        # Years ending in 1 (2031, 2041, ..., 2151) are labelled with the decade itself
        **{str(decade + 1): str(decade) for decade in range(2030, 2151, 10)},
    },
    "ensemble": {
        "msl_l": "16.7 Percentile",
        "msl_m": "50.0 Percentile",
        "msl_h": "83.3 Percentile",
    },
    "adaptation strategy": {
        "no_adaptation": "No Adaptation",
        "retreat": "Retreat",
        "protection": "Protect",
        "acc": "Accommodate",
        "protect_retreat": "Protect & Retreat",
    },
    "variables": {
        "hs": "Wave height (Hs)",
        "ssl": "Storm surge level (SSL)",
        "slr": "Sea level rise (SLR)",
        "tidal_range": "Tidal range",
    },
}

# Front-end descriptions: summary name -> explanatory text shown next to the labels

# "return period" and "rp" share the same explanation
_return_period_description = (
    "Frequency at which an extreme event of total water level (related to storms) "
    "is expected to occur on average."
)

fe_descriptions = {
    "defense level": (
        "Policy-based coastal protection standards at the province level (NUTS2). "
        "High (low) defended: maximum (minimum) level of policy-based protection, if information is available. "
        "No defense: without protection (beyond what may be included in the DEM)"
    ),
    "return period": _return_period_description,
    "rp": _return_period_description,
    "scenarios": (
        "Climate scenarios based on IPCC's Shared Socioeconomic Pathways (SSPs). "
        "High-end (if applicable) refers to P83 of SSP5-8.5, taking into account low-confidence processes. "
        "Other scenarios use the P50"
    ),
    "time": (
        "Time slice applied to the data. "
        "Either present-day (2010) or future state representing medium (2030 till 2050) "
        "and long term (2050 till 2100) outlook."
    ),
    "ensemble": (
        "Uncertainty range of projections. "
        "16.7, 50 and 83.3 indicate the lower bound, median and upper bound respectively"
    ),
    "adaptation strategy": (
        "The adaptation strategy related to raising coastal defenses (protection), "
        "managed withdrawal from vulnerable areas (retreat), "
        "a combination of these strategies (protect & retreat), "
        "implementing flood-proofing measures (accommodation) "
        "or areas where adaptation measure are deemed inefficient (no adaptation)"
    ),
    "variables": "Different variables included in the dataset",
}

In [13]:
# Helper functions: update collection/catalog metadata from the FE-text Excel file and reset the catalog via git

def update_collection_FE_text(collection, fe_text_file, fe_labels, fe_descriptions):
    """Update one collection's front-end metadata from the FE-text Excel file.

    The sheet "Sheet1" of `fe_text_file` is read and the first row whose
    "Collection" column equals `collection.id` is used. From that row:

    - "Title" replaces `collection.title` (when the cell holds text),
    - "Markdown" replaces `collection.description` (when the cell holds text),
    - "Keywords" (comma-separated text) is appended to `collection.keywords`,
      skipping keywords that are already present,
    - "Styling" (a JSON object as text) is parsed and every key starting with
      "deltares:" is copied into `collection.extra_fields`.

    Afterwards the front-end labels/descriptions are attached through
    `add_FE_labels_to_collection` and the collection is saved.

    Parameters
    ----------
    collection : object exposing `id`, `title`, `description`, `keywords`,
        `extra_fields`, `summaries` and `save()` (e.g. a pystac Collection).
    fe_text_file : path of the FE-text Excel workbook.
    fe_labels : dict mapping summary name -> {raw value -> label}.
    fe_descriptions : dict mapping summary name -> description text.

    Returns
    -------
    None. When the collection id is not listed in the workbook a message is
    printed and nothing is changed or saved.

    Raises
    ------
    json.JSONDecodeError if the "Styling" cell holds text that is not valid JSON.
    """
    # Load FE_text excel file
    fe_text = pd.read_excel(fe_text_file, sheet_name="Sheet1")

    collection_id = collection.id
    fe_rows = fe_text[fe_text["Collection"] == collection_id]

    if fe_rows.empty:
        print('Collection not found in FE-text, no metadata update needed...')
        return

    print(f'Collection found in FE-text, metadata will be updated for: {collection_id}')

    # Only the first matching row is used; empty Excel cells arrive as NaN (not str)
    fe_title = fe_rows["Title"].values[0]
    fe_description = fe_rows["Markdown"].values[0]
    fe_keywords = fe_rows["Keywords"].values[0]
    fe_styling = fe_rows["Styling"].values[0]

    if isinstance(fe_title, str):
        collection.title = fe_title

    if isinstance(fe_description, str):
        collection.description = fe_description

    # The type check must happen BEFORE splitting: a split result is a list, and an
    # empty cell (NaN) has no .split at all.
    if isinstance(fe_keywords, str):
        # keywords may be None on a collection that never had any
        keywords = list(collection.keywords or [])
        for keyword in (part.strip() for part in fe_keywords.split(",")):
            # Skip blanks and duplicates so re-running the update stays idempotent
            if keyword and keyword not in keywords:
                keywords.append(keyword)
        collection.keywords = keywords

    # Decide on the same cell that is parsed below, so a blank first row can never
    # reach json.loads
    if not isinstance(fe_styling, str) or not fe_styling.strip():
        print(f"No styling found for collection {collection_id}, skipping...")
    else:
        # Convert the JSON string into a Python dictionary
        styling = json.loads(fe_styling)

        # Only the "deltares:"-prefixed entries are copied onto the collection
        for key, value in styling.items():
            if key.startswith('deltares:'):
                collection.extra_fields[key] = value

    # Add front-end labels to the collection
    collection = add_FE_labels_to_collection(collection, fe_labels, fe_descriptions)

    # Save collection
    collection.save()

def add_FE_labels_to_collection(collection, fe_labels, fe_descriptions):
    """Attach front-end labels and descriptions for the collection's summaries.

    For every summary key of the collection that also appears in `fe_labels`,
    a {raw value -> label} mapping is built; raw values without a label map to
    themselves. For every summary key that appears in `fe_descriptions`, the
    description text is copied. The results are stored in
    `collection.extra_fields` under 'summaries_labels' and
    'summaries_descriptions'.

    Returns the same collection object that was passed in.
    """
    summaries = collection.summaries.to_dict()

    # Summary key -> {raw value -> front-end label}, only for keys that have labels
    labels_by_key = {
        name: {raw: fe_labels[name].get(raw, raw) for raw in raw_values}
        for name, raw_values in summaries.items()
        if name in fe_labels
    }

    # Summary key -> front-end description, only for keys that have one
    descriptions_by_key = {
        name: fe_descriptions[name]
        for name in summaries
        if name in fe_descriptions
    }

    collection.extra_fields['summaries_labels'] = labels_by_key
    collection.extra_fields['summaries_descriptions'] = descriptions_by_key

    return collection

def update_catalog_FE_text(catalog, fe_text_file, fe_labels, fe_descriptions):
    """Update every collection listed in the FE-text Excel file and save the catalog.

    The unique ids in the "Collection" column of sheet "Sheet1" are looked up
    as direct children of `catalog`. Each child that exists is passed to
    `update_collection_FE_text`; ids without a matching child are reported
    and skipped.

    Parameters
    ----------
    catalog : object exposing `get_child(id)` and `save()` (e.g. a pystac Catalog).
    fe_text_file : path of the FE-text Excel workbook.
    fe_labels : dict mapping summary name -> {raw value -> label}.
    fe_descriptions : dict mapping summary name -> description text.

    Returns
    -------
    None. The catalog is saved once, after all collections were processed, and
    only when at least one collection was updated.
    """
    # Load FE_text excel file
    fe_text = pd.read_excel(fe_text_file, sheet_name="Sheet1")

    # Blank spreadsheet rows come through as NaN and can never name a collection
    fe_collections = fe_text["Collection"].dropna().unique()

    updated_any = False
    for fe_collection_id in fe_collections:
        # Look the child up once and reuse the result
        collection = catalog.get_child(fe_collection_id)
        if not collection:
            print(f'Collection {fe_collection_id} not found in catalog, skipping...')
            continue

        print(f'Updating collection: {fe_collection_id}')
        update_collection_FE_text(collection, fe_text_file, fe_labels, fe_descriptions)
        updated_any = True

    # One save after the loop instead of rewriting the whole catalog per collection
    if updated_any:
        catalog.save()

def reset_catalog(folder="current", branch="main"):
    """Restore `folder` to its state on `branch` via `git checkout <branch> -- <folder>`.

    Git is executed from the parent of the current working directory. A
    failing git command is reported with a printed message instead of being
    raised; on success a confirmation is printed.
    """
    # One level above the working directory (the notebook folder) is where git runs
    repo_path = Path.cwd().parents[0]
    checkout_cmd = ["git", "checkout", branch, "--", folder]

    try:
        subprocess.run(checkout_cmd, check=True, cwd=repo_path)
    except subprocess.CalledProcessError as e:
        print(f"❌ Git command failed: {e}")
    else:
        print(f"✅ STAC Catalog and collections in folder: '{folder}' reset to match '{branch}' branch.")


In [14]:
# Check out the "current" folder from the main branch before applying the FE-text update
reset_catalog(folder="current", branch="main")

✅ STAC Catalog and collections in folder: 'current' reset to match 'main' branch.


In [15]:
# Apply titles, descriptions, keywords, styling and FE labels to every listed collection
update_catalog_FE_text(
    catalog=catalog,
    fe_text_file=fe_text_file,
    fe_labels=fe_labels,
    fe_descriptions=fe_descriptions,
)

Updating collection: slp
Collection found in FE-text, metadata will be updated for: slp
Updating collection: ssl
Collection found in FE-text, metadata will be updated for: ssl
No styling found for collection ssl, skipping...
Updating collection: eesl
Collection found in FE-text, metadata will be updated for: eesl
No styling found for collection eesl, skipping...
Updating collection: cfhp_all_maps
Collection found in FE-text, metadata will be updated for: cfhp_all_maps
Updating collection: coaster
Collection found in FE-text, metadata will be updated for: coaster
Updating collection: cba
Collection found in FE-text, metadata will be updated for: cba
Updating collection: be_maps
Collection found in FE-text, metadata will be updated for: be_maps
Updating collection: pp_maps
Collection found in FE-text, metadata will be updated for: pp_maps
Updating collection: bc_maps
Collection found in FE-text, metadata will be updated for: bc_maps
Updating collection: LAU_CM
Collection found in FE-text