# Glacier grids from RGI:

This notebook creates the monthly grid files that the MassBalanceMachine (MBM) needs to make point mass balance (PMB) predictions over the whole grid of each glacier. The files are built from the RGI grids with OGGM topography. The computation takes a long time because every grid has to be converted to monthly format.
## Setting up:

In [None]:
# --- System & utilities ---
import os
import sys
import re
import csv
import ast
import math
import traceback
import itertools
import random
import pickle
import logging
import warnings
from datetime import datetime
from functools import partial
from collections import Counter, defaultdict
from concurrent.futures import ProcessPoolExecutor, as_completed

# Add repo root for MBM imports
sys.path.append(os.path.join(os.getcwd(), "../../"))

# --- Data science stack ---
import numpy as np
import pandas as pd
import xarray as xr
import rioxarray
from tqdm.notebook import tqdm
import seaborn as sns
import matplotlib.pyplot as plt
from cmcrameri import cm

# --- Machine learning / DL ---
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset, WeightedRandomSampler, SubsetRandomSampler
from torch.optim.lr_scheduler import ReduceLROnPlateau
from skorch.helper import SliceDataset
from skorch.callbacks import EarlyStopping, LRScheduler, Checkpoint

# --- Cartography / plotting ---
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER

# --- Custom MBM modules ---
import massbalancemachine as mbm
from scripts.helpers import *
from scripts.glamos_preprocess import *
from scripts.plots import *
from scripts.config_CH import *
from scripts.nn_helpers import *
from scripts.xgb_helpers import *
from scripts.geodata import *
from scripts.NN_networks import *
from scripts.geodata_plots import *

# --- Warnings & autoreload (notebook) ---
warnings.filterwarnings("ignore")
%load_ext autoreload
%autoreload 2

# --- Configuration ---
cfg = mbm.SwitzerlandConfig()
seed_all(cfg.seed)
print("Using seed:", cfg.seed)

# --- CUDA / device ---
if torch.cuda.is_available():
    print("CUDA is available")
    free_up_cuda()
else:
    print("CUDA is NOT available")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Initialise the OGGM glacier directories for RGI region "11".
gdirs, rgidf = initialize_oggm_glacier_directories(
    cfg,
    rgi_region="11",
    rgi_version="62",
    base_url="https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/L1-L2_files/2025.6/elev_bands_w_data/",
    log_level='WARNING',
    task_list=None,
)

# Export the OGGM grids. The returned frame is used below with an `rgi_id`
# column and a list-like `missing_vars` column.
# NOTE(review): presumably one row per glacier that lacks at least one
# variable -- confirm against export_oggm_grids.
df_missing = export_oggm_grids(cfg, gdirs)

# --- Glacier areas from the RGI outlines ---
path_rgi = cfg.dataPath + 'GLAMOS/RGI/nsidc0770_11.rgi60.CentralEurope/11_rgi60_CentralEurope.shp'
gdf = gpd.read_file(path_rgi)

# Areas are measured after reprojecting to EPSG:3035.
gdf_proj = gdf.to_crs(3035).rename(columns={"RGIId": "rgi_id"})
gdf_proj["area_m2"] = gdf_proj.geometry.area
gdf_proj["area_km2"] = gdf_proj["area_m2"] / 1e6

# Attach the area to every glacier listed in df_missing.
df_missing = pd.merge(df_missing,
                      gdf_proj[['area_km2', 'rgi_id']],
                      on="rgi_id")

# Area of all glaciers in the shapefile.
total_area = gdf_proj["area_km2"].sum()

# --- Statistics per missing variable ---
# One row per (glacier, missing variable) pair.
df_exploded = df_missing.explode("missing_vars")
per_var = df_exploded.groupby("missing_vars")

# 1) Number of distinct glaciers missing each variable.
counts_missing_per_var = per_var["rgi_id"].nunique().sort_values(
    ascending=False)

# 2) Area (absolute and relative) of the glaciers listed in df_missing.
total_missing_area_km2 = df_missing["area_km2"].sum()
total_missing_area_pct = total_missing_area_km2 / total_area * 100

print(f"Total glacier area with ANY missing variable: {total_missing_area_km2:,.2f} km² ({total_missing_area_pct:.2f}%)")

# 3) Share of the total glacier area affected by each missing variable.
area_missing_per_var = per_var["area_km2"].sum().sort_values(ascending=False)
perc_missing_per_var = area_missing_per_var / total_area * 100

print("\n% of total glacier area missing per variable:")
for var, pct in perc_missing_per_var.items():
    print(f"  - {var}: {pct:.2f}%")

# --- Bar plot: number of glaciers missing each variable ---
fig, ax = plt.subplots(figsize=(7, 4))
ax.bar(counts_missing_per_var.index, counts_missing_per_var.values)
ax.set_xlabel("Missing variable")
ax.set_ylabel("Number of glaciers")
ax.set_title("Count of glaciers missing each variable")
fig.tight_layout()
plt.show()

In [None]:
# Glacier id table: strip whitespace from the header names, then order and
# index the rows by the glacier short name.
rgi_df = (
    pd.read_csv(cfg.dataPath + path_glacier_ids, sep=',')
    .rename(columns=str.strip)
    .sort_values(by='short_name')
    .set_index('short_name')
)
rgi_df.loc['rhone']

## Create RGI grids for all glaciers:

### Create masked xarray grids:

In [None]:
def create_masked_glacier(path_RGIs, rgi_gl, path_out_tiff):
    """
    Open ``<rgi_gl>.zarr`` from ``path_RGIs``, add glacier-masked copies of
    its variables and write the masked elevation to
    ``<path_out_tiff>/<rgi_gl>.tif``.

    Masked variables are NaN wherever ``glacier_mask`` equals 0. The slope,
    topo, aspect and dis_from_border variables are required; the Hugonnet
    dh/dt, consensus ice thickness and Millan velocity variables are masked
    only when the dataset contains them.

    The GeoTIFF is tagged as EPSG:2056; no reprojection is done here.
    NOTE(review): confirm the x/y coordinates of the input really are LV95.

    Parameters:
        path_RGIs: folder holding the per-glacier ``.zarr`` stores.
        rgi_gl: glacier identifier, used for the input and output file names.
        path_out_tiff: output folder for the GeoTIFF (created if missing).

    Returns:
        (ds, glacier_indices): the dataset extended with the ``masked_*``
        variables, and the ``np.where`` result locating the cells where
        ``glacier_mask`` equals 1.

    Raises:
        ValueError: if the dataset has no ``glacier_mask`` variable.
    """
    ds = xr.open_zarr(os.path.join(path_RGIs, f"{rgi_gl}.zarr"))

    if "glacier_mask" not in ds:
        raise ValueError(
            f"'glacier_mask' variable not found in dataset {rgi_gl}")

    # Materialise the mask once; cells with value 0 become NaN so that the
    # multiplication below blanks everything outside the glacier.
    mask_values = ds["glacier_mask"].values
    nan_outside = np.where(mask_values == 0, np.nan, mask_values)

    # new variable name -> source variable
    required = {
        "masked_slope": "slope",
        "masked_elev": "topo",
        "masked_aspect": "aspect",
        "masked_dis": "dis_from_border",
    }
    optional = {
        "masked_hug": "hugonnet_dhdt",
        "masked_cit": "consensus_ice_thickness",
        "masked_miv": "millan_v",
    }

    ds = ds.assign(
        **{new: nan_outside * ds[src]
           for new, src in required.items()})
    ds = ds.assign(
        **{
            new: nan_outside * ds[src]
            for new, src in optional.items() if src in ds
        })

    # Positions of the glacier cells.
    glacier_indices = np.where(mask_values == 1)

    # Tag the masked elevation with its CRS and write it as a GeoTIFF.
    dem = ds["masked_elev"].rio.write_crs("EPSG:2056", inplace=True)

    os.makedirs(path_out_tiff, exist_ok=True)
    raster_options = dict(
        dtype="float32",
        compress="LZW",
        BIGTIFF="IF_SAFER",
        tiled=True,
        predictor=3,  # better compression for float rasters
    )
    dem.rio.to_raster(os.path.join(path_out_tiff, f"{rgi_gl}.tif"),
                      **raster_options)

    return ds, glacier_indices

In [None]:
# Input and output folders for the RGI v6 region 11 grids.
rgi_topo_root = os.path.join(cfg.dataPath, "GLAMOS/topo/RGI_v6_11")
path_xr_grids = os.path.join(rgi_topo_root, "xr_masked_grids/")  # masked lat/lon grids (output)
path_xr_svf = os.path.join(rgi_topo_root, "svf_nc_latlon/")  # SVF NetCDF files (input)
path_geotiff = os.path.join(rgi_topo_root, "geotiff_meters_lv95/")  # masked DEM GeoTIFFs (output)
path_RGIs = cfg.dataPath + path_OGGM + "xr_grids/"  # exported OGGM grids (input)

glaciers = os.listdir(path_RGIs)
print(f"Found {len(glaciers)} glaciers in RGI region 11.6")

# Build one masked lat/lon Zarr grid per glacier, enriched with the SVF
# variables when an SVF file exists. Guarded by RUN because the output folder
# is passed to emptyfolder() first.
# NOTE(review): emptyfolder presumably deletes the existing content of
# path_xr_grids -- confirm before setting RUN = True.
RUN = False
if RUN:
    emptyfolder(path_xr_grids)

    for gdir in tqdm(gdirs):
        rgi_gl = gdir.rgi_id

        try:
            # 1) Build the masked OGGM grid (x/y); this also writes the masked
            #    DEM GeoTIFF into path_geotiff. glacier_indices is not used
            #    further in this loop.
            ds, glacier_indices = create_masked_glacier(
                path_RGIs, rgi_gl, path_geotiff)
        except ValueError as e:
            # Raised by create_masked_glacier when 'glacier_mask' is missing.
            print(f"Skipping {rgi_gl}: {e}")
            continue

        # 2) (Optional) coarsen in projected space: only grids whose x
        #    resolution lies strictly between 20 and 50 are coarsened
        #    (dy_m is read but not used).
        dx_m, dy_m = get_res_from_projected(ds)
        if 20 < dx_m < 50:
            ds = coarsenDS_mercator(ds, target_res_m=50)

        # 3) Reproject masked grid to WGS84 lat/lon, using the projection
        #    string stored on the dataset as the source CRS.
        original_proj = ds.pyproj_srs
        ds = ds.rio.write_crs(original_proj)
        ds_latlon = ds.rio.reproject("EPSG:4326").rename({
            "x": "lon",
            "y": "lat"
        })

        # 4) Load corresponding SVF (already in lat/lon) and merge
        svf_path = os.path.join(path_xr_svf, f"{rgi_gl}_svf_latlon.nc")
        if not os.path.exists(svf_path):
            print(f"SVF not found for {rgi_gl}: {svf_path}")
            # still save ds_latlon without SVF
            ds_latlon.to_zarr(os.path.join(path_xr_grids, f"{rgi_gl}.zarr"))
            continue

        # NOTE(review): ds_svf is never closed explicitly.
        ds_svf = xr.open_dataset(svf_path)

        # Make sure coords are named lon/lat
        # NOTE(review): each rename maps both names at once; presumably the
        # files always carry both x and y (or both longitude and latitude)
        # -- confirm.
        if "x" in ds_svf.dims or "y" in ds_svf.dims:
            ds_svf = ds_svf.rename({"x": "lon", "y": "lat"})
        if "longitude" in ds_svf.dims or "latitude" in ds_svf.dims:
            ds_svf = ds_svf.rename({"longitude": "lon", "latitude": "lat"})

        # Sort ascending for interpolation stability
        if ds_latlon.lon[0] > ds_latlon.lon[-1]:
            ds_latlon = ds_latlon.sortby("lon")
        if ds_latlon.lat[0] > ds_latlon.lat[-1]:
            ds_latlon = ds_latlon.sortby("lat")
        if ds_svf.lon[0] > ds_svf.lon[-1]: ds_svf = ds_svf.sortby("lon")
        if ds_svf.lat[0] > ds_svf.lat[-1]: ds_svf = ds_svf.sortby("lat")

        # Only the SVF-related variables that this file actually contains.
        svf_vars = [
            v for v in ["svf", "asvf", "opns"] if v in ds_svf.data_vars
        ]

        # If grids match, merge; else interpolate SVF to ds_latlon grid
        if (np.array_equal(ds_latlon.lon.values, ds_svf.lon.values)
                and np.array_equal(ds_latlon.lat.values, ds_svf.lat.values)):
            ds_latlon = xr.merge([ds_latlon, ds_svf[svf_vars]])
        else:
            svf_on_grid = ds_svf[svf_vars].interp(lon=ds_latlon.lon,
                                                  lat=ds_latlon.lat,
                                                  method="linear")
            # Store the interpolated fields as float32.
            for v in svf_vars:
                svf_on_grid[v] = svf_on_grid[v].astype("float32")
            ds_latlon = ds_latlon.assign(
                **{v: svf_on_grid[v]
                   for v in svf_vars})

        # Add masked versions using glacier_mask already in ds_latlon
        # (NaN wherever the reprojected mask is not exactly 1).
        if "glacier_mask" in ds_latlon:
            gmask = xr.where(ds_latlon["glacier_mask"] == 1, 1.0, np.nan)
            for v in svf_vars:
                ds_latlon[f"masked_{v}"] = gmask * ds_latlon[v]

        # 5) Save final lat/lon grid (with SVF) to Zarr
        save_path = os.path.join(path_xr_grids, f"{rgi_gl}.zarr")
        ds_latlon.to_zarr(save_path)

# Quick visual check: masked DEM GeoTIFF of one glacier.
rhone_rgi = "RGI60-11.01238"
rhone_path = os.path.join(path_geotiff, f"{rhone_rgi}.tif")

# Open the raster and drop its length-1 dimensions.
rhone = rioxarray.open_rasterio(rhone_path).squeeze()

fig, ax = plt.subplots(figsize=(8, 6))
rhone.plot(ax=ax, cmap="terrain")
ax.set_title(f"DEM of Glacier {rhone_rgi}", fontsize=13)
ax.set_xlabel("Easting [m]")
ax.set_ylabel("Northing [m]")
plt.show()

In [None]:
# Open the saved lat/lon grid of the same glacier and plot its SVF field.
rhone_grid = xr.open_zarr(os.path.join(path_xr_grids, f"{rhone_rgi}.zarr"))
rhone_grid["svf"].plot()

### Create monthly dataframes:

In [None]:
# # Climate columns
# vois_climate = [
#     't2m', 'tp', 'slhf', 'sshf', 'ssrd', 'fal', 'str', 'u10', 'v10'
# ]
# # Topographical columns
# vois_topographical = [
#     "aspect",
#     "slope",
#     "hugonnet_dhdt",
#     "consensus_ice_thickness",
#     "millan_v",
#     "topo",
#     "svf"
# ]

# RUN = True
# path_rgi_alps = os.path.join(cfg.dataPath, 'GLAMOS/topo/gridded_topo_inputs/RGI_v6_11_svf/')
# #emptyfolder(path_rgi_alps)

# # ---- helpers ----
# def expected_fname(rgi_gl: str, year: int) -> str:
#     # Expected: RGI60-11.00001_grid_1999.parquet
#     return f"{rgi_gl}_grid_{year}.parquet"

# def years_present_for_glacier(folder_path: str, rgi_gl: str) -> set:
#     """Return the set of 4-digit years found for this glacier in its output folder."""
#     if not os.path.isdir(folder_path):
#         return set()
#     rx = re.compile(rf"^{re.escape(rgi_gl)}_grid_(\d{{4}})\.parquet$")
#     years_found = set()
#     for f in os.listdir(folder_path):
#         m = rx.match(f)
#         if m:
#             years_found.add(int(m.group(1)))
#     return years_found

# def glacier_is_complete(rgi_gl: str, years: range) -> bool:
#     folder_path = os.path.join(path_rgi_alps, rgi_gl)
#     found = years_present_for_glacier(folder_path, rgi_gl)
#     return set(years).issubset(found)

# # ---- main ----
# if RUN:
#     # inclusive 1999..2024
#     years = range(1999, 2025)

#     os.makedirs(path_rgi_alps, exist_ok=True)

#     valid_rgis = [
#         f.replace('.zarr', '') for f in os.listdir(path_xr_grids)
#         if f.endswith('.zarr')
#     ]

#     # Glaciers that are already complete (all yearly files exist)
#     complete_rgis = [r for r in valid_rgis if glacier_is_complete(r, years)]
#     # Glaciers that still need work
#     rest_rgis = list(set(valid_rgis) - set(complete_rgis))

#     print(f"Glaciers already complete: {len(complete_rgis)}")
#     print(f"Number of glaciers to process: {len(rest_rgis)}")

#     for gdir in tqdm(gdirs, desc="Processing glaciers"):
#         rgi_gl = gdir.rgi_id

#         if rgi_gl not in valid_rgis:
#             print(f"Skipping {rgi_gl}: not found in valid RGI glaciers")
#             continue

#         # Skip if already fully complete
#         if glacier_is_complete(rgi_gl, years):
#             continue

#         try:
#             file_path = os.path.join(path_xr_grids, f"{rgi_gl}.zarr")
#             if not os.path.exists(file_path):
#                 raise FileNotFoundError(f"Missing file: {file_path}")

#             # Open Zarr
#             try:
#                 ds = xr.open_zarr(file_path, consolidated=True)
#             except Exception:
#                 ds = xr.open_zarr(file_path)

#             # Build grid for all years once
#             try:
#                 df_grid = create_glacier_grid_RGI(ds, years, rgi_gl)
#             except Exception as e:
#                 print(f"Failed creating glacier grid for {rgi_gl}: {e}")
#                 continue

#             df_grid.reset_index(drop=True, inplace=True)

#             # Add GLWD_ID and GLACIER columns
#             df_grid['GLWD_ID'] = [
#                 mbm.data_processing.utils.get_hash(f"{r}_{y}")
#                 for r, y in zip(df_grid['RGIId'].astype(str),
#                                 df_grid['YEAR'].astype(str))
#             ]
#             df_grid['GLWD_ID'] = df_grid['GLWD_ID'].astype(str)
#             df_grid['GLACIER'] = df_grid['RGIId']

#             # Output folder
#             folder_path = os.path.join(path_rgi_alps, rgi_gl)
#             os.makedirs(folder_path, exist_ok=True)

#             # Determine missing years for this glacier (idempotent)
#             existing_years = years_present_for_glacier(folder_path, rgi_gl)
#             missing_years = [y for y in years if y not in existing_years]

#             if not missing_years:
#                 # Another process may have finished meanwhile
#                 continue

#             for year in missing_years:
#                 try:
#                     df_grid_y = df_grid[df_grid.YEAR == year].copy()
#                     if df_grid_y.empty:
#                         # No data for that year; keep going
#                         continue

#                     # Build dataset & add climate features
#                     try:
#                         dataset_grid_yearly = mbm.data_processing.Dataset(
#                             cfg=cfg,
#                             data=df_grid_y,
#                             region_name='CH',
#                             region_id=11,
#                             data_path=os.path.join(cfg.dataPath, path_PMB_GLAMOS_csv)
#                         )

#                         era5_climate_data = os.path.join(
#                             cfg.dataPath, path_ERA5_raw, 'era5_monthly_averaged_data_Alps.nc'
#                         )
#                         geopotential_data = os.path.join(
#                             cfg.dataPath, path_ERA5_raw, 'era5_geopotential_pressure_Alps.nc'
#                         )

#                         dataset_grid_yearly.get_climate_features(
#                             climate_data=era5_climate_data,
#                             geopotential_data=geopotential_data,
#                             change_units=True,
#                             smoothing_vois={'vois_climate': vois_climate,
#                                             'vois_other': ['ALTITUDE_CLIMATE']}
#                         )
#                     except Exception as e:
#                         print(f"Failed adding climate features for {rgi_gl} (year {year}): {e}")
#                         continue

#                     vois_topographical_sub = [voi for voi in vois_topographical
#                                               if voi in df_grid_y.columns]

#                     dataset_grid_yearly.convert_to_monthly(
#                         meta_data_columns=cfg.metaData,
#                         vois_climate=vois_climate,
#                         vois_topographical=vois_topographical_sub
#                     )

#                     save_path = os.path.join(folder_path, expected_fname(rgi_gl, year))
#                     # If a stale/partial file exists, overwrite it
#                     dataset_grid_yearly.data.to_parquet(
#                         save_path, engine="pyarrow", compression="snappy"
#                     )

#                 except Exception as e:
#                     print(f"Failed processing {rgi_gl} for year {year}: {e}")
#                     continue

#         except Exception as e:
#             print(f"Error with glacier {rgi_gl}: {e}")
#             continue

In [None]:
# Climate columns passed to the MBM dataset (climate features and monthly
# conversion).
vois_climate = [
    't2m', 'tp', 'slhf', 'sshf', 'ssrd', 'fal', 'str', 'u10', 'v10'
]
# Topographical columns; only those present in a glacier's grid are used.
vois_topographical = [
    "aspect", "slope", "hugonnet_dhdt", "consensus_ice_thickness", "millan_v",
    "topo", "svf"
]

# Set to False to define the helpers without launching the processing.
RUN = True
# Output root: one sub-folder per glacier, one parquet file per year.
path_rgi_alps = os.path.join(cfg.dataPath,
                             'GLAMOS/topo/gridded_topo_inputs/RGI_v6_11_svf/')
# emptyfolder(path_rgi_alps)

# Avoid BLAS/OpenMP oversubscription inside each worker.
# NOTE(review): these are set after numpy/torch were imported, so they
# presumably only affect libraries initialised later (e.g. in freshly started
# worker processes) -- confirm for the start method in use.
os.environ.setdefault("OMP_NUM_THREADS", "1")
os.environ.setdefault("OPENBLAS_NUM_THREADS", "1")
os.environ.setdefault("MKL_NUM_THREADS", "1")
os.environ.setdefault("NUMEXPR_NUM_THREADS", "1")

# ------------ config ------------
# Years 2000..2023: the end of a range is exclusive.
# NOTE(review): the previous comment called this range "inclusive"; use
# range(2000, 2025) if 2024 is meant to be processed as well.
years = range(2000, 2024)
# os.cpu_count() may return None when the count cannot be determined; fall
# back to a single worker instead of failing in min(). Capped at 12 workers.
max_workers = min(os.cpu_count() or 1, 12)

# ------------ helpers (unchanged) ------------
def expected_fname(rgi_gl: str, year: int) -> str:
    """Return the parquet file name of one glacier-year, e.g.
    ``RGI60-11.00001_grid_1999.parquet``."""
    return "{}_grid_{}.parquet".format(rgi_gl, year)


def years_present_for_glacier(folder_path: str, rgi_gl: str) -> set:
    """Collect the years that already have a
    ``<rgi_gl>_grid_<YYYY>.parquet`` file in ``folder_path``.

    Returns an empty set when ``folder_path`` is not an existing directory.
    """
    if not os.path.isdir(folder_path):
        return set()
    pattern = re.compile(rf"^{re.escape(rgi_gl)}_grid_(\d{{4}})\.parquet$")
    hits = (pattern.match(entry) for entry in os.listdir(folder_path))
    return {int(hit.group(1)) for hit in hits if hit}


def glacier_is_complete(rgi_gl: str, years: range) -> bool:
    """Tell whether every year in ``years`` already has a parquet file in the
    glacier's folder under ``path_rgi_alps``."""
    available = years_present_for_glacier(
        os.path.join(path_rgi_alps, rgi_gl), rgi_gl)
    return all(year in available for year in years)


# ------------ per-glacier worker ------------
def process_one_glacier(rgi_gl: str) -> tuple[str, str]:
    """
    Create the monthly grid parquet files of one glacier for every year in
    ``years`` that does not have a file yet.

    Intended to run in a worker process. It reads the module-level settings
    ``path_xr_grids``, ``path_rgi_alps``, ``years``, ``cfg``,
    ``vois_climate`` and ``vois_topographical``. It never raises: every
    outcome is reported through the returned status string.

    A year that fails no longer aborts the glacier: the remaining years are
    still processed and all failures are reported together. Files are written
    to a temporary name and renamed afterwards, so an interrupted write does
    not leave a partial file that would later be counted as complete.

    Parameters:
        rgi_gl: glacier identifier; ``<rgi_gl>.zarr`` is read from
            ``path_xr_grids`` and the output goes to
            ``<path_rgi_alps>/<rgi_gl>/``.

    Returns:
        ``(rgi_gl, status)`` where status is ``'ok'``, ``'skip:<reason>'``
        or ``'error:<message>'``.
    """
    try:
        # Validate input files
        file_path = os.path.join(path_xr_grids, f"{rgi_gl}.zarr")
        if not os.path.exists(file_path):
            return (rgi_gl, f"skip:missing_zarr {file_path}")

        # Skip if already fully complete
        folder_path = os.path.join(path_rgi_alps, rgi_gl)
        os.makedirs(folder_path, exist_ok=True)
        existing_years = years_present_for_glacier(folder_path, rgi_gl)
        missing_years = [y for y in years if y not in existing_years]
        if not missing_years:
            return (rgi_gl, "skip:complete")

        # Open Zarr *inside* the worker; fall back to a plain open when the
        # consolidated open fails.
        try:
            ds = xr.open_zarr(file_path, consolidated=True)
        except Exception:
            ds = xr.open_zarr(file_path)

        # Build grid (once) for all years, then release the dataset.
        # NOTE(review): assumes create_glacier_grid_RGI returns a fully
        # materialised pandas DataFrame that no longer needs `ds`.
        try:
            df_grid = create_glacier_grid_RGI(ds, years, rgi_gl)
        except Exception as e:
            return (rgi_gl, f"error:create_grid {e}")
        finally:
            ds.close()

        df_grid = df_grid.reset_index(drop=True)

        # GLWD_ID (hash of "<RGIId>_<YEAR>") & GLACIER
        df_grid['GLWD_ID'] = [
            mbm.data_processing.utils.get_hash(f"{r}_{y}") for r, y in zip(
                df_grid['RGIId'].astype(str), df_grid['YEAR'].astype(str))
        ]
        df_grid['GLWD_ID'] = df_grid['GLWD_ID'].astype(str)
        df_grid['GLACIER'] = df_grid['RGIId']

        # Inputs that are identical for every year.
        pmb_data_path = os.path.join(cfg.dataPath, path_PMB_GLAMOS_csv)
        era5_climate_data = os.path.join(cfg.dataPath, path_ERA5_raw,
                                         'era5_monthly_averaged_data_Alps.nc')
        geopotential_data = os.path.join(cfg.dataPath, path_ERA5_raw,
                                         'era5_geopotential_pressure_Alps.nc')

        # Process only the missing years; remember failures and carry on.
        failed_years = []
        for year in missing_years:
            try:
                df_grid_y = df_grid[df_grid.YEAR == year].copy()
                if df_grid_y.empty:
                    continue

                # Build dataset & add climate features
                dataset_grid_yearly = mbm.data_processing.Dataset(
                    cfg=cfg,
                    data=df_grid_y,
                    region_name='CH',
                    region_id=11,
                    data_path=pmb_data_path)

                dataset_grid_yearly.get_climate_features(
                    climate_data=era5_climate_data,
                    geopotential_data=geopotential_data,
                    change_units=True,
                    smoothing_vois={
                        'vois_climate': vois_climate,
                        'vois_other': ['ALTITUDE_CLIMATE']
                    })

                # Keep only the topographical columns this glacier has.
                vois_topographical_sub = [
                    v for v in vois_topographical if v in df_grid_y.columns
                ]

                dataset_grid_yearly.convert_to_monthly(
                    meta_data_columns=cfg.metaData,
                    vois_climate=vois_climate,
                    vois_topographical=vois_topographical_sub)

                save_path = os.path.join(folder_path,
                                         expected_fname(rgi_gl, year))
                # The ".tmp" name is not matched by the completeness check;
                # the rename publishes the file only once it is fully written.
                tmp_path = save_path + ".tmp"
                dataset_grid_yearly.data.to_parquet(tmp_path,
                                                    engine="pyarrow",
                                                    compression="snappy")
                os.replace(tmp_path, save_path)

            except Exception as e:
                # keep going with other years but record error
                failed_years.append(f"year_{year} {e}")

        if failed_years:
            return (rgi_gl, "error:" + "; ".join(failed_years))
        return (rgi_gl, "ok")

    except Exception as e:
        return (rgi_gl, f"error:{e}\n{traceback.format_exc()}")


# ------------ main parallel driver ------------
if RUN:
    os.makedirs(path_rgi_alps, exist_ok=True)
    #emptyfolder(path_rgi_alps)

    # Every glacier that has a masked lat/lon grid on disk.
    valid_rgis = [
        name.replace('.zarr', '') for name in os.listdir(path_xr_grids)
        if name.endswith('.zarr')
    ]

    # Only glaciers that still miss at least one yearly file are submitted.
    targets = [
        rgi for rgi in valid_rgis if not glacier_is_complete(rgi, years)
    ]
    print(
        f"Total valid glaciers: {len(valid_rgis)} | Remaining to process: {len(targets)}"
    )

    # "ok" collects ids; "skip" and "error" collect (id, status) pairs.
    results = {bucket: [] for bucket in ("ok", "skip", "error")}

    with ProcessPoolExecutor(max_workers=max_workers) as ex:
        futs = {}
        for rgi in targets:
            futs[ex.submit(process_one_glacier, rgi)] = rgi

        for fut in tqdm(as_completed(futs), total=len(futs), desc="Glaciers"):
            rgi = futs[fut]
            try:
                rid, status = fut.result()
            except Exception as e:
                # The worker itself died (e.g. it could not be started).
                rid, status = rgi, f"error:{e}"

            if status.startswith("ok"):
                results["ok"].append(rgi)
                continue
            bucket = "skip" if status.startswith("skip") else "error"
            results[bucket].append((rgi, status))

    print(
        f"\nFinished. ok={len(results['ok'])}, skip={len(results['skip'])}, error={len(results['error'])}"
    )
    # Show at most the first ten failures.
    for rgi, msg in results["error"][:10]:
        print("  ", rgi, "→", msg)


In [None]:
# Look at one example: September t2m values of one glacier in two years.
# Note that this id differs from rhone_rgi ("RGI60-11.01238") despite the
# variable name.
for gdir in gdirs:
    if gdir.rgi_id == 'RGI60-11.00001':
        gdir_rhone = gdir

rgi_gl = gdir_rhone.rgi_id

for year in (2000, 2004):
    df = pd.read_parquet(
        os.path.join(path_rgi_alps, rgi_gl, f"{rgi_gl}_grid_{year}.parquet")
    ).loc[lambda frame: frame.MONTHS == 'sep']
    print(df['t2m'].unique())

In [None]:
# Look at one example: September maps of several variables for one glacier.
for gdir in gdirs:
    if gdir.rgi_id == 'RGI60-11.01238':
        gdir_rhone = gdir

year = 2000
rgi_gl = gdir_rhone.rgi_id

df = pd.read_parquet(
    os.path.join(path_rgi_alps, rgi_gl, f"{rgi_gl}_grid_{year}.parquet")
).loc[lambda frame: frame.MONTHS == 'sep']

# One panel per variable, coloured by that variable.
voi = [
    't2m', 'tp', 'ALTITUDE_CLIMATE', 'ELEVATION_DIFFERENCE', 'hugonnet_dhdt',
    'consensus_ice_thickness'
]
fig, axs = plt.subplots(2, 3, figsize=(15, 10))
axs = axs.flatten()

# Settings shared by all panels.
scatter_style = dict(x='POINT_LON',
                     y='POINT_LAT',
                     s=5,
                     alpha=0.5,
                     palette='twilight_shifted')
for i, var in enumerate(voi):
    sns.scatterplot(data=df, hue=var, ax=axs[i], **scatter_style)