# Glacier grids from RGI:

This notebook creates the monthly grid files that MassBalanceMachine (MBM) needs to make PMB predictions over the whole glacier grid. The files are built from the RGI grid combined with OGGM topography. Expect long run times: the conversion to monthly format is slow.
## Setting up:

In [None]:
# --- System & utilities ---
import os
import sys
import warnings
from tqdm.notebook import tqdm

# Add repo root for MBM imports
sys.path.append(os.path.join(os.getcwd(), "../../"))

# --- Data science stack ---
import matplotlib.pyplot as plt

# --- Custom MBM modules ---
import massbalancemachine as mbm

# --- Warnings & autoreload (notebook) ---
warnings.filterwarnings("ignore")
%load_ext autoreload
%autoreload 2


# --- Configuration ---
cfg = mbm.EuropeConfig()

from regions.TF_Europe.scripts.config_TF_Europe import *
from regions.TF_Europe.scripts.oggm import *
from regions.TF_Europe.scripts.geodata import *

# Reproducibility: seed everything from the configured seed, then apply the
# MBM plotting style.
mbm.utils.seed_all(cfg.seed)
mbm.plots.use_mbm_style()

print("Using seed:", cfg.seed)

# Report whether a GPU is usable; when it is, ask MBM to free CUDA resources
# before continuing.
if not torch.cuda.is_available():
    print("CUDA is NOT available")
else:
    print("CUDA is available")
    mbm.utils.free_up_cuda()

# Torch device used downstream: GPU when available, otherwise CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

## Initialize OGGM directories:

In [None]:
def rgi_outline_path_from_spec(cfg, spec: dict) -> str:
    """
    Build the path of a region's RGI outline file.

    The path is ``<cfg.dataPath>/RGI_v6/<spec["folder"]>/<spec["file"]>``.

    Parameters
    ----------
    cfg
        Configuration object exposing a ``dataPath`` attribute.
    spec : dict
        Region specification; must contain the keys ``"folder"`` and
        ``"file"``.

    Returns
    -------
    str
        The outline path as a string.

    Raises
    ------
    KeyError
        If ``spec`` lacks ``"folder"`` or ``"file"``.
    """
    relative_parts = ("RGI_v6", spec["folder"], spec["file"])
    return str(Path(cfg.dataPath).joinpath(*relative_parts))


def plot_missing_for_one_region(
    df_missing,
    cfg,
    region_id: str,
    region_spec: dict,
    epsg_area: int = 3035,
):
    """
    Print area statistics and plot counts of glaciers with missing variables
    for a single region.

    The region's RGI outlines are read, reprojected to ``epsg_area`` and used
    to attach an area (km²) to every glacier listed in ``df_missing``. The
    function then prints the total area affected by any missing variable and
    the share of the region's glacier area missing per variable, and shows a
    bar chart of the number of glaciers missing each variable.

    Parameters
    ----------
    df_missing
        Table of glaciers with missing variables, or ``None``. Expected
        columns:
          - rgi_id (string)
          - missing_vars (list-like)
    cfg
        Configuration object passed on to ``rgi_outline_path_from_spec``.
    region_id : str
        Region identifier, used only for labelling the output.
    region_spec : dict
        Region specification; its optional ``"name"`` entry is used in labels
        and the whole dict is used to locate the outline file.
    epsg_area : int
        EPSG code of the CRS in which polygon areas are computed. Areas are
        divided by 1e6 to get km², which assumes the CRS uses metres.

    Returns
    -------
    None
        Returns early, after printing a short message, when ``df_missing`` is
        ``None`` or empty.
    """
    nothing_missing = df_missing is None or len(df_missing) == 0
    if nothing_missing:
        print(
            f"[{region_id} {region_spec.get('name','')}] No missing glaciers.")
        return

    # Outlines of this region only, reprojected so that areas are meaningful.
    outlines = gpd.read_file(rgi_outline_path_from_spec(cfg, region_spec))
    outlines = outlines.to_crs(epsg_area)
    outlines = outlines.rename(columns={"RGIId": "rgi_id"})
    outlines["area_km2"] = outlines.geometry.area / 1e6
    total_area = outlines["area_km2"].sum()

    # Attach each missing glacier's area; glaciers without a matching outline
    # keep a NaN area (left join).
    missing_with_area = df_missing.merge(
        outlines[["rgi_id", "area_km2"]],
        on="rgi_id",
        how="left",
    )

    # Area affected by at least one missing variable.
    total_missing_area_km2 = missing_with_area["area_km2"].sum()
    if total_area:
        total_missing_area_pct = (total_missing_area_km2 / total_area) * 100
    else:
        total_missing_area_pct = float("nan")

    # One row per (glacier, missing variable), grouped by variable.
    by_variable = missing_with_area.explode("missing_vars").groupby(
        "missing_vars")
    glacier_counts = by_variable["rgi_id"].nunique().sort_values(
        ascending=False)
    area_by_variable = by_variable["area_km2"].sum().sort_values(
        ascending=False)
    if total_area:
        pct_by_variable = (area_by_variable / total_area) * 100
    else:
        pct_by_variable = area_by_variable * 0

    # Text summary
    region_label = f"{region_id} {region_spec.get('name','')}".strip()
    print(f"\n[{region_label}]")
    print(f"Total glacier area with ANY missing variable: "
          f"{total_missing_area_km2:,.2f} km² ({total_missing_area_pct:.2f}%)")

    print("% of total glacier area missing per variable:")
    for var, pct in pct_by_variable.items():
        print(f"  - {var}: {pct:.2f}%")

    # Bar chart: number of glaciers missing each variable
    plt.figure(figsize=(7, 4))
    plt.bar(glacier_counts.index.astype(str), glacier_counts.values)
    plt.xlabel("Missing variable")
    plt.ylabel("Number of glaciers")
    plt.title(f"{region_label} – Count of glaciers missing each variable")
    plt.xticks(rotation=45, ha="right")
    plt.tight_layout()
    plt.show()

In [None]:
# Loop over every region listed in RGI_REGIONS (the European regions).
# NOTE: `rgi_id` here is the *region* key of RGI_REGIONS (it is passed on as
# `rgi_region`), not an individual glacier id.
for rgi_id, spec in RGI_REGIONS.items():
    print(f"Processing glacier region id: {rgi_id}...")

    # Set up the OGGM glacier directories for this region from the
    # pre-processed directories hosted at `base_url`.
    # `rgidf` is not used further in this cell.
    gdirs, rgidf = initialize_oggm_glacier_directories(
        cfg,
        rgi_region=rgi_id,
        rgi_version="62",
        base_url=
        "https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/L1-L2_files/2025.6/elev_bands_w_data/",
        log_level="WARNING",
        task_list=None,
    )

    # Export the grids for this region; the return value is the table of
    # glaciers with missing variables that is summarised below.
    # NOTE(review): presumably the grids are written under cfg.dataPath -
    # confirm in export_oggm_grids.
    df_missing = export_oggm_grids(cfg, gdirs, rgi_region=rgi_id)

    # Plot + stats for THIS region's missing table
    plot_missing_for_one_region(df_missing, cfg, rgi_id, spec, epsg_area=3035)

## Export GeoTIFFs of DEMs:

This is necessary to compute the SVF from DEMs in the next notebook.

In [None]:
# Switch for the DEM export below; set to False to skip it.
RUN = True
if RUN:
    for rgi_region, spec in RGI_REGIONS.items():
        # Region key as a zero-padded two-character string, e.g. "06".
        rgi_region = str(rgi_region).zfill(2)

        data_root = Path(cfg.dataPath)
        # Input: per-glacier grids of this region.
        path_RGIs = data_root / f"OGGM/rgi_region_{rgi_region}" / "xr_grids"
        # Output: GeoTIFF folder next to the region's RGI outlines.
        path_geotiff = data_root / "RGI_v6" / spec["folder"] / "geotiff"

        # Nothing to export when the region has no grid folder.
        if not path_RGIs.is_dir():
            print(
                f"Skipping RGI {rgi_region}: xr_grids not found: {path_RGIs}")
            continue

        # One "*.zarr" store per glacier, processed in sorted order.
        zarr_paths = sorted(path_RGIs.glob("*.zarr"))
        print(
            f"\nRGI {rgi_region} ({spec.get('name','')}): found {len(zarr_paths)} glacier zarr stores"
        )

        # Start from an empty output folder for this region
        # (NOTE(review): this presumably deletes earlier exports - confirm
        # that is the intended behavior of emptyfolder).
        emptyfolder(str(path_geotiff))

        for zp in tqdm(zarr_paths, desc=f"Export DEMs RGI {rgi_region}"):
            # Glacier id = store name without ".zarr", e.g. "RGI60-06.00568"
            rgi_gl = zp.stem

            try:
                export_glacier_dems_to_geotiff(
                    str(path_RGIs),  # xr_grids folder
                    rgi_gl,  # glacier id
                    str(path_geotiff),  # output folder
                )
            except ValueError as e:
                # A ValueError skips only this glacier; the loop moves on.
                print(f"Skipping {rgi_gl}: {e}")