# Glacier grids from RGI:

This notebook creates monthly grid files that the MassBalanceMachine (MBM) uses to make PMB predictions over the whole glacier grid. The grids are built from the RGI glacier grid combined with OGGM topography. Expect long run times: converting the grids to monthly format is slow.
## Setting up:

In [None]:
import sys, os

sys.path.append(os.path.join(os.getcwd(),
                             '../../'))  # Add root of repo to import MBM
import csv
from functools import partial

import pandas as pd
import warnings
from tqdm.notebook import tqdm
import re
import matplotlib.pyplot as plt
import seaborn as sns
from cmcrameri import cm
import xarray as xr
import massbalancemachine as mbm
from collections import defaultdict
import logging
from skorch.helper import SliceDataset
from datetime import datetime
from skorch.callbacks import EarlyStopping, LRScheduler, Checkpoint
import itertools
import random
import pickle
from collections import Counter
import ast
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from scripts.helpers import *
from scripts.glamos_preprocess import *
from scripts.plots import *
from scripts.config_CH import *
from scripts.nn_helpers import *
from scripts.xgb_helpers import *
from scripts.geodata import *
from scripts.NN_networks import *
from scripts.geodata_plots import *

warnings.filterwarnings('ignore')
%load_ext autoreload
%autoreload 2

cfg = mbm.SwitzerlandConfig()

seed_all(cfg.seed)
print("Using seed:", cfg.seed)

from torch.utils.data import Subset
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset
from torch.utils.data import WeightedRandomSampler, SubsetRandomSampler
import torch.nn as nn

if torch.cuda.is_available():
    print("CUDA is available")
    free_up_cuda()
else:
    print("CUDA is NOT available")

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Matplotlib style sheet applied to every figure in this notebook
path_style_sheet = 'scripts/example.mplstyle'
plt.style.use(path_style_sheet)

# Names of the climate columns
vois_climate = ['t2m', 'tp', 'slhf', 'sshf', 'ssrd', 'fal', 'str', 'u10', 'v10']

# Names of the topographical columns
vois_topographical = [
    "aspect", "slope", "hugonnet_dhdt",
    "consensus_ice_thickness", "millan_v", "topo",
]

# RGI glacier outlines, read from the project data directory
glacier_outline_rgi = gpd.read_file(f"{cfg.dataPath}{path_rgi_outlines}")


In [None]:
# Initialise the OGGM glacier directories for RGI v6, region 11.
# Alternative pre-processed source (L3-L5 files, 2023.1, W5E5):
# "https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/L3-L5_files/2023.1/elev_bands/W5E5_w_data/"
gdirs, rgidf = initialize_oggm_glacier_directories(
    cfg,
    rgi_region="11",
    rgi_version="6",
    base_url="https://cluster.klima.uni-bremen.de/~oggm/gdirs/oggm_v1.6/L1-L2_files/2025.6/elev_bands_w_data/",
    log_level='WARNING',
    task_list=None,
)

# Export the OGGM grids of all glaciers; the returned table lists, per
# glacier (`rgi_id`), the variables that were missing (`missing_vars`).
df_missing = export_oggm_grids(cfg, gdirs)

# --- Glacier areas from the RGI shapefile ---
path_rgi = cfg.dataPath + 'GLAMOS/RGI/nsidc0770_11.rgi60.CentralEurope/11_rgi60_CentralEurope.shp'
gdf = gpd.read_file(path_rgi)

# EPSG:3035 is used so that polygon areas come out in m²
gdf_proj = gdf.to_crs(3035).rename(columns={"RGIId": "rgi_id"})
gdf_proj["area_m2"] = gdf_proj.geometry.area
gdf_proj["area_km2"] = gdf_proj["area_m2"] / 1e6

# Attach each glacier's area to the missing-variable table
df_missing = df_missing.merge(gdf_proj[['area_km2', 'rgi_id']], on="rgi_id")

# Area of all glaciers in the shapefile (denominator of the percentages)
total_area = gdf_proj["area_km2"].sum()

# One row per (glacier, missing variable) pair
df_exploded = df_missing.explode("missing_vars")
missing_by_var = df_exploded.groupby("missing_vars")

# 1) Number of distinct glaciers lacking each variable
counts_missing_per_var = missing_by_var["rgi_id"].nunique().sort_values(
    ascending=False)

# 2) Area (absolute and relative) of the glaciers listed in df_missing
total_missing_area_km2 = df_missing["area_km2"].sum()
total_missing_area_pct = (total_missing_area_km2 / total_area) * 100

print(f"Total glacier area with ANY missing variable: "
      f"{total_missing_area_km2:,.2f} km² "
      f"({total_missing_area_pct:.2f}%)")

# 3) Share of the total glacier area lacking each individual variable
area_missing_per_var = missing_by_var["area_km2"].sum().sort_values(
    ascending=False)
perc_missing_per_var = (area_missing_per_var / total_area) * 100

print("\n% of total glacier area missing per variable:")
for var, pct in perc_missing_per_var.items():
    print(f"  - {var}: {pct:.2f}%")

# 4) Bar chart of the per-variable glacier counts
plt.figure(figsize=(7, 4))
plt.bar(counts_missing_per_var.index, counts_missing_per_var.values)
plt.xlabel("Missing variable")
plt.ylabel("Number of glaciers")
plt.title("Count of glaciers missing each variable")
plt.tight_layout()
plt.show()

In [None]:
# Glacier id table: strip stray whitespace from the header names, then
# order and index the rows by the glacier short name.
rgi_df = (
    pd.read_csv(cfg.dataPath + path_glacier_ids, sep=',')
    .rename(columns=str.strip)
    .sort_values(by='short_name')
    .set_index('short_name')
)

# Show one entry as a quick check
rgi_df.loc['rhone']

In [None]:
# ---- 1. Mean position of every glacier that has a grid for `year` ----
path_rgi_alps = os.path.join(cfg.dataPath,
                             'GLAMOS/topo/gridded_topo_inputs/RGI_v6_11/')
# os.listdir returns entries in arbitrary order; sort so that the row order
# of df_pos_all is reproducible between runs and machines.
rgi_ids = sorted(os.listdir(path_rgi_alps))
year = 2023
pos_gl, rgis = [], []
for rgi_gl in tqdm(rgi_ids):
    path_grid = os.path.join(path_rgi_alps, rgi_gl,
                             f"{rgi_gl}_grid_{year}.parquet")
    # Glaciers without a grid file for this year are skipped
    if not os.path.exists(path_grid):
        continue
    # Only the coordinates are needed, so avoid loading the other columns
    df = pd.read_parquet(path_grid, columns=['POINT_LAT', 'POINT_LON'])
    pos_gl.append((df.POINT_LAT.mean(), df.POINT_LON.mean()))
    rgis.append(rgi_gl)
df_pos_all = pd.DataFrame(pos_gl, columns=['lat', 'lon'])
df_pos_all['rgi_id'] = rgis

print('Number of glaciers in RGI region 11.6:', len(df_pos_all))

# ---- 2. Create figure and base map ----
fig = plt.figure(figsize=(18, 10))

latN, latS = 48, 44
lonW, lonE = 4, 14
projPC = ccrs.PlateCarree()
ax2 = plt.axes(projection=projPC)
ax2.set_extent([lonW, lonE, latS, latN], crs=ccrs.Geodetic())

ax2.add_feature(cfeature.COASTLINE)
ax2.add_feature(cfeature.LAKES)
ax2.add_feature(cfeature.RIVERS)
ax2.add_feature(cfeature.BORDERS, linestyle='-', linewidth=1)

# ---- 3. Glacier positions and outlines ----
g = sns.scatterplot(
    data=df_pos_all,
    x='lon',
    y='lat',
    alpha=0.6,
    transform=projPC,
    ax=ax2,
    zorder=10,
    legend=True,
)

glacier_outline_rgi.plot(ax=ax2, transform=projPC, color='black')

# ---- 4. Gridlines ----
gl = ax2.gridlines(draw_labels=True,
                   linewidth=1,
                   color='gray',
                   alpha=0.5,
                   linestyle='--')
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
gl.xlabel_style = {'size': 16, 'color': 'black'}
gl.ylabel_style = {'size': 16, 'color': 'black'}
# Labels only on the left and bottom edges
gl.top_labels = gl.right_labels = False

In [None]:
# Directories holding the per-glacier masked grids (zarr) and the svf files
path_xr_grids = os.path.join(cfg.dataPath,
                             "GLAMOS/topo/RGI_v6_11/xr_masked_grids/")
path_xr_svf = os.path.join(cfg.dataPath,
                           "GLAMOS/topo/RGI_v6_11/svf_nc_latlon/")

# Glacier to inspect
rgi_gl = 'RGI60-11.01238'
ds = xr.open_zarr(f"{path_xr_grids}{rgi_gl}.zarr")

# Left panel: masked elevation of the selected glacier
fig = plt.figure(figsize=(12, 6))
ax = fig.add_subplot(1, 2, 1)
ds["masked_elev"].plot(ax=ax)

# Optional right panel (disabled): svf field of the same glacier
# ds_svf = xr.open_dataset(path_xr_svf + f'{rgi_gl}_svf_latlon.nc')
# ax = fig.add_subplot(1, 2, 2)
# ds_svf.svf.plot(ax=ax)
fig.tight_layout()

## Results:

In [None]:
# Aggregate the per-glacier mean mass balance in logs/glacier_mean_MB.csv to
# regional numbers: total annual and cumulative mass change in Gt, and the
# area-weighted mean mass balance per year.

# RGI v7 alternative:
#path_rgi = cfg.dataPath+'GLAMOS/RGI/RGI2000-v7.0-G-11_central_europe/RGI2000-v7.0-G-11_central_europe.shp'
path_rgi = cfg.dataPath + 'GLAMOS/RGI/nsidc0770_11.rgi60.CentralEurope/11_rgi60_CentralEurope.shp'

# load RGI shapefile
gdf = gpd.read_file(path_rgi)

# check CRS
print(gdf.crs)

# EPSG:3035 is used so that polygon areas come out in m²
gdf_proj = gdf.to_crs(3035)
gdf_proj.set_index('RGIId', inplace=True, drop=True)
gdf_proj["area_m2"] = gdf_proj.geometry.area
gdf_proj["area_km2"] = gdf_proj["area_m2"] / 1e6

# Per-glacier, per-year mean mass balance (columns used: RGIId, Year, Mean_MB)
output_df = pd.read_csv("logs/glacier_mean_MB.csv").drop(columns=['Index'])

# Glacier area in km², looked up by RGI id in one vectorised call. As with a
# per-row lookup, an id that is absent from the shapefile raises KeyError
# instead of silently producing NaN.
output_df['area_gl'] = gdf_proj.loc[output_df['RGIId'].to_numpy(),
                                    'area_km2'].to_numpy()

df = output_df.copy()

# Annual mass change per glacier in Gt.
# Assumes Mean_MB is in m w.e., as the axis labels of the plots in this
# notebook state. Then:
#   1 m w.e. x 1 km² = 1e6 m³ of water = 1e9 kg = 1e-3 Gt
# so the product has to be divided by 1e3. (Dividing by 1e9 would only be
# right for areas in m², whereas area_gl is in km².)
df["annual_change_gt"] = (df["Mean_MB"] * df["area_gl"]) / 1e3

# total annual change in Gt (sum across glaciers)
annual_gt = df.groupby("Year")["annual_change_gt"].sum().reset_index(
    name="Annual_MB_Gt")

# cumulative MB in Gt
annual_gt["Cumulative_MB_Gt"] = annual_gt["Annual_MB_Gt"].cumsum()

# Area-weighted mean MB per year: sum(MB * area) / sum(area) within each year
weighted_mb_sum = (output_df["Mean_MB"] *
                   output_df["area_gl"]).groupby(output_df["Year"]).sum()
area_sum = output_df.groupby("Year")["area_gl"].sum()
yearly_weighted = (weighted_mb_sum / area_sum).reset_index(name="Weighted_MB")
yearly_weighted.head()

In [None]:
# GLAMBIE reference values used for the comparison below
glambie_df = pd.read_csv('glambie_values.csv')

date_columns = [
    'central_europe_dates',
    'central_europe_start_dates',
    'central_europe_end_dates',
]

# Round every date column to a whole number and shift it back by one
for date_col in date_columns:
    glambie_df[date_col] = glambie_df[date_col].round() - 1

glambie_df.head()

In [None]:
def _draw_mb_panel(ax_bar, *, bar_x, bar_y, bar_color, bar_label, line_x,
                   line_y, line_color, line_marker, line_label, title):
    """Draw annual MB bars plus a cumulative MB line on a twin y-axis.

    The bars go on ``ax_bar``; the line goes on a new twin axis, which is
    returned so the caller can keep a handle on it.
    """
    ax_bar.bar(bar_x, bar_y, color=bar_color, label=bar_label)
    ax_bar.set_ylabel("Annual MB (m w.e.)", color=bar_color)

    ax_line = ax_bar.twinx()
    ax_line.plot(line_x,
                 line_y,
                 color=line_color,
                 marker=line_marker,
                 label=line_label)
    ax_line.set_ylabel("Cumulative MB (Gt)", color=line_color)

    ax_bar.set_title(title)
    ax_bar.legend(loc="upper left")
    ax_line.legend(loc="upper right")
    return ax_line


# Two side-by-side panels sharing the left (annual MB) axis
fig, axs = plt.subplots(1, 2, figsize=(15, 6), sharey=True)
ax1, ax3 = axs

# Left panel: LSTM results
years = yearly_weighted['Year']
ax2 = _draw_mb_panel(
    ax1,
    bar_x=years,
    bar_y=yearly_weighted['Weighted_MB'],
    bar_color="skyblue",
    bar_label="Area-weighted annual MB",
    line_x=annual_gt['Year'],
    line_y=annual_gt['Cumulative_MB_Gt'],
    line_color="red",
    line_marker="o",
    line_label="Cumulative MB",
    title="Central Alps annual MB (LSTM)",
)

# Right panel: GLAMBIE results
ax4 = _draw_mb_panel(
    ax3,
    bar_x=glambie_df['central_europe_end_dates'],
    bar_y=glambie_df['central_europe_annual_change_mwe'],
    bar_color="lightgreen",
    bar_label="Annual MB (GLAMBIE)",
    line_x=glambie_df['central_europe_dates'],
    line_y=glambie_df['central_europe_cumulative_change_gt'],
    line_color="darkgreen",
    line_marker="s",
    line_label="Cumulative MB (GLAMBIE)",
    title="Central Europe MB (GLAMBIE)",
)

# Tilt the year labels on both panels
for ax in axs:
    ax.tick_params(axis="x", rotation=45)

plt.tight_layout()
plt.show()


In [None]:
# x positions of the two series: model years and GLAMBIE end dates
years_lstm = yearly_weighted['Year']
years_glambie = glambie_df['central_europe_end_dates']

fig, ax = plt.subplots(figsize=(12, 6))

# Each bar is 0.4 wide and shifted by 0.2 so the pair for a year sits side
# by side: LSTM to the left, GLAMBIE to the right.
width = 0.4
for bar_shift, bar_years, bar_values, bar_color, bar_label in (
    (-0.2, years_lstm, yearly_weighted['Weighted_MB'], "skyblue",
     "LSTM Annual MB"),
    (0.2, years_glambie, glambie_df['central_europe_annual_change_mwe'],
     "lightgreen", "GLAMBIE Annual MB"),
):
    ax.bar(bar_years + bar_shift,
           bar_values,
           width=width,
           color=bar_color,
           label=bar_label)

# Labels, legend and layout
ax.set_ylabel("Annual MB (m w.e.)")
ax.set_title("Annual Mass Balance: LSTM vs GLAMBIE")
ax.legend()
plt.xticks(rotation=45)
plt.tight_layout()
plt.show()