In [1]:
# test_bias_metrics.ipynb

# %% [markdown]
"""
# Testing Alignment and Bias Metrics

**Goal**: Demonstrate how to load ensemble data and reference data, perform alignment, 
          and compute standard bias metrics (ME, MAE, RMSE).

We will:
1. Load an ensemble dataset (potentially with multiple members).
2. Load a single-member reference dataset (SPARTACUS).
3. Perform optional spatial and temporal subsetting.
4. Convert to a consistent time scale (e.g., daily sums).
5. Compare:
   - A single member vs reference
   - A partial ensemble mean vs reference (first 5 members)
   - The full ensemble mean vs reference
6. Compute bias metrics using `bias_metrics.py`.
"""

# %%
import sys
import os

sys.path.append(os.path.abspath(".."))

import xarray as xr
import matplotlib.pyplot as plt

# Import custom modules
from data_loading.load_data import load_ensemble_any_latlon, load_dataset
from data_loading.subset_time import get_common_time_range, subset_time
from data_loading.subset_region import subset_dataset  # or however it's named
from utils.temporal_stats import (
    aggregate_to_daily,
)
from utils.bias_metrics import (
    mean_error,
    mean_absolute_error,
    root_mean_squared_error,
    compute_all_bias_metrics
)

# %%
# 1. LOAD DATA
# Both inputs are read from ../data; adjust the directory or file names as needed.
data_dir = os.path.join("..", "data")
ensemble_pattern = os.path.join(data_dir, "total_precipitation_2017010*.nc")
ref_path = os.path.join(data_dir, "SPARTACUS2-DAILY_RR_2017.nc")

# Ensemble: project loader fed the glob pattern above
# (intended to cope with either 1D or 2D lat/lon layouts).
ds_ensemble = load_ensemble_any_latlon(ensemble_pattern)

# Reference: single-member file opened directly with xarray
# (load_dataset(ref_path) is the project-level alternative).
ds_ref = xr.open_dataset(ref_path)

# Print a text summary of each dataset for a quick sanity check.
for label, dataset in (
    ("Ensemble dataset:\n", ds_ensemble),
    ("Reference dataset:\n", ds_ref),
):
    print(label, dataset)



Ensemble dataset:
 <xarray.Dataset> Size: 2GB
Dimensions:        (member: 11, time: 48, lat: 492, lon: 594)
Coordinates:
  * member         (member) object 88B '00' '01' '02' '03' ... '08' '09' '10'
    lon            (lat, lon) float32 1MB 5.498 5.526 5.554 ... 22.05 22.07 22.1
  * time           (time) datetime64[ns] 384B 2017-01-01 ... 2017-01-02T23:00:00
Dimensions without coordinates: lat
Data variables:
    latitude       (time, lat, lon, member) float32 617MB 42.98 42.98 ... 51.82
    precipitation  (time, lat, lon, member) float64 1GB 0.0 0.0 0.0 ... 0.0 0.0
Attributes:
    history:  Fri Jan 31 09:51:14 2025: ncrename -v lat,latitude -v lon,longi...
    NCO:      netCDF Operators version 5.1.9 (Homepage = http://nco.sf.net, C...
Reference dataset:
<xarray.Dataset> Size: 563MB
Dimensions:                  (time: 365, y: 329, x: 584)
Coordinates:
    lambert_conformal_conic  float64 8B ...
    lat                      (y, x) float32 769kB ...
    lon                      (y, x) f

In [2]:
from data_loading.subset_region import unify_lat_lon_names

# Give both datasets the same latitude/longitude naming ("lat"/"lon") so the
# later cells can address them with identical variable names.
# NOTE(review): what exactly gets renamed is decided inside
# unify_lat_lon_names -- confirm against that helper.
target_names = {"lat_name": "lat", "lon_name": "lon"}
ds_ensemble = unify_lat_lon_names(ds_ensemble, **target_names)
ds_ref = unify_lat_lon_names(ds_ref, **target_names)

# Re-print both datasets so the effect of the renaming can be inspected.
print("Ensemble dataset:\n", ds_ensemble)
print("Reference dataset:\n", ds_ref)


Ensemble dataset:
 <xarray.Dataset> Size: 2GB
Dimensions:        (member: 11, time: 48, lat: 492, lon: 594)
Coordinates:
  * member         (member) object 88B '00' '01' '02' '03' ... '08' '09' '10'
    lon            (lat, lon) float32 1MB 5.498 5.526 5.554 ... 22.05 22.07 22.1
  * time           (time) datetime64[ns] 384B 2017-01-01 ... 2017-01-02T23:00:00
Dimensions without coordinates: lat
Data variables:
    lat            (time, lat, lon, member) float32 617MB 42.98 42.98 ... 51.82
    precipitation  (time, lat, lon, member) float64 1GB 0.0 0.0 0.0 ... 0.0 0.0
Attributes:
    history:  Fri Jan 31 09:51:14 2025: ncrename -v lat,latitude -v lon,longi...
    NCO:      netCDF Operators version 5.1.9 (Homepage = http://nco.sf.net, C...
Reference dataset:
 <xarray.Dataset> Size: 563MB
Dimensions:                  (time: 365, y: 329, x: 584)
Coordinates:
    lambert_conformal_conic  float64 8B ...
    lat                      (y, x) float32 769kB ...
    lon                      (y, x) 

In [3]:
from data_loading.subset_region import get_reference_extent, subset_dataset

# 2D lat/lon in SPARTACUS => lat_var="lat", lon_var="lon"
# Bounds are derived from the reference dataset and applied to both datasets.
ref_bounds = get_reference_extent(ds_ref, lat_var="lat", lon_var="lon")

# Keep the subsets: previously both return values were discarded, so the
# spatial subsetting had no effect on anything a later cell could use.
# New names are used so the unsubset datasets stay available and the cell
# gives the same result when re-run.
# NOTE(review): assumes subset_dataset returns the subset dataset rather than
# modifying its argument in place -- confirm against data_loading.subset_region.
ds_ensemble_sub = subset_dataset(ds_ensemble, lat_var="lat", lon_var="lon", bounds=ref_bounds)
ds_ref_sub = subset_dataset(ds_ref, lat_var="lat", lon_var="lon", bounds=ref_bounds)

# Last expression of the cell: still shows the reference subset, as before.
ds_ref_sub
