Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/python-weather-diagnostics-toolkit-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ jobs:
- name: Run tests
run: python -m pytest

- name: Run lint
run: python -m ruff check .

- name: Compile modules and scripts
run: python -m compileall src scripts

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -146,7 +146,9 @@ area_mean = sum(field * weight) / sum(valid_weight)

This avoids treating all latitude rows as equal-area rows. The implementation
supports arrays with time or pressure dimensions before the final
latitude/longitude dimensions.
latitude/longitude dimensions. If a reduction window contains no finite values,
the result is `NaN`; this keeps missing-data coverage explicit instead of
converting an empty region into a numerical zero.

## Time-Ordered Baseline Model

Expand Down Expand Up @@ -203,15 +205,18 @@ fixed seed. It then computes:

```text
mean = ensemble mean
spread = ensemble standard deviation
spread = ensemble population standard deviation
p10 = 10th percentile
p90 = 90th percentile
warm_probability = fraction(member >= 0.5)
cold_probability = fraction(member <= -0.5)
```

These calculations demonstrate ensemble-summary mechanics without embedding
real forecast products.
The summary requires finite member values. The spread uses the population
standard deviation (`ddof=0`), so a one-member reviewer fixture has spread `0`
instead of an undefined sample standard deviation. These calculations
demonstrate ensemble-summary mechanics without embedding real forecast
products.

Example synthetic summary rows:

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,6 +142,11 @@ members:
- quantiles show an uncertainty envelope
- threshold probabilities translate members into categorical risk-like summaries

The public implementation treats missing or infinite ensemble values as input
quality problems. Synthetic fixtures are deliberately complete, which makes
review outputs deterministic and keeps missing-data policy separate from
forecast interpretation.

Interpretation pattern:

```text
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,18 @@ def relative_vorticity(u_wind, v_wind, latitude, longitude) -> np.ndarray:
return dv_dx - du_dy


def _matching_field(name: str, field, expected_shape: tuple[int, int]) -> np.ndarray:
values = np.asarray(field, dtype=float)
if values.shape != expected_shape:
raise ValueError(f"{name} shape must match scalar field shape: {values.shape} != {expected_shape}")
return values


def horizontal_advection(scalar, u_wind, v_wind, latitude, longitude) -> np.ndarray:
"""Compute horizontal advection, -(u dS/dx + v dS/dy)."""

scalar_values = np.asarray(scalar, dtype=float)
dscalar_dy, dscalar_dx = gradient_on_latlon(scalar, latitude, longitude)
return -(
np.asarray(u_wind, dtype=float) * dscalar_dx
+ np.asarray(v_wind, dtype=float) * dscalar_dy
)
u_values = _matching_field("u_wind", u_wind, scalar_values.shape)
v_values = _matching_field("v_wind", v_wind, scalar_values.shape)
return -(u_values * dscalar_dx + v_values * dscalar_dy)
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,18 @@ def make_synthetic_nino_ensemble(
def ensemble_summary(df: pd.DataFrame) -> pd.DataFrame:
"""Summarize an ensemble by lead month."""

if df.empty or df.shape[1] == 0:
raise ValueError("ensemble summary requires at least one member and one lead")

values = df.astype(float)
if not np.isfinite(values.to_numpy()).all():
raise ValueError("ensemble summary requires finite member values")

summary = pd.DataFrame(index=df.index)
summary["mean"] = df.mean(axis=1)
summary["spread"] = df.std(axis=1)
summary["p10"] = df.quantile(0.10, axis=1)
summary["p90"] = df.quantile(0.90, axis=1)
summary["warm_probability"] = (df >= 0.5).mean(axis=1)
summary["cold_probability"] = (df <= -0.5).mean(axis=1)
summary["mean"] = values.mean(axis=1)
summary["spread"] = values.std(axis=1, ddof=0)
summary["p10"] = values.quantile(0.10, axis=1)
summary["p90"] = values.quantile(0.90, axis=1)
summary["warm_probability"] = (values >= 0.5).mean(axis=1)
summary["cold_probability"] = (values <= -0.5).mean(axis=1)
return summary
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,18 @@ def area_mean(field, latitude) -> np.ndarray:
"""Area-weight a field over its final two latitude/longitude dimensions."""

values = np.asarray(field, dtype=float)
if values.ndim < 2:
raise ValueError("field must have at least latitude and longitude dimensions")
weights = latitude_weights(latitude)
if values.shape[-2] != weights.size:
raise ValueError("latitude length must match the second-to-last field dimension")
reshape = (1,) * (values.ndim - 2) + (weights.size, 1)
weights_2d = weights.reshape(reshape)
numerator = np.nansum(values * weights_2d, axis=(-2, -1))
denominator = np.nansum(np.isfinite(values) * weights_2d, axis=(-2, -1))
return numerator / denominator
with np.errstate(divide="ignore", invalid="ignore"):
mean = numerator / denominator
return np.where(denominator > 0.0, mean, np.nan)


def build_forecast_frame(
Expand Down Expand Up @@ -66,8 +70,12 @@ def ridge_regression_fit_predict(
raise ValueError("features must be a two-dimensional array")
if y.ndim != 1 or y.size != x.shape[0]:
raise ValueError("target must be one-dimensional and aligned with features")
if not np.isfinite(x).all() or not np.isfinite(y).all():
raise ValueError("features and target must contain only finite values")
if not 0.0 < train_fraction < 1.0:
raise ValueError("train_fraction must be between 0 and 1")
if alpha < 0.0:
raise ValueError("alpha must be non-negative")

split = int(x.shape[0] * train_fraction)
if split <= 0 or split >= x.shape[0]:
Expand All @@ -78,7 +86,7 @@ def ridge_regression_fit_predict(

mean = x_train.mean(axis=0)
scale = x_train.std(axis=0)
scale = np.where(scale == 0.0, 1.0, scale)
scale = np.where(np.isclose(scale, 0.0), 1.0, scale)
x_train_scaled = (x_train - mean) / scale
x_test_scaled = (x_test - mean) / scale

Expand Down Expand Up @@ -106,6 +114,10 @@ def regression_metrics(y_true, y_pred) -> dict[str, float]:
pred = np.asarray(y_pred, dtype=float)
if true.shape != pred.shape:
raise ValueError("y_true and y_pred must have matching shapes")
if true.size == 0:
raise ValueError("metrics require at least one sample")
if not np.isfinite(true).all() or not np.isfinite(pred).all():
raise ValueError("metrics require finite values")

err = pred - true
if true.size < 2 or np.allclose(true.std(), 0.0) or np.allclose(pred.std(), 0.0):
Expand Down
12 changes: 12 additions & 0 deletions projects/python-weather-diagnostics-toolkit/tests/test_dynamics.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pytest

from python_weather_diagnostics_toolkit.dynamics import (
gradient_on_latlon,
Expand Down Expand Up @@ -40,3 +41,14 @@ def test_zonal_gradient_masks_exact_pole_rows_without_infinity():
assert np.isnan(d_dx[0]).all()
assert np.isnan(d_dx[-1]).all()
assert np.isfinite(d_dx[1:-1]).all()


def test_horizontal_advection_rejects_mismatched_wind_shape():
lat = np.linspace(30.0, 35.0, 5)
lon = np.linspace(100.0, 105.0, 6)
scalar = np.ones((lat.size, lon.size))
u = np.ones((lat.size, lon.size - 1))
v = np.ones_like(scalar)

with pytest.raises(ValueError, match="u_wind shape"):
horizontal_advection(scalar, u, v, lat, lon)
21 changes: 21 additions & 0 deletions projects/python-weather-diagnostics-toolkit/tests/test_ensemble.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
import numpy as np
import pandas as pd
import pytest

from python_weather_diagnostics_toolkit.ensemble import ensemble_summary


def test_single_member_ensemble_has_zero_spread():
df = pd.DataFrame({"member_01": [0.2, 0.8]}, index=[1, 2])

summary = ensemble_summary(df)

np.testing.assert_allclose(summary["spread"].to_numpy(), np.array([0.0, 0.0]))
np.testing.assert_allclose(summary["warm_probability"].to_numpy(), np.array([0.0, 1.0]))


def test_ensemble_summary_rejects_non_finite_values():
df = pd.DataFrame({"member_01": [0.2, np.nan]}, index=[1, 2])

with pytest.raises(ValueError, match="finite"):
ensemble_summary(df)
24 changes: 24 additions & 0 deletions projects/python-weather-diagnostics-toolkit/tests/test_features.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pytest

from python_weather_diagnostics_toolkit.features import (
area_mean,
Expand All @@ -16,6 +17,15 @@ def test_area_mean_preserves_constant_field():
np.testing.assert_allclose(mean, np.array([7.0, 7.0]))


def test_area_mean_returns_nan_for_all_missing_region():
lat = np.array([30.0, 35.0, 40.0])
field = np.full((lat.size, 4), np.nan)

mean = area_mean(field, lat)

assert np.isnan(mean)


def test_ridge_baseline_returns_predictions_and_metrics():
x = np.column_stack([np.arange(20.0), np.arange(20.0) * 0.5])
y = 2.0 + x[:, 0] * 0.3 - x[:, 1] * 0.1
Expand All @@ -25,3 +35,17 @@ def test_ridge_baseline_returns_predictions_and_metrics():

assert result["y_pred"].shape == result["y_test"].shape
assert metrics["rmse"] < 0.05


def test_ridge_baseline_rejects_non_finite_training_values():
x = np.column_stack([np.arange(20.0), np.arange(20.0) * 0.5])
y = np.arange(20.0)
x[3, 0] = np.inf

with pytest.raises(ValueError, match="finite"):
ridge_regression_fit_predict(x, y)


def test_regression_metrics_reject_empty_inputs():
with pytest.raises(ValueError, match="at least one sample"):
regression_metrics(np.array([]), np.array([]))