Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Functions to store and compute climatological weights #231

Merged
merged 12 commits into from
Aug 18, 2021
Merged
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/psf/black
rev: 21.6b0
rev: 21.7b0
hooks:
- id: black
language_version: python3
188 changes: 188 additions & 0 deletions pysteps/blending/clim.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
"""
pysteps.blending.clim
=====================

Module with methods to read, write and compute past and climatological model weights.

.. autosummary::
:toctree: ../generated/

get_default_weights
save_weights
calc_clim_weights
"""

import numpy as np
from os.path import exists, join
import pickle
from pysteps import rcparams


def get_default_weights(n_cascade_levels=8, n_models=1):
    """
    Get the default weights as given in BPS2004.
    Take subset of n_cascade_levels or add entries with small values (1e-4) if
    n_cascade_levels differs from 8.

    Parameters
    ----------
    n_cascade_levels: int, optional
        Number of cascade levels. Defaults to 8.
    n_models: int, optional
        Number of NWP models. Defaults to 1.

    Returns
    -------
    default_weights: array-like
        Array of shape [model, scale_level] containing the climatological weights.

    """

    # Climatological correlation values for the 8 cascade levels of BPS2004.
    bps2004_weights = np.array(
        [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040]
    )
    n_available = bps2004_weights.shape[0]
    if n_cascade_levels < n_available:
        # Fewer levels requested: keep the leading subset.
        weights = bps2004_weights[:n_cascade_levels]
    elif n_cascade_levels > n_available:
        # More levels requested: pad the tail with small values.
        padding = np.repeat(1e-4, n_cascade_levels - n_available)
        weights = np.append(bps2004_weights, padding)
    else:
        weights = bps2004_weights
    # Tile the per-level weights across the requested number of models.
    return np.resize(weights, (n_models, n_cascade_levels))


def save_weights(
    current_weights,
    validtime,
    outdir_path=rcparams.outputs["path_workdir"],
    window_length=30,
):
    """
    Add the current NWP weights to update today's daily average weight. If the
    day is over, update the list of daily average weights covering a rolling
    window.

    Parameters
    ----------
    current_weights: array-like
        Array of shape [model, scale_level, ...]
        containing the current weights of the different NWP models per cascade
        level.
    validtime: datetime
        Datetime object containing the date and time at for which the current
        weights are valid.
    outdir_path: string, optional
        Path to folder where the historical weights are stored. Defaults to
        path_workdir from rcparams.
    window_length: int, optional
        Length of window (in days) of daily weights that should be retained.
        Defaults to 30.

    Returns
    -------
    None

    """

    n_cascade_levels = current_weights.shape[1]

    # Load weights_today, a dictionary containing {mean_weights, n, last_validtime}
    weights_today_file = join(outdir_path, "NWP_weights_today.pkl")
    if exists(weights_today_file):
        with open(weights_today_file, "rb") as f:
            weights_today = pickle.load(f)
    else:
        # First call ever: seed today's statistics with the current weights.
        weights_today = {
            "mean_weights": np.copy(current_weights),
            "n": 0,
            "last_validtime": validtime,
        }

    # Load the past weights which is an array with dimensions day x model x scale_level
    past_weights_file = join(outdir_path, "NWP_weights_window.npy")
    past_weights = None
    if exists(past_weights_file):
        past_weights = np.load(past_weights_file)
    # First check if we have started a new day wrt the last written weights, in which
    # case we should update the daily weights file and reset daily statistics.
    if weights_today["last_validtime"].date() < validtime.date():
        # Append weights to the list of the past X daily averages.
        if past_weights is not None:
            past_weights = np.append(
                past_weights, [weights_today["mean_weights"]], axis=0
            )
        else:
            past_weights = np.array([weights_today["mean_weights"]])
        # Remove oldest if the number of entries exceeds the window length.
        if past_weights.shape[0] > window_length:
            past_weights = np.delete(past_weights, 0, axis=0)
        # FIXME also write out last_validtime.date() in this file?
        # In that case it will need pickling or netcdf.
        # Write out the past weights within the rolling window.
        np.save(past_weights_file, past_weights)
        # Reset statistics for today.
        weights_today["n"] = 0
        weights_today["mean_weights"] = 0

    # Reset today's weights if needed and/or compute online average from the
    # current weights using numerically stable algorithm
    weights_today["n"] += 1
    weights_today["mean_weights"] += (
        current_weights - weights_today["mean_weights"]
    ) / weights_today["n"]
    weights_today["last_validtime"] = validtime
    with open(weights_today_file, "wb") as f:
        pickle.dump(weights_today, f)

    return None


def calc_clim_weights(
    n_cascade_levels=8,
    outdir_path=rcparams.outputs["path_workdir"],
    n_models=1,
    window_length=30,
):
    """
    Return the climatological weights based on the daily average weights in the
    rolling window. This is done using a geometric mean.

    Parameters
    ----------
    n_cascade_levels: int, optional
        Number of cascade levels.
    outdir_path: string, optional
        Path to folder where the historical weights are stored. Defaults to
        path_workdir from rcparams.
    n_models: int, optional
        Number of NWP models. Defaults to 1.
    window_length: int, optional
        Length of window (in days) over which to compute the climatological
        weights. Defaults to 30.

    Returns
    -------
    climatological_mean_weights: array-like
        Array of shape [model, scale_level, ...] containing the climatological
        (geometric) mean weights.

    """
    past_weights_file = join(outdir_path, "NWP_weights_window.npy")
    # past_weights has dimensions date x model x scale_level x ....
    if exists(past_weights_file):
        past_weights = np.load(past_weights_file)
    else:
        past_weights = None
    # Check if there's enough data to compute the climatological skill.
    # NOTE: truth-testing a multi-element ndarray ("not past_weights") raises
    # ValueError, so the presence check must compare against None explicitly.
    if past_weights is None or past_weights.shape[0] < window_length:
        return get_default_weights(n_cascade_levels, n_models)
    # Reduce window if necessary.
    past_weights = past_weights[-window_length:]

    # Calculate climatological weights from the past_weights using the
    # geometric mean.
    geomean_weights = np.exp(np.log(past_weights).mean(axis=0))

    return geomean_weights
43 changes: 27 additions & 16 deletions pysteps/blending/skill_scores.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def spatial_correlation(obs, mod):
return rho


def lt_dependent_cor_nwp(lt, correlations, outdir_path, skill_kwargs=dict()):
def lt_dependent_cor_nwp(lt, correlations, skill_kwargs=dict()):
"""Determine the correlation of a model field for lead time lt and
cascade k, by assuming that the correlation determined at t=0 regresses
towards the climatological values.
Expand All @@ -79,10 +79,9 @@ def lt_dependent_cor_nwp(lt, correlations, outdir_path, skill_kwargs=dict()):
Array of shape [n_cascade_levels] containing per cascade_level the
correlation between the normalized cascade of the observed (radar)
rainfall field and the normalized cascade of the model field.
outdir_path: str
Path to folder where the historical weights are stored.
skill_kwargs : dict, optional
Dictionary containing e.g. the nmodels and window_length parameters.
Dictionary containing e.g. the outdir_path, nmodels and window_length
parameters.

Returns
-------
Expand All @@ -99,9 +98,7 @@ def lt_dependent_cor_nwp(lt, correlations, outdir_path, skill_kwargs=dict()):
"""
# Obtain the climatological values towards which the correlations will
# regress
clim_cor_values, regr_pars = clim_regr_values(
len(correlations), outdir_path, skill_kwargs=None
)
clim_cor_values, regr_pars = clim_regr_values(n_cascade_levels=len(correlations))
# Determine the speed of the regression (eq. 24 in BPS2004)
qm = np.exp(-lt / regr_pars[0, :]) * (2 - np.exp(-lt / regr_pars[1, :]))
# Determine the correlation for lead time lt
Expand Down Expand Up @@ -158,7 +155,8 @@ def lt_dependent_cor_extrapolation(PHI, correlations=None, correlations_prev=Non
return rho, rho_prev


def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()):
# TODO: Make sure the method can also handle multiple model inputs..
def clim_regr_values(n_cascade_levels, skill_kwargs=dict()):
"""Obtains the climatological correlation values and regression parameters
from a file called NWP_weights_window.bin in the outdir_path. If this file
is not present yet, the values from :cite:`BPS2004` are used.
Expand All @@ -168,10 +166,9 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()):
----------
n_cascade_levels : int
The number of cascade levels to use.
outdir_path: str
Path to folder where the historical weights are stored.
skill_kwargs : dict, optional
Dictionary containing e.g. the nmodels and window_length parameters.
Dictionary containing e.g. the outdir_path, nmodels and window_length
parameters.

Returns
-------
Expand Down Expand Up @@ -201,16 +198,18 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()):
# First, obtain climatological skill values
try:
clim_cor_values = clim.calc_clim_weights(
outdir_path, n_cascade_levels, **skill_kwargs
n_cascade_levels=n_cascade_levels, **skill_kwargs
)
except FileNotFoundError:
# The climatological skill values file does not exist yet, so we'll
# use the default values from BPS2004.
clim_cor_values = np.array(
[0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040]
[[0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040]]
)

# Check if clim_cor_values has only one dimension, otherwise it has
clim_cor_values = clim_cor_values[0]

# Check if clim_cor_values has only one model, otherwise it has
# returned the skill values for multiple models
if clim_cor_values.ndim != 1:
raise IndexError(
Expand All @@ -224,16 +223,28 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()):
# Get the number of cascade levels that is missing
n_extra_lev = n_cascade_levels - clim_cor_values.shape[0]
# Append the array with correlation values of 10e-4
clim_cor_values = np.append(clim_cor_values, np.repeat(10e-4, n_extra_lev))
clim_cor_values = np.append(clim_cor_values, np.repeat(1e-4, n_extra_lev))

# Get the regression parameters (L in eq. 24 in BPS2004) - Hard coded,
# change to own values when present.
# TODO: Regression pars for different cascade levels
regr_pars = np.array(
[
[130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0],
[155.0, 220.0, 200.0, 75.0, 10e4, 10e4, 10e4, 10e4],
]
)

# Check whether the number of cascade_levels is correct
if regr_pars.shape[1] > n_cascade_levels:
regr_pars = regr_pars[:, 0:n_cascade_levels]
elif regr_pars.shape[1] < n_cascade_levels:
# Get the number of cascade levels that is missing
n_extra_lev = n_cascade_levels - regr_pars.shape[1]
# Append the array with correlation values of 10e-4
regr_pars = np.append(
regr_pars,
[np.repeat(10.0, n_extra_lev), np.repeat(10e4, n_extra_lev)],
axis=1,
)

return clim_cor_values, regr_pars
3 changes: 2 additions & 1 deletion pysteps/pystepsrc
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
"silent_import": false,
"outputs": {
// path_outputs : path where to save results (figures, forecasts, etc)
"path_outputs": "./"
"path_outputs": "./",
"path_workdir": "./tmp/"
},
"plot": {
// "motion_plot" : "streamplot" or "quiver"
Expand Down
Loading