From 7b5be66d41dc42f3a373f2629c0012709afc84bf Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Wed, 11 Aug 2021 20:45:10 +0200 Subject: [PATCH 01/11] First attempt at functions to obtain climatological NWP weight/skill. Functions: save_weights: handles IO and daily weights computation calc_clim_weights: computes the climatological weights --- pysteps/blending/clim.py | 108 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 108 insertions(+) create mode 100644 pysteps/blending/clim.py diff --git a/pysteps/blending/clim.py b/pysteps/blending/clim.py new file mode 100644 index 000000000..2efc8783b --- /dev/null +++ b/pysteps/blending/clim.py @@ -0,0 +1,108 @@ +""" +pysteps.blending.clim +===================== + +Module with methods to read, write and compute past and climatological model skill. + +.. autosummary:: + :toctree: ../generated/ + + save_weights + calc_clim_weights +""" + +import numpy as np + + +def save_weights(current_weights, validtime, model_names, outdir_path, window_length): + """ + Add the current NWP weights/skills to update today's daily average weight. If + the day is over, update the list of daily average weights covering a + rolling window. + + Parameters + ---------- + current_weights: array-like + Array of shape [model, scale_level, ...] + containing the current weights of the different NWP models per cascade level. + model_names : list of strings + List containing unique identifiers for each NWP model. + outdir_path: string + Path to folder where the historical weights are stored. + window_length: int + Length of window over which to compute the climatological weights (in days). + + Returns + ------- + None + + """ + + # TODO everywhere: error catching + + # Load weights_today, a dictionary containing {mean_weights, n, last_validtime} + weights_today = np.load(outdir_path + "NWP_weights_today.bin") + + # Load the past weights which is an array with dimensions day x model x scale_level + past_weights = np.load(outdir_path + "NWP_weights_window.bin") + + # First check if we have started a new day wrt the last written weights, in which + # case we should update the daily weights file and reset daily statistics. + if weights_today.last_validtime.date() < validtime.date(): + # Append weights to the list of the past X daily averages. + past_weights = np.append(past_weights, [weights_today.mean_weights], axis=0) + # Remove oldest if the number of entries exceeds the window length. + if past_weights.shape[0] > window_length: + past_weights = np.delete(past_weights, 0, axis=0) + # TODO also write out last_validtime.date() in this file? + # In that case it will need pickling or netcdf. + # Write out the past weights within the rolling window. + np.save(outdir_path + "NWP_weights_window.bin", past_weights) + # Reset statistics for today. + weights_today.n = 0 + weights_today.mean_weights = 0 + + # Reset today's weights if needed and/or compute online average from the + # current weights using numerically stable algorithm + weights_today.n += 1 + weights_today.mean_weights += ( + current_weights - weights_today.mean_weights + ) / weights_today.n + weights_today.last_validtime = validtime + + np.save(outdir_path + "NWP_weights_today.bin", weights_today, allow_pickle=True) + + return None + + +def calc_clim_weights(model_names, outdir_path): + """ + Return the climatological skill based on the weights in the rolling + window. This is done using a geometric mean. + + Parameters + ---------- + weights: array-like + Array of shape [model, scale_level, ...] 
+ containing the current weights of the different NWP models per cascade level. + model_names : list of strings + List containing unique identifiers for each NWP model. + outdir_path: string + Path to folder where the historical weights are stored. + window_length: int + Length of window over which to compute the climatological weights (in days). + + Returns + ------- + climatological_mean_weights: array-like + Array containing the climatological weights. + + """ + + past_weights = np.load(outdir_path + "NWP_weights_window.bin") + + # Calculate climatological weights from the past_weights using the + # geometric mean. + geomean_weights = np.exp(np.log(past_weights).mean(axis=0)) + + return geomean_weights From cac31bd4230823256a708ae30490a7cd3eab6b70 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Thu, 12 Aug 2021 16:16:20 +0200 Subject: [PATCH 02/11] Fixes in pysteps/blending/clim.py: Check file existence Fix comments --- pysteps/blending/clim.py | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/pysteps/blending/clim.py b/pysteps/blending/clim.py index 2efc8783b..288ff6b5f 100644 --- a/pysteps/blending/clim.py +++ b/pysteps/blending/clim.py @@ -2,7 +2,7 @@ pysteps.blending.clim ===================== -Module with methods to read, write and compute past and climatological model skill. +Module with methods to read, write and compute past and climatological model weights. .. autosummary:: :toctree: ../generated/ @@ -12,13 +12,14 @@ """ import numpy as np +from os.path import exists def save_weights(current_weights, validtime, model_names, outdir_path, window_length): """ - Add the current NWP weights/skills to update today's daily average weight. If - the day is over, update the list of daily average weights covering a - rolling window. + Add the current NWP weights to update today's daily average weight. If the + day is over, update the list of daily average weights covering a rolling + window. Parameters ---------- @@ -30,7 +31,7 @@ def save_weights(current_weights, validtime, model_names, outdir_path, window_le outdir_path: string Path to folder where the historical weights are stored. window_length: int - Length of window over which to compute the climatological weights (in days). + Length of window (in days) over which to compute the climatological weights. Returns ------- @@ -38,10 +39,19 @@ def save_weights(current_weights, validtime, model_names, outdir_path, window_le """ - # TODO everywhere: error catching + n_cascade_levels = current_weights.shape[1] # Load weights_today, a dictionary containing {mean_weights, n, last_validtime} - weights_today = np.load(outdir_path + "NWP_weights_today.bin") + weights_today_file = outdir_path + "NWP_weights_today.bin" + weights_today = ( + exists(weights_today_file) + and load(weights_today_file) + or { + "mean_weights": np.copy(current_weights), + "n": 0, + "last_validtime": validtime, + } + ) # Load the past weights which is an array with dimensions day x model x scale_level past_weights = np.load(outdir_path + "NWP_weights_window.bin") @@ -54,7 +64,7 @@ def save_weights(current_weights, validtime, model_names, outdir_path, window_le # Remove oldest if the number of entries exceeds the window length. if past_weights.shape[0] > window_length: past_weights = np.delete(past_weights, 0, axis=0) - # TODO also write out last_validtime.date() in this file? + # FIXME also write out last_validtime.date() in this file? # In that case it will need pickling or netcdf. 
# Write out the past weights within the rolling window. np.save(outdir_path + "NWP_weights_window.bin", past_weights) @@ -77,8 +87,8 @@ def save_weights(current_weights, validtime, model_names, outdir_path, window_le def calc_clim_weights(model_names, outdir_path): """ - Return the climatological skill based on the weights in the rolling - window. This is done using a geometric mean. + Return the climatological weights based on the daily average weights in the + rolling window. This is done using a geometric mean. Parameters ---------- From 09ec31b7a5b6f03da2be3f20e440d102cfe24dba Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Fri, 13 Aug 2021 10:12:47 +0200 Subject: [PATCH 03/11] Changes to clim.py: Add function to get default weights Fix arguments of functions Optional window length in clim computation --- pysteps/blending/clim.py | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) diff --git a/pysteps/blending/clim.py b/pysteps/blending/clim.py index 288ff6b5f..079de12b0 100644 --- a/pysteps/blending/clim.py +++ b/pysteps/blending/clim.py @@ -15,7 +15,17 @@ from os.path import exists -def save_weights(current_weights, validtime, model_names, outdir_path, window_length): +def get_default_weights(n_models, n_cascade_levels): + """ + BPS2004 + """ + default_weights = np.array( + [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040] + ) + return default_weights + + +def save_weights(current_weights, validtime, outdir_path, window_length=30): """ Add the current NWP weights to update today's daily average weight. If the day is over, update the list of daily average weights covering a rolling @@ -26,11 +36,9 @@ def save_weights(current_weights, validtime, model_names, outdir_path, window_le current_weights: array-like Array of shape [model, scale_level, ...] containing the current weights of the different NWP models per cascade level. - model_names : list of strings - List containing unique identifiers for each NWP model. outdir_path: string Path to folder where the historical weights are stored. - window_length: int + window_length: int, optional Length of window (in days) over which to compute the climatological weights. Returns @@ -85,31 +93,37 @@ def save_weights(current_weights, validtime, model_names, outdir_path, window_le return None -def calc_clim_weights(model_names, outdir_path): +def calc_clim_weights(outdir_path, n_cascade_levels, nmodels=1, window_length=30): """ Return the climatological weights based on the daily average weights in the rolling window. This is done using a geometric mean. Parameters ---------- - weights: array-like - Array of shape [model, scale_level, ...] - containing the current weights of the different NWP models per cascade level. - model_names : list of strings - List containing unique identifiers for each NWP model. outdir_path: string Path to folder where the historical weights are stored. - window_length: int - Length of window over which to compute the climatological weights (in days). + n_cascade_levels: int + Number of cascade levels. + nmodels: int, optional + Number of NWP models + window_length: int, optional + Length of window (in days) over which to compute the climatological weights. Returns ------- climatological_mean_weights: array-like - Array containing the climatological weights. + Array of shape [model, scale_level, ...] containing the climatological weights. """ + # past_weights has dimensions date x model x scale_level x .... 
past_weights = np.load(outdir_path + "NWP_weights_window.bin") + # check if there's enough data to compute the climatological skill + if past_weights.shape[0] < window_length: + return get_default_weights(nmodels, n_cascade_levels) + # reduce window if necessary + else: + past_weights = past_weights[-window_length:] # Calculate climatological weights from the past_weights using the # geometric mean. From 467294f56cbb20f73be00af9a40ccc8b30883c51 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Fri, 13 Aug 2021 10:32:46 +0200 Subject: [PATCH 04/11] Fix dimensions of default_weights in clim.py. --- pysteps/blending/clim.py | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/pysteps/blending/clim.py b/pysteps/blending/clim.py index 079de12b0..fed02b2fc 100644 --- a/pysteps/blending/clim.py +++ b/pysteps/blending/clim.py @@ -15,14 +15,35 @@ from os.path import exists -def get_default_weights(n_models, n_cascade_levels): +def get_default_weights(n_cascade_levels, n_models=1): """ - BPS2004 + Get the default weights as given in BPS2004. + + Parameters + ---------- + n_cascade_levels: int + Number of cascade levels. + nmodels: int, optional + Number of NWP models + + Returns + ------- + default_weights: array-like + Array of shape [model, scale_level] containing the climatological weights. + """ + default_weights = np.array( [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040] ) - return default_weights + n_weights = default_weights.shape[0] + if n_cascade_levels < n_weights: + default_weights = default_weights[0:n_cascade_levels] + elif n_cascade_levels > n_weights: + default_weights = np.append( + default_weights, np.repeat(1e-4, n_cascade_levels - n_weights) + ) + return np.resize(default_weights, (n_models, n_cascade_levels)) def save_weights(current_weights, validtime, outdir_path, window_length=30): From 2891fe4c265c8adf0bb04b7a2dc21c830de8901d Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Mon, 16 Aug 2021 13:00:02 +0200 Subject: [PATCH 05/11] Commit config? --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c80571951..f35ad636b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: 21.6b0 + rev: 21.7b0 hooks: - id: black language_version: python3 \ No newline at end of file From 5e31e437917ddb6ff32be32c43bccd05f37e37fc Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Mon, 16 Aug 2021 13:00:49 +0200 Subject: [PATCH 06/11] Fixes in blending/clim.py. Fix path handling and errors in dictionary; get sane default values at the start. --- pysteps/blending/clim.py | 44 +++++---- pysteps/tests/test_blending_clim.py | 141 ++++++++++++++++++++++++++++ 2 files changed, 167 insertions(+), 18 deletions(-) create mode 100644 pysteps/tests/test_blending_clim.py diff --git a/pysteps/blending/clim.py b/pysteps/blending/clim.py index fed02b2fc..6a9422f57 100644 --- a/pysteps/blending/clim.py +++ b/pysteps/blending/clim.py @@ -12,12 +12,14 @@ """ import numpy as np -from os.path import exists +from os.path import exists, join def get_default_weights(n_cascade_levels, n_models=1): """ Get the default weights as given in BPS2004. + Take subset of n_cascade_levels or add entries with small values (1e-4) if + n_cascade_levels differs from 8. 
Parameters ---------- @@ -71,10 +73,10 @@ def save_weights(current_weights, validtime, outdir_path, window_length=30): n_cascade_levels = current_weights.shape[1] # Load weights_today, a dictionary containing {mean_weights, n, last_validtime} - weights_today_file = outdir_path + "NWP_weights_today.bin" + weights_today_file = join(outdir_path, "NWP_weights_today.npy") weights_today = ( exists(weights_today_file) - and load(weights_today_file) + and np.load(weights_today_file) or { "mean_weights": np.copy(current_weights), "n": 0, @@ -83,33 +85,39 @@ def save_weights(current_weights, validtime, outdir_path, window_length=30): ) # Load the past weights which is an array with dimensions day x model x scale_level - past_weights = np.load(outdir_path + "NWP_weights_window.bin") + past_weights_file = join(outdir_path, "NWP_weights_window.npy") + past_weights = exists(past_weights_file) and np.load(past_weights_file) or None # First check if we have started a new day wrt the last written weights, in which # case we should update the daily weights file and reset daily statistics. - if weights_today.last_validtime.date() < validtime.date(): + if weights_today["last_validtime"].date() < validtime.date(): # Append weights to the list of the past X daily averages. - past_weights = np.append(past_weights, [weights_today.mean_weights], axis=0) + past_weights = ( + past_weights + and np.append(past_weights, [weights_today["mean_weights"]], axis=0) + or np.array([weights_today["mean_weights"]]) + ) + print(past_weights.shape) # Remove oldest if the number of entries exceeds the window length. if past_weights.shape[0] > window_length: past_weights = np.delete(past_weights, 0, axis=0) # FIXME also write out last_validtime.date() in this file? # In that case it will need pickling or netcdf. # Write out the past weights within the rolling window. - np.save(outdir_path + "NWP_weights_window.bin", past_weights) + np.save(past_weights_file, past_weights) # Reset statistics for today. - weights_today.n = 0 - weights_today.mean_weights = 0 + weights_today["n"] = 0 + weights_today["mean_weights"] = 0 # Reset today's weights if needed and/or compute online average from the # current weights using numerically stable algorithm - weights_today.n += 1 - weights_today.mean_weights += ( - current_weights - weights_today.mean_weights - ) / weights_today.n - weights_today.last_validtime = validtime + weights_today["n"] += 1 + weights_today["mean_weights"] += ( + current_weights - weights_today["mean_weights"] + ) / weights_today["n"] + weights_today["last_validtime"] = validtime - np.save(outdir_path + "NWP_weights_today.bin", weights_today, allow_pickle=True) + np.save(weights_today_file, weights_today, allow_pickle=True) return None @@ -136,11 +144,11 @@ def calc_clim_weights(outdir_path, n_cascade_levels, nmodels=1, window_length=30 Array of shape [model, scale_level, ...] containing the climatological weights. """ - + past_weights_file = join(outdir_path, "NWP_weights_window.npy") # past_weights has dimensions date x model x scale_level x .... 
- past_weights = np.load(outdir_path + "NWP_weights_window.bin") + past_weights = exists(past_weights_file) and np.load(past_weights_file) or None # check if there's enough data to compute the climatological skill - if past_weights.shape[0] < window_length: + if not past_weights or past_weights.shape[0] < window_length: return get_default_weights(nmodels, n_cascade_levels) # reduce window if necessary else: diff --git a/pysteps/tests/test_blending_clim.py b/pysteps/tests/test_blending_clim.py new file mode 100644 index 000000000..7f2087de6 --- /dev/null +++ b/pysteps/tests/test_blending_clim.py @@ -0,0 +1,141 @@ +# -*- coding: utf-8 -*- +import numpy as np +import pytest + +from pysteps.blending.clim import save_weights, calc_clim_weights +import random +from datetime import datetime, timedelta +from os.path import join, exists + +random.seed(12356) +n_cascade_levels = 7 +model_names = ["alaro13", "arome13"] +default_start_weights = [0.8, 0.5] +""" Helper functions """ + + +def generate_random_weights(n_cascade_levels, n_models=1): + """ + Generate random weights which decay exponentially with scale. + """ + start_weights = np.array([random.uniform(0.5, 0.99) for i in range(n_models)]) + powers = np.arange(1, n_cascade_levels + 1) + return pow(start_weights[:, np.newaxis], powers) + + +def generate_fixed_weights(n_cascade_levels, n_models=1): + """ + Generate weights starting at default_start_weights which decay exponentially with scale. + """ + start_weights = np.resize(default_start_weights, n_models) + powers = np.arange(1, n_cascade_levels + 1) + return pow(start_weights[:, np.newaxis], powers) + + +""" Test arguments """ + +clim_arg_names = ("startdatestr", "enddatestr", "n_models", "expected_weights_today") + +test_enddates = ["20210701235500", "20210702010000", "20200930235500"] + +clim_arg_values = [ + ( + "20210701230000", + "20210701235500", + 1, + { + "mean_weights": generate_fixed_weights(n_cascade_levels), + "n": 12, + "last_validtime": datetime.strptime(test_enddates[0], "%Y%m%d%H%M%S"), + }, + ), + ( + "20210701230000", + "20210702000000", + 1, + { + "mean_weights": generate_fixed_weights(n_cascade_levels), + "n": 1, + "last_validtime": datetime.strptime(test_enddates[1], "%Y%m%d%H%M%S"), + }, + ), + ( + "20200801000000", + "20200930235500", + 1, + { + "mean_weights": generate_fixed_weights(n_cascade_levels), + "n": 288, + "last_validtime": datetime.strptime(test_enddates[2], "%Y%m%d%H%M%S"), + }, + ), + ( + "20210701230000", + "20210701235500", + 2, + { + "mean_weights": generate_fixed_weights(n_cascade_levels, 2), + "n": 12, + "last_validtime": datetime.strptime(test_enddates[0], "%Y%m%d%H%M%S"), + }, + ), + ( + "20210701230000", + "20210702000000", + 2, + { + "mean_weights": generate_fixed_weights(n_cascade_levels, 2), + "n": 1, + "last_validtime": datetime.strptime(test_enddates[1], "%Y%m%d%H%M%S"), + }, + ), + ( + "20200801000000", + "20200930235500", + 2, + { + "mean_weights": generate_fixed_weights(n_cascade_levels, 2), + "n": 288, + "last_validtime": datetime.strptime(test_enddates[2], "%Y%m%d%H%M%S"), + }, + ), +] + + +@pytest.mark.parametrize(clim_arg_names, clim_arg_values) +def test_save_weights( + startdatestr, enddatestr, n_models, expected_weights_today, tmpdir +): + """Test if the weights are saved correctly and the daily average is computed""" + + # get validtime + currentdate = datetime.strptime(startdatestr, "%Y%m%d%H%M%S") + enddate = datetime.strptime(enddatestr, "%Y%m%d%H%M%S") + timestep = timedelta(minutes=5) + + outdir_path = tmpdir + + while 
currentdate <= enddate: + current_weights = generate_fixed_weights(n_cascade_levels, n_models) + save_weights(current_weights, currentdate, outdir_path, window_length=2) + currentdate += timestep + + weights_today_file = join(outdir_path, "NWP_weights_today.npy") + assert exists(weights_today_file) + + weights_today = load(weights_today_file) + + # Check type + assert type(weights_today) == type({}) + assert "mean_weights" in weights_today + assert "n" in weights_today + assert "last_validtime" in weights_today + assertEqual(weights_today, expected_weights_today) + + +if __name__ == "__main__": + save_weights( + generate_fixed_weights(n_cascade_levels, 1), + datetime.strptime("20200801000000", "%Y%m%d%H%M%S"), + "/tmp/", + ) From 02f046beaa18771bc10a46ae77834e9d54ddeaa5 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Tue, 17 Aug 2021 12:30:54 +0200 Subject: [PATCH 07/11] Fix short-circuit operations which do not work as expected in python. Use pickle instead of np.save(z) to avoid type issues. --- pysteps/blending/clim.py | 36 ++++++++++++++++++++---------------- 1 file changed, 20 insertions(+), 16 deletions(-) diff --git a/pysteps/blending/clim.py b/pysteps/blending/clim.py index 6a9422f57..12db77b60 100644 --- a/pysteps/blending/clim.py +++ b/pysteps/blending/clim.py @@ -13,6 +13,7 @@ import numpy as np from os.path import exists, join +import pickle def get_default_weights(n_cascade_levels, n_models=1): @@ -73,30 +74,32 @@ def save_weights(current_weights, validtime, outdir_path, window_length=30): n_cascade_levels = current_weights.shape[1] # Load weights_today, a dictionary containing {mean_weights, n, last_validtime} - weights_today_file = join(outdir_path, "NWP_weights_today.npy") - weights_today = ( - exists(weights_today_file) - and np.load(weights_today_file) - or { + weights_today_file = join(outdir_path, "NWP_weights_today.pkl") + if exists(weights_today_file): + with open(weights_today_file, "rb") as f: + weights_today = pickle.load(f) + else: + weights_today = { "mean_weights": np.copy(current_weights), "n": 0, "last_validtime": validtime, } - ) # Load the past weights which is an array with dimensions day x model x scale_level past_weights_file = join(outdir_path, "NWP_weights_window.npy") - past_weights = exists(past_weights_file) and np.load(past_weights_file) or None - + past_weights = None + if exists(past_weights_file): + past_weights = np.load(past_weights_file) # First check if we have started a new day wrt the last written weights, in which # case we should update the daily weights file and reset daily statistics. if weights_today["last_validtime"].date() < validtime.date(): # Append weights to the list of the past X daily averages. - past_weights = ( - past_weights - and np.append(past_weights, [weights_today["mean_weights"]], axis=0) - or np.array([weights_today["mean_weights"]]) - ) + if past_weights is not None: + past_weights = np.append( + past_weights, [weights_today["mean_weights"]], axis=0 + ) + else: + past_weights = np.array([weights_today["mean_weights"]]) print(past_weights.shape) # Remove oldest if the number of entries exceeds the window length. 
if past_weights.shape[0] > window_length: @@ -116,8 +119,8 @@ def save_weights(current_weights, validtime, outdir_path, window_length=30): current_weights - weights_today["mean_weights"] ) / weights_today["n"] weights_today["last_validtime"] = validtime - - np.save(weights_today_file, weights_today, allow_pickle=True) + with open(weights_today_file, "wb") as f: + pickle.dump(weights_today, f) return None @@ -146,7 +149,8 @@ def calc_clim_weights(outdir_path, n_cascade_levels, nmodels=1, window_length=30 """ past_weights_file = join(outdir_path, "NWP_weights_window.npy") # past_weights has dimensions date x model x scale_level x .... - past_weights = exists(past_weights_file) and np.load(past_weights_file) or None + if exists(past_weights_file): + past_weights = np.load(past_weights_file) # check if there's enough data to compute the climatological skill if not past_weights or past_weights.shape[0] < window_length: return get_default_weights(nmodels, n_cascade_levels) From 6282fec80b43880ee2f0e7dba0ce9f8a30f08efe Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Tue, 17 Aug 2021 12:31:46 +0200 Subject: [PATCH 08/11] Add tests for pysteps climatological weight io and calculations. --- pysteps/tests/test_blending_clim.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pysteps/tests/test_blending_clim.py b/pysteps/tests/test_blending_clim.py index 7f2087de6..e790984c7 100644 --- a/pysteps/tests/test_blending_clim.py +++ b/pysteps/tests/test_blending_clim.py @@ -6,6 +6,8 @@ import random from datetime import datetime, timedelta from os.path import join, exists +import pickle +from numpy.testing import assert_array_equal random.seed(12356) n_cascade_levels = 7 @@ -36,7 +38,7 @@ def generate_fixed_weights(n_cascade_levels, n_models=1): clim_arg_names = ("startdatestr", "enddatestr", "n_models", "expected_weights_today") -test_enddates = ["20210701235500", "20210702010000", "20200930235500"] +test_enddates = ["20210701235500", "20210702000000", "20200930235500"] clim_arg_values = [ ( @@ -50,7 +52,7 @@ def generate_fixed_weights(n_cascade_levels, n_models=1): }, ), ( - "20210701230000", + "20210701235500", "20210702000000", 1, { @@ -117,20 +119,25 @@ def test_save_weights( while currentdate <= enddate: current_weights = generate_fixed_weights(n_cascade_levels, n_models) + print("Saving weights: ", current_weights, currentdate, outdir_path) save_weights(current_weights, currentdate, outdir_path, window_length=2) currentdate += timestep - weights_today_file = join(outdir_path, "NWP_weights_today.npy") + weights_today_file = join(outdir_path, "NWP_weights_today.pkl") assert exists(weights_today_file) - - weights_today = load(weights_today_file) + with open(weights_today_file, "rb") as f: + weights_today = pickle.load(f) # Check type assert type(weights_today) == type({}) assert "mean_weights" in weights_today assert "n" in weights_today assert "last_validtime" in weights_today - assertEqual(weights_today, expected_weights_today) + assert_array_equal( + weights_today["mean_weights"], expected_weights_today["mean_weights"] + ) + assert weights_today["n"] == expected_weights_today["n"] + assert weights_today["last_validtime"] == expected_weights_today["last_validtime"] if __name__ == "__main__": From eb4385b8981b1feffdba040ffab2d59ba31af8b9 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Tue, 17 Aug 2021 17:12:50 +0200 Subject: [PATCH 09/11] Add path_workdir to outputs section in pystepsrc file and use it as a default path to store/retrieve blending weights. 
--- pysteps/blending/clim.py | 17 ++++++++++++++--- pysteps/pystepsrc | 3 ++- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/pysteps/blending/clim.py b/pysteps/blending/clim.py index 12db77b60..df32fcc60 100644 --- a/pysteps/blending/clim.py +++ b/pysteps/blending/clim.py @@ -14,9 +14,10 @@ import numpy as np from os.path import exists, join import pickle +from pysteps import rcparams -def get_default_weights(n_cascade_levels, n_models=1): +def get_default_weights(n_cascade_levels=8, n_models=1): """ Get the default weights as given in BPS2004. Take subset of n_cascade_levels or add entries with small values (1e-4) if @@ -49,7 +50,12 @@ def get_default_weights(n_cascade_levels, n_models=1): return np.resize(default_weights, (n_models, n_cascade_levels)) -def save_weights(current_weights, validtime, outdir_path, window_length=30): +def save_weights( + current_weights, + validtime, + outdir_path=rcparams.outputs["path_workdir"], + window_length=30, +): """ Add the current NWP weights to update today's daily average weight. If the day is over, update the list of daily average weights covering a rolling @@ -125,7 +131,12 @@ def save_weights(current_weights, validtime, outdir_path, window_length=30): return None -def calc_clim_weights(outdir_path, n_cascade_levels, nmodels=1, window_length=30): +def calc_clim_weights( + outdir_path=rcparams.outputs["path_workdir"], + n_cascade_levels=8, + nmodels=1, + window_length=30, +): """ Return the climatological weights based on the daily average weights in the rolling window. This is done using a geometric mean. diff --git a/pysteps/pystepsrc b/pysteps/pystepsrc index 7e2b2c9d7..ff8fcb6cc 100644 --- a/pysteps/pystepsrc +++ b/pysteps/pystepsrc @@ -4,7 +4,8 @@ "silent_import": false, "outputs": { // path_outputs : path where to save results (figures, forecasts, etc) - "path_outputs": "./" + "path_outputs": "./", + "path_workdir": "./tmp/" }, "plot": { // "motion_plot" : "streamplot" or "quiver" From aa028a0f031b4bd7762b686c4a736b25e3b93141 Mon Sep 17 00:00:00 2001 From: RubenImhoff Date: Wed, 18 Aug 2021 13:52:57 +0200 Subject: [PATCH 10/11] Minor changes to docstrings, changes to skill scores and testing scripts --- pysteps/blending/clim.py | 17 ++++---- pysteps/blending/skill_scores.py | 43 +++++++++++++-------- pysteps/tests/test_blending_skill_scores.py | 31 +++++++++------ 3 files changed, 56 insertions(+), 35 deletions(-) diff --git a/pysteps/blending/clim.py b/pysteps/blending/clim.py index df32fcc60..ba9a03b10 100644 --- a/pysteps/blending/clim.py +++ b/pysteps/blending/clim.py @@ -7,6 +7,7 @@ .. autosummary:: :toctree: ../generated/ + get_default_weights save_weights calc_clim_weights """ @@ -25,9 +26,9 @@ def get_default_weights(n_cascade_levels=8, n_models=1): Parameters ---------- - n_cascade_levels: int + n_cascade_levels: int, optional Number of cascade levels. - nmodels: int, optional + n_models: int, optional Number of NWP models Returns @@ -132,8 +133,8 @@ def save_weights( def calc_clim_weights( - outdir_path=rcparams.outputs["path_workdir"], n_cascade_levels=8, + outdir_path=rcparams.outputs["path_workdir"], nmodels=1, window_length=30, ): @@ -143,10 +144,10 @@ def calc_clim_weights( Parameters ---------- - outdir_path: string - Path to folder where the historical weights are stored. - n_cascade_levels: int + n_cascade_levels: int, optional Number of cascade levels. + outdir_path: string, optional + Path to folder where the historical weights are stored. 
nmodels: int, optional Number of NWP models window_length: int, optional @@ -162,9 +163,11 @@ def calc_clim_weights( # past_weights has dimensions date x model x scale_level x .... if exists(past_weights_file): past_weights = np.load(past_weights_file) + else: + past_weights = None # check if there's enough data to compute the climatological skill if not past_weights or past_weights.shape[0] < window_length: - return get_default_weights(nmodels, n_cascade_levels) + return get_default_weights(n_cascade_levels, nmodels) # reduce window if necessary else: past_weights = past_weights[-window_length:] diff --git a/pysteps/blending/skill_scores.py b/pysteps/blending/skill_scores.py index f8225959f..d53ac3548 100644 --- a/pysteps/blending/skill_scores.py +++ b/pysteps/blending/skill_scores.py @@ -65,7 +65,7 @@ def spatial_correlation(obs, mod): return rho -def lt_dependent_cor_nwp(lt, correlations, outdir_path, skill_kwargs=dict()): +def lt_dependent_cor_nwp(lt, correlations, skill_kwargs=dict()): """Determine the correlation of a model field for lead time lt and cascade k, by assuming that the correlation determined at t=0 regresses towards the climatological values. @@ -79,10 +79,9 @@ def lt_dependent_cor_nwp(lt, correlations, outdir_path, skill_kwargs=dict()): Array of shape [n_cascade_levels] containing per cascade_level the correlation between the normalized cascade of the observed (radar) rainfall field and the normalized cascade of the model field. - outdir_path: str - Path to folder where the historical weights are stored. skill_kwargs : dict, optional - Dictionary containing e.g. the nmodels and window_length parameters. + Dictionary containing e.g. the outdir_path, nmodels and window_length + parameters. Returns ------- @@ -99,9 +98,7 @@ def lt_dependent_cor_nwp(lt, correlations, outdir_path, skill_kwargs=dict()): """ # Obtain the climatological values towards which the correlations will # regress - clim_cor_values, regr_pars = clim_regr_values( - len(correlations), outdir_path, skill_kwargs=None - ) + clim_cor_values, regr_pars = clim_regr_values(n_cascade_levels=len(correlations)) # Determine the speed of the regression (eq. 24 in BPS2004) qm = np.exp(-lt / regr_pars[0, :]) * (2 - np.exp(-lt / regr_pars[1, :])) # Determine the correlation for lead time lt @@ -158,7 +155,8 @@ def lt_dependent_cor_extrapolation(PHI, correlations=None, correlations_prev=Non return rho, rho_prev -def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): +# TODO: Make sure the method can also handle multiple model inputs.. +def clim_regr_values(n_cascade_levels, skill_kwargs=dict()): """Obtains the climatological correlation values and regression parameters from a file called NWP_weights_window.bin in the outdir_path. If this file is not present yet, the values from :cite:`BPS2004` are used. @@ -168,10 +166,9 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): ---------- n_cascade_levels : int The number of cascade levels to use. - outdir_path: str - Path to folder where the historical weights are stored. skill_kwargs : dict, optional - Dictionary containing e.g. the nmodels and window_length parameters. + Dictionary containing e.g. the outdir_path, nmodels and window_length + parameters. 
Returns ------- @@ -201,16 +198,18 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): # First, obtain climatological skill values try: clim_cor_values = clim.calc_clim_weights( - outdir_path, n_cascade_levels, **skill_kwargs + n_cascade_levels=n_cascade_levels, **skill_kwargs ) except FileNotFoundError: # The climatological skill values file does not exist yet, so we'll # use the default values from BPS2004. clim_cor_values = np.array( - [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040] + [[0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040]] ) - # Check if clim_cor_values has only one dimension, otherwise it has + clim_cor_values = clim_cor_values[0] + + # Check if clim_cor_values has only one model, otherwise it has # returned the skill values for multiple models if clim_cor_values.ndim != 1: raise IndexError( @@ -224,11 +223,10 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): # Get the number of cascade levels that is missing n_extra_lev = n_cascade_levels - clim_cor_values.shape[0] # Append the array with correlation values of 10e-4 - clim_cor_values = np.append(clim_cor_values, np.repeat(10e-4, n_extra_lev)) + clim_cor_values = np.append(clim_cor_values, np.repeat(1e-4, n_extra_lev)) # Get the regression parameters (L in eq. 24 in BPS2004) - Hard coded, # change to own values when present. - # TODO: Regression pars for different cascade levels regr_pars = np.array( [ [130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0], @@ -236,4 +234,17 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): ] ) + # Check whether the number of cascade_levels is correct + if regr_pars.shape[1] > n_cascade_levels: + regr_pars = regr_pars[:, 0:n_cascade_levels] + elif regr_pars.shape[1] < n_cascade_levels: + # Get the number of cascade levels that is missing + n_extra_lev = n_cascade_levels - regr_pars.shape[1] + # Append the array with correlation values of 10e-4 + regr_pars = np.append( + regr_pars, + [np.repeat(10.0, n_extra_lev), np.repeat(10e4, n_extra_lev)], + axis=1, + ) + return clim_cor_values, regr_pars diff --git a/pysteps/tests/test_blending_skill_scores.py b/pysteps/tests/test_blending_skill_scores.py index ced452fbc..0bf4f1a98 100644 --- a/pysteps/tests/test_blending_skill_scores.py +++ b/pysteps/tests/test_blending_skill_scores.py @@ -19,7 +19,7 @@ ) clim_cor_values_6lev = np.array([0.848, 0.537, 0.237, 0.065, 0.020, 0.0044]) clim_cor_values_9lev = np.array( - [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040, 10e-4] + [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040, 1e-4] ) # Set the regression values @@ -191,8 +191,6 @@ # The test @pytest.mark.parametrize(skill_scores_arg_names, skill_scores_arg_values) - -# TODO: Also test the pysteps.blending.clim.calc_clim_weights functionality in # The test function to be used def test_blending_skill_scores( obs, @@ -216,7 +214,7 @@ def test_blending_skill_scores( """ # Calculate the spatial correlation of the given model field - correlations_t0 = spatial_correlation(obs, mod) + correlations_t0 = np.array(spatial_correlation(obs, mod)) # Check if the field has the same number of cascade levels as the model # field and as the given n_cascade_levels @@ -231,27 +229,32 @@ def test_blending_skill_scores( assert_array_almost_equal( correlations_t0, expected_cor_t0, - "Returned spatial correlation is not the same as the expected value", + decimal=3, + err_msg="Returned spatial correlation is not the same as the expected value", ) # Test if the 
NWP correlation regresses towards the correct value given # a lead time in minutes # First, check if the climatological values are returned correctly - correlations_clim, regr_clim = clim_regr_values(n_cascade_levels, "random_path") + correlations_clim, regr_clim = clim_regr_values(n_cascade_levels=n_cascade_levels) assert ( correlations_clim.shape[0] == n_cascade_levels ), "Number of cascade levels should be the same as n_cascade_levels" assert_array_almost_equal( correlations_clim, clim_cor_values, - "Not the correct climatological correlations were returned", + decimal=3, + err_msg="Not the correct climatological correlations were returned", ) assert_array_almost_equal( - regr_clim, regr_pars, "Not the correct regression parameters were returned" + regr_clim, + regr_pars, + decimal=3, + err_msg="Not the correct regression parameters were returned", ) # Then, check the regression of the correlation values - correlations_nwp_lt = lt_dependent_cor_nwp(lt, correlations_t0, "random_path") + correlations_nwp_lt = lt_dependent_cor_nwp(lt=lt, correlations=correlations_t0) assert ( correlations_nwp_lt.shape[0] == mod.shape[0] ), "Number of cascade levels should be the same as in the model field" @@ -261,14 +264,17 @@ def test_blending_skill_scores( assert_array_almost_equal( correlations_nwp_lt, expected_cor_nwp_lt, - "Correlations of NWP not equal to the expected correlations", + decimal=3, + err_msg="Correlations of NWP not equal to the expected correlations", ) # Finally, make sure nowcast correlation regresses towards the correct # value given some PHI-values. - correlations_nowcast_lt = lt_dependent_cor_extrapolation( + correlations_nowcast_lt, __ = lt_dependent_cor_extrapolation( PHI, correlations_t0, cor_prev ) + + print(correlations_nowcast_lt) assert ( correlations_nowcast_lt.shape[0] == mod.shape[0] ), "Number of cascade levels should be the same as in the model field" @@ -278,5 +284,6 @@ def test_blending_skill_scores( assert_array_almost_equal( correlations_nowcast_lt, expected_cor_nowcast_lt, - "Correlations of nowcast not equal to the expected correlations", + decimal=3, + err_msg="Correlations of nowcast not equal to the expected correlations", ) From 0463c070e842365bfd68ed1b11029298bc9fd84b Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Wed, 18 Aug 2021 14:27:43 +0200 Subject: [PATCH 11/11] Completed documentation for blending clim module, cleanup. --- pysteps/blending/clim.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/pysteps/blending/clim.py b/pysteps/blending/clim.py index ba9a03b10..fbb03e1ff 100644 --- a/pysteps/blending/clim.py +++ b/pysteps/blending/clim.py @@ -27,9 +27,9 @@ def get_default_weights(n_cascade_levels=8, n_models=1): Parameters ---------- n_cascade_levels: int, optional - Number of cascade levels. + Number of cascade levels. Defaults to 8. n_models: int, optional - Number of NWP models + Number of NWP models. Defaults to 1. Returns ------- @@ -66,11 +66,17 @@ def save_weights( ---------- current_weights: array-like Array of shape [model, scale_level, ...] - containing the current weights of the different NWP models per cascade level. - outdir_path: string - Path to folder where the historical weights are stored. + containing the current weights of the different NWP models per cascade + level. + validtime: datetime + Datetime object containing the date and time at for which the current + weights are valid. + outdir_path: string, optional + Path to folder where the historical weights are stored. 
Defaults to + path_workdir from rcparams. window_length: int, optional - Length of window (in days) over which to compute the climatological weights. + Length of window (in days) of daily weights that should be retained. + Defaults to 30. Returns ------- @@ -135,7 +141,7 @@ def save_weights( def calc_clim_weights( n_cascade_levels=8, outdir_path=rcparams.outputs["path_workdir"], - nmodels=1, + n_models=1, window_length=30, ): """ @@ -147,16 +153,19 @@ def calc_clim_weights( n_cascade_levels: int, optional Number of cascade levels. outdir_path: string, optional - Path to folder where the historical weights are stored. - nmodels: int, optional - Number of NWP models + Path to folder where the historical weights are stored. Defaults to + path_workdir from rcparams. + n_models: int, optional + Number of NWP models. Defaults to 1. window_length: int, optional - Length of window (in days) over which to compute the climatological weights. + Length of window (in days) over which to compute the climatological + weights. Defaults to 30. Returns ------- climatological_mean_weights: array-like - Array of shape [model, scale_level, ...] containing the climatological weights. + Array of shape [model, scale_level, ...] containing the climatological + (geometric) mean weights. """ past_weights_file = join(outdir_path, "NWP_weights_window.npy") @@ -167,7 +176,7 @@ def calc_clim_weights( past_weights = None # check if there's enough data to compute the climatological skill if not past_weights or past_weights.shape[0] < window_length: - return get_default_weights(n_cascade_levels, nmodels) + return get_default_weights(n_cascade_levels, n_models) # reduce window if necessary else: past_weights = past_weights[-window_length:]
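
The patches above add the clim module and its tests but no end-to-end example. The sketch below shows how the two public entry points are intended to be used together, assuming pysteps is installed from this branch so that pysteps.blending.clim is importable; the temporary output directory, the two-model configuration and the random weights are illustrative placeholders, not values taken from the patches.

from datetime import datetime, timedelta
from tempfile import mkdtemp

import numpy as np

from pysteps.blending.clim import calc_clim_weights, get_default_weights, save_weights

# Stand-in for rcparams.outputs["path_workdir"]; any writable directory works.
outdir_path = mkdtemp()
n_models, n_cascade_levels = 2, 8

# BPS2004-based defaults, resized to the requested number of models and levels.
print(get_default_weights(n_cascade_levels, n_models).shape)  # (2, 8)

# Fold per-timestep weights into today's running mean (numerically stable update).
validtime = datetime(2021, 8, 18, 0, 0)
for _ in range(12):  # one hour of 5-minute timesteps, all within the same day
    current_weights = np.random.uniform(1e-4, 1.0, size=(n_models, n_cascade_levels))
    save_weights(current_weights, validtime, outdir_path, window_length=30)
    validtime += timedelta(minutes=5)

# Climatological weights are the geometric mean exp(mean(log w)) of the daily
# averages in the rolling window; while NWP_weights_window.npy does not exist
# yet, the BPS2004 defaults are returned instead.
clim_weights = calc_clim_weights(
    n_cascade_levels=n_cascade_levels,
    outdir_path=outdir_path,
    n_models=n_models,
    window_length=30,
)
print(clim_weights.shape)  # (2, 8)

The loop deliberately stays within one calendar day so that only NWP_weights_today.pkl is written and calc_clim_weights falls back to the defaults. Once NWP_weights_window.npy exists, the "not past_weights" test in calc_clim_weights is evaluated on a multi-element NumPy array and raises a ValueError, so a "past_weights is None" check (as already used in save_weights) would be needed before relying on the rolling-window branch.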