From b01fac3a0f5b1c051bca747770f2a2a56c01f6b7 Mon Sep 17 00:00:00 2001 From: Carlos Velasco Date: Wed, 3 Apr 2019 15:17:13 +0000 Subject: [PATCH 01/17] First basic functions to implement STEPS blending --- pysteps/blending/steps.py | 111 ++++++++++++++++++++++++++++++++++++++ pysteps/blending/utils.py | 54 +++++++++++++++++++ 2 files changed, 165 insertions(+) create mode 100644 pysteps/blending/steps.py create mode 100644 pysteps/blending/utils.py diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py new file mode 100644 index 000000000..9632ff301 --- /dev/null +++ b/pysteps/blending/steps.py @@ -0,0 +1,111 @@ +""" +pysteps.blending.steps +====================== + +Implementation of the STEPS stochastic blending method as described in +:cite:`BPS2006`. + +.. autosummary:: + :toctree: ../generated/ + + calculate_ratios + calculate_weights + blender + +""" + +import numpy as np + + +def calculate_ratios(correlations): + """Calculate explain variance ratios from correlation. + + Parameters + ---------- + Array of shape [component, scale_level, ...] + containing correlation (skills) for each component, scale level, + and optionally along [y, x] dimensions. + + Returns + ------- + out : numpy array + An array containing the ratios of explain variance for each + component, scale level, ... + """ + # correlations: [component, scale, ...] + square_corrs = np.square(correlations) + out = square_corrs / (1 - square_corrs) + # out: [component, scale, ...] + return out + + +def calculate_weights(correlations): + """Calculate blending weights for STEPS blending from correlation. + + Parameters + ---------- + correlations : array-like + Array of shape [component, scale_level, ...] + containing correlation (skills) for each component, scale level, + and optionally along [y, x] dimensions. + + Returns + ------- + weights : array-like + Array of shape [component+1, scale_level, ...] + containing the weights to be used in STEPS blending for + each original component plus noise, scale level, ... + """ + # correlations: [component, scale, ...] + # calculate weights for each source + ratios = calculate_ratios(correlations) + # ratios: [component, scale, ...] + total_ratios = np.sum(ratios, axis=0) + # total_ratios: [scale, ...] + weights = correlations * np.sqrt(ratios/total_ratios) + # weights: [component, scale, ...] + # calculate noise weights + total_square_weights = np.sum(np.square(weights),axis=0) + # total_square_weights: [scale,...] + noise_weight = np.sqrt(1.0 - total_square_weights) + weights = np.concatenate((weights, noise_weight[None,...]), axis=0) + return weights + + +def blend_cascades(cascades_norm,weights): + """Calculate blended cascades using STEPS weights. + + Parameters + ---------- + cascades_norm : array-like + Array of shape [number_components + 1, scale_level, ...] + with normalized cascades components for each component, scale level, + and optionally [y, x ] dimensions, + obtained by calling a method implemented in + pysteps.blending.utils.stack_cascades + + weights : array-like + An array of shape [number_components + 1, scale_level, ...] + containing the weights to be used in this rutine + for each component plus noise, scale level, ... + obtained by calling a method implemented in + pysteps.blending.steps.calculate_weights + + Returns + ------- + combined_cascade : array-like + An array of shape [scale_level, ...] + containing the weighted combination of cascades from multiple + componented to be used in STEPS blending. 
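+
+    Examples
+    --------
+    A minimal sketch with two components plus noise on a single scale level
+    (note that this early version also prints the combined shape as a side
+    effect):
+
+    >>> import numpy as np
+    >>> cascades_norm = np.ones((3, 1, 4, 4))
+    >>> weights = np.array([0.5, 0.3, 0.2]).reshape(3, 1, 1, 1)
+    >>> combined = blend_cascades(cascades_norm, weights)
+    (1, 4, 4)
+    >>> float(combined.sum())
+    16.0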
+ """ + + #cascade_norm component, scales, .... + #weights component, scales, .... + all_c_wn = weights * cascades_norm + combined_cascade = np.sum(all_c_wn,axis=0) + print(combined_cascade.shape) + # combined_cascade [scale, ...] + return combined_cascade + + + diff --git a/pysteps/blending/utils.py b/pysteps/blending/utils.py new file mode 100644 index 000000000..aa50c9c5c --- /dev/null +++ b/pysteps/blending/utils.py @@ -0,0 +1,54 @@ +""" +pysteps.blending.utils +====================== + +Module with common utilities used by blending methods. + +.. autosummary:: + :toctree: ../generated/ + + stack_cascades + recompose_cascade +""" + +import numpy as np + + +def stack_cascades(R_d, donorm=True): + """Stack the given cascades into a larger array. + + Parameters + ---------- + R_d : list + List of cascades obtained by calling a method implemented in + pysteps.cascade.decomposition. + donorm : bool + If True, normalize the cascade levels before stacking. + + Returns + ------- + out : tuple + A three-element tuple containing a four-dimensional array of stacked + cascade levels and arrays of mean values and standard deviations for each + cascade level. + """ + R_c = [] + mu_c = [] + sigma_c = [] + + for cascade in R_d: + R_ = [] + R_i = cascade["cascade_levels"] + n_levels = R_i.shape[0] + mu_ = np.ones(n_levels)* 0.0 + sigma_ = np.ones(n_levels) *1.0 + if donorm: + mu_ = np.asarray(cascade["means"]) + sigma_ = np.asarray(cascade["stds"]) + for j in range(n_levels): + R__ = (R_i[j, :, :] - mu_[j]) / sigma_[j] + R_.append(R__) + R_c.append(np.stack(R_)) + mu_c.append(mu_) + sigma_c.append(sigma_) + return np.stack(R_c),np.stack(mu_c),np.stack(sigma_c) From 236fb2e681093a998cea342889b7e78611b396fd Mon Sep 17 00:00:00 2001 From: Carlos Velasco Date: Fri, 5 Apr 2019 13:21:04 +0000 Subject: [PATCH 02/17] Add compute of blend means,sigmas and recompose --- pysteps/blending/steps.py | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 9632ff301..458d18eab 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -10,7 +10,9 @@ calculate_ratios calculate_weights - blender + blend_cascades + blend_means_sigmas + recompose_cascade """ @@ -108,4 +110,35 @@ def blend_cascades(cascades_norm,weights): return combined_cascade +def blend_means_sigmas(means,sigmas,weights): + #means component, scales, .... + #sigmas component, scales, .... + #weights component, scales, .... 
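+    # The blended mean and sigma are weighted averages (cf. eqs. 32 and 33
+    # in BPS2004): the weights are normalized so that they sum to one along
+    # the component axis before being applied to the means and sigmas below.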
+ total_weights = np.sum(weights,axis=0) + factors = weights / total_weights + diff_dims = factors.ndim - means.ndim + if diff_dims: + for i in range(diff_dims): + means = np.expand_dims(means, axis=means.ndim) + diff_dims = factors.ndim - sigmas.ndim + if diff_dims: + for i in range(diff_dims): + sigmas = np.expand_dims(sigmas, axis=sigmas.ndim) + weighted_means = np.sum((factors * means),axis=0) + weighted_sigmas = np.sum((factors * sigmas),axis=0) + + # to do: substract covarainces to weigthed sigmas + + return weighted_means, weighted_sigmas + + +def recompose_cascade(w_cascade, w_mean,w_sigma): + renorm = (w_cascade * w_sigma) + w_mean + # print(renorm.shape) + out = np.sum(renorm,axis=0) + # print(out.shape) + return out + + + From b58d8066185219aaae2efbbae60bd0a6a07a1849 Mon Sep 17 00:00:00 2001 From: Carlos Velasco Date: Tue, 10 Aug 2021 08:29:57 +0000 Subject: [PATCH 03/17] Add blend_optical_flow --- pysteps/blending/steps.py | 60 ++++++++++++++++----------------- pysteps/blending/utils.py | 70 +++++++++++++++++++++++++++++++++++---- 2 files changed, 91 insertions(+), 39 deletions(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 458d18eab..569e7f80e 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -2,12 +2,12 @@ pysteps.blending.steps ====================== -Implementation of the STEPS stochastic blending method as described in +Implementation of the STEPS stochastic blending method as described in :cite:`BPS2006`. .. autosummary:: :toctree: ../generated/ - + calculate_ratios calculate_weights blend_cascades @@ -21,7 +21,7 @@ def calculate_ratios(correlations): """Calculate explain variance ratios from correlation. - + Parameters ---------- Array of shape [component, scale_level, ...] @@ -43,14 +43,14 @@ def calculate_ratios(correlations): def calculate_weights(correlations): """Calculate blending weights for STEPS blending from correlation. - + Parameters ---------- correlations : array-like Array of shape [component, scale_level, ...] containing correlation (skills) for each component, scale level, and optionally along [y, x] dimensions. - + Returns ------- weights : array-like @@ -59,7 +59,7 @@ def calculate_weights(correlations): each original component plus noise, scale level, ... """ # correlations: [component, scale, ...] - # calculate weights for each source + # calculate weights for each source ratios = calculate_ratios(correlations) # ratios: [component, scale, ...] total_ratios = np.sum(ratios, axis=0) @@ -67,16 +67,16 @@ def calculate_weights(correlations): weights = correlations * np.sqrt(ratios/total_ratios) # weights: [component, scale, ...] # calculate noise weights - total_square_weights = np.sum(np.square(weights),axis=0) + total_square_weights = np.sum(np.square(weights), axis=0) # total_square_weights: [scale,...] noise_weight = np.sqrt(1.0 - total_square_weights) - weights = np.concatenate((weights, noise_weight[None,...]), axis=0) + weights = np.concatenate((weights, noise_weight[None, ...]), axis=0) return weights -def blend_cascades(cascades_norm,weights): +def blend_cascades(cascades_norm, weights): """Calculate blended cascades using STEPS weights. - + Parameters ---------- cascades_norm : array-like @@ -87,34 +87,34 @@ def blend_cascades(cascades_norm,weights): pysteps.blending.utils.stack_cascades weights : array-like - An array of shape [number_components + 1, scale_level, ...] + An array of shape [number_components + 1, scale_level, ...] 
containing the weights to be used in this rutine for each component plus noise, scale level, ... obtained by calling a method implemented in pysteps.blending.steps.calculate_weights - + Returns ------- combined_cascade : array-like An array of shape [scale_level, ...] - containing the weighted combination of cascades from multiple + containing the weighted combination of cascades from multiple componented to be used in STEPS blending. """ - #cascade_norm component, scales, .... - #weights component, scales, .... + # cascade_norm component, scales, .... + # weights component, scales, .... all_c_wn = weights * cascades_norm - combined_cascade = np.sum(all_c_wn,axis=0) - print(combined_cascade.shape) + combined_cascade = np.sum(all_c_wn, axis=0) + # print(combined_cascade.shape) # combined_cascade [scale, ...] return combined_cascade -def blend_means_sigmas(means,sigmas,weights): - #means component, scales, .... - #sigmas component, scales, .... - #weights component, scales, .... - total_weights = np.sum(weights,axis=0) +def blend_means_sigmas(means, sigmas, weights): + # means component, scales, .... + # sigmas component, scales, .... + # weights component, scales, .... + total_weights = np.sum(weights, axis=0) factors = weights / total_weights diff_dims = factors.ndim - means.ndim if diff_dims: @@ -124,21 +124,17 @@ def blend_means_sigmas(means,sigmas,weights): if diff_dims: for i in range(diff_dims): sigmas = np.expand_dims(sigmas, axis=sigmas.ndim) - weighted_means = np.sum((factors * means),axis=0) - weighted_sigmas = np.sum((factors * sigmas),axis=0) + weighted_means = np.sum((factors * means), axis=0) + weighted_sigmas = np.sum((factors * sigmas), axis=0) # to do: substract covarainces to weigthed sigmas - + return weighted_means, weighted_sigmas - -def recompose_cascade(w_cascade, w_mean,w_sigma): + +def recompose_cascade(w_cascade, w_mean, w_sigma): renorm = (w_cascade * w_sigma) + w_mean # print(renorm.shape) - out = np.sum(renorm,axis=0) + out = np.sum(renorm, axis=0) # print(out.shape) return out - - - - diff --git a/pysteps/blending/utils.py b/pysteps/blending/utils.py index aa50c9c5c..1584505d6 100644 --- a/pysteps/blending/utils.py +++ b/pysteps/blending/utils.py @@ -16,7 +16,7 @@ def stack_cascades(R_d, donorm=True): """Stack the given cascades into a larger array. - + Parameters ---------- R_d : list @@ -24,7 +24,7 @@ def stack_cascades(R_d, donorm=True): pysteps.cascade.decomposition. donorm : bool If True, normalize the cascade levels before stacking. - + Returns ------- out : tuple @@ -35,13 +35,13 @@ def stack_cascades(R_d, donorm=True): R_c = [] mu_c = [] sigma_c = [] - + for cascade in R_d: R_ = [] R_i = cascade["cascade_levels"] n_levels = R_i.shape[0] - mu_ = np.ones(n_levels)* 0.0 - sigma_ = np.ones(n_levels) *1.0 + mu_ = np.ones(n_levels) * 0.0 + sigma_ = np.ones(n_levels) * 1.0 if donorm: mu_ = np.asarray(cascade["means"]) sigma_ = np.asarray(cascade["stds"]) @@ -50,5 +50,61 @@ def stack_cascades(R_d, donorm=True): R_.append(R__) R_c.append(np.stack(R_)) mu_c.append(mu_) - sigma_c.append(sigma_) - return np.stack(R_c),np.stack(mu_c),np.stack(sigma_c) + sigma_c.append(sigma_) + return np.stack(R_c), np.stack(mu_c), np.stack(sigma_c) + + +def blend_optical_flows(flows, weights): + """Combine advection fields using given weights. + + Parameters + ---------- + flows : array-like + A stack of multiple advenction fields having shape + (S, 2, m, n), where flows[N, :, :, :] contains the motion vectors + for source N. 
+        Advection fields for each source can be obtained by
+        calling any of the methods implemented in
+        pysteps.motion and then stacking them together
+    weights : array-like
+        An array of shape [number_sources]
+        containing the weights to be used to combine
+        the advection fields of each source.
+        The weights are modified to make their sum equal to one.
+    Returns
+    -------
+    out : ndarray
+        Return the blended advection field having shape
+        (2, m, n), where out[0, :, :] contains the x-components of
+        the blended motion vectors and out[1, :, :] contains the y-components.
+        The velocities are in units of pixels / timestep.
+    """
+
+    # check inputs
+    if isinstance(flows, (list, tuple)):
+        flows = np.stack(flows)
+
+    if isinstance(weights, (list, tuple)):
+        weights = np.asarray(weights)
+
+    # check weights dimensions match number of sources
+    num_sources = flows.shape[0]
+    num_weights = weights.shape[0]
+
+    if num_weights != num_sources:
+        raise ValueError(
+            "dimension mismatch between flows and weights.\n"
+            "weights dimension must match the number of flows.\n"
+            f"number of flows={num_sources}, number of weights={num_weights}"
+        )
+    # normalize weights
+    weights = weights / np.sum(weights)
+
+    # flows dimension sources, 2, m, n
+    # weights dimension sources
+    # move source axis to last to allow broadcasting
+    all_c_wn = weights * np.moveaxis(flows, 0, -1)
+    # sum uses last axis
+    combined_flows = np.sum(all_c_wn, axis=-1)
+    # combined_flows [2, m, n]
+    return combined_flows

From a568e4a349eeb7331674877bd143d9118afee798 Mon Sep 17 00:00:00 2001
From: Ruben Imhoff
Date: Tue, 10 Aug 2021 15:30:16 +0200
Subject: [PATCH 04/17] changes to steps blending procedure - weights according
 to adjusted BPS2006 method

---
 pysteps/blending/steps.py | 142 +++++++++++++++++++++++++++-----------
 1 file changed, 103 insertions(+), 39 deletions(-)

diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py
index 569e7f80e..27825d1a0 100644
--- a/pysteps/blending/steps.py
+++ b/pysteps/blending/steps.py
@@ -1,9 +1,12 @@
+# -*- coding: utf-8 -*-
 """
 pysteps.blending.steps
 ======================

 Implementation of the STEPS stochastic blending method as described in
-:cite:`BPS2006`.
+:cite:`BPS2006`. The method assumes the presence of one NWP model or ensemble
+member to be blended with one nowcast. More models, such as in :cite:`SPN2013`,
+are possible with this code, but we recommend the use of just two models.

 .. autosummary::
     :toctree: ../generated/

 def calculate_ratios(correlations):
     """Calculate explain variance ratios from correlation.

     Parameters
     ----------
     Array of shape [component, scale_level, ...]
-    containing correlation (skills) for each component, scale level,
-    and optionally along [y, x] dimensions.
+    containing correlation (skills) for each component (NWP and nowcast),
+    scale level, and optionally along [y, x] dimensions.

     Returns
     -------
     """
     # correlations: [component, scale, ...]
     square_corrs = np.square(correlations)
+    # Calculate the ratio of the explained variance to the unexplained
+    # variance of the nowcast and NWP model components
     out = square_corrs / (1 - square_corrs)
     # out: [component, scale, ...]
     return out

 def calculate_weights(correlations):
     """Calculate blending weights for STEPS blending from correlation.

     Parameters
     ----------
     correlations : array-like
         Array of shape [component, scale_level, ...]
-        containing correlation (skills) for each component, scale level,
-        and optionally along [y, x] dimensions.
+ containing correlation (skills) for each component (NWP and nowcast), + scale level, and optionally along [y, x] dimensions. Returns ------- weights : array-like Array of shape [component+1, scale_level, ...] containing the weights to be used in STEPS blending for - each original component plus noise, scale level, ... + each original component plus an addtional noise component, scale level, + and optionally along [y, x] dimensions. """ # correlations: [component, scale, ...] - # calculate weights for each source + # Calculate weights for each source ratios = calculate_ratios(correlations) # ratios: [component, scale, ...] total_ratios = np.sum(ratios, axis=0) - # total_ratios: [scale, ...] + # total_ratios: [scale, ...] - the denominator of eq. 11 & 12 in BPS2006 weights = correlations * np.sqrt(ratios/total_ratios) # weights: [component, scale, ...] - # calculate noise weights - total_square_weights = np.sum(np.square(weights), axis=0) - # total_square_weights: [scale,...] - noise_weight = np.sqrt(1.0 - total_square_weights) + # Calculate the weight of the noise component. This is different from + # BPS 2006, as the sum of the weights should equal 1 here. + noise_weight = 1.0 - np.sum(weights, axis=0) + # noise_weight: [scale, ...] + + # Original BPS2006 method in the following two lines + # total_square_weights = np.sum(np.square(weights), axis=0) + # noise_weight = np.sqrt(1.0 - total_square_weights) + #TODO: determine the weights method and/or add different functions + + # Finally, add the noise_weights to the weights variable. weights = np.concatenate((weights, noise_weight[None, ...]), axis=0) return weights def blend_cascades(cascades_norm, weights): - """Calculate blended cascades using STEPS weights. + """Calculate blended normalized cascades using STEPS weights following eq. + 10 in :cite:`BPS2006`. Parameters ---------- cascades_norm : array-like Array of shape [number_components + 1, scale_level, ...] - with normalized cascades components for each component, scale level, - and optionally [y, x ] dimensions, - obtained by calling a method implemented in - pysteps.blending.utils.stack_cascades + with normalized cascades components for each component + (NWP, nowcasts, noise) and scale level, obtained by calling a method + implemented in pysteps.blending.utils.stack_cascades weights : array-like An array of shape [number_components + 1, scale_level, ...] - containing the weights to be used in this rutine - for each component plus noise, scale level, ... - obtained by calling a method implemented in + containing the weights to be used in this routine + for each component plus noise, scale level, and optionally [y, x] + dimensions, obtained by calling a method implemented in pysteps.blending.steps.calculate_weights Returns ------- combined_cascade : array-like - An array of shape [scale_level, ...] - containing the weighted combination of cascades from multiple - componented to be used in STEPS blending. + An array of shape [scale_level, y, x] + containing per scale level (cascade) the weighted combination of + cascades from multiple components (NWP, nowcasts and noise) to be used + in STEPS blending. """ - - # cascade_norm component, scales, .... + # cascade_norm component, scales, y, x # weights component, scales, .... all_c_wn = weights * cascades_norm combined_cascade = np.sum(all_c_wn, axis=0) - # print(combined_cascade.shape) # combined_cascade [scale, ...] return combined_cascade def blend_means_sigmas(means, sigmas, weights): - # means component, scales, .... 
- # sigmas component, scales, .... - # weights component, scales, .... - total_weights = np.sum(weights, axis=0) - factors = weights / total_weights - diff_dims = factors.ndim - means.ndim + """Calculate the blended means and sigmas, the normalization parameters + needed to recompose the cascade. This procedure is similar to the + blending of the normalized cascades and uses the same weights. + + + Parameters + ---------- + means : array-like + Array of shape [number_components + 1, scale_level, ...] + with the mean for each component (NWP, nowcasts, noise). + sigmas : array-like + Array of shape [number_components + 1, scale_level, ...] + with the standard deviation for each component. + weights : array-like + An array of shape [number_components + 1, scale_level, ...] + containing the weights to be used in this routine + for each component plus noise, scale level, and optionally [y, x] + dimensions, obtained by calling a method implemented in + pysteps.blending.steps.calculate_weights + + Returns + ------- + combined_means : array-like + An array of shape [scale_level, ...] + containing per scale level (cascade) the weighted combination of + means from multiple components (NWP, nowcasts and noise). + combined_sigmas : array-like + An array of shape [scale_level, ...] + similar to combined_means, but containing the standard deviations. + + """ + # Check if the dimensions are the same + diff_dims = weights.ndim - means.ndim if diff_dims: for i in range(diff_dims): means = np.expand_dims(means, axis=means.ndim) - diff_dims = factors.ndim - sigmas.ndim + diff_dims = weights.ndim - sigmas.ndim if diff_dims: for i in range(diff_dims): sigmas = np.expand_dims(sigmas, axis=sigmas.ndim) - weighted_means = np.sum((factors * means), axis=0) - weighted_sigmas = np.sum((factors * sigmas), axis=0) + # Combine (blend) the means and sigmas + combined_means = np.sum((weights * means), axis=0) + combined_sigmas = np.sum((weights * sigmas), axis=0) + #TODO: substract covarainces to weigthed sigmas - # to do: substract covarainces to weigthed sigmas + return combined_means, combined_sigmas - return weighted_means, weighted_sigmas +def recompose_cascade(combined_cascade, combined_mean, combined_sigma): + """ Recompose the cascades into a transformed rain rate field. + -def recompose_cascade(w_cascade, w_mean, w_sigma): - renorm = (w_cascade * w_sigma) + w_mean + Parameters + ---------- + combined_cascade : array-like + An array of shape [scale_level, y, x] + containing per scale level (cascade) the weighted combination of + cascades from multiple components (NWP, nowcasts and noise) to be used + in STEPS blending. + combined_mean : array-like + An array of shape [scale_level, ...] + similar to combined_cascade, but containing the normalization parameter + mean. + combined_sigma : array-like + An array of shape [scale_level, ...] + similar to combined_cascade, but containing the normalization parameter + standard deviation. + + Returns + ------- + out : TYPE + DESCRIPTION. 
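
    Examples
    --------
    A minimal sketch for this version of the function (later revisions first
    re-normalize the blended cascade, which would change these numbers):

    >>> import numpy as np
    >>> cc = np.ones((2, 2, 2))
    >>> mu = np.array([0.0, 1.0]).reshape(2, 1, 1)
    >>> sigma = np.array([1.0, 2.0]).reshape(2, 1, 1)
    >>> recompose_cascade(cc, mu, sigma)
    array([[4., 4.],
           [4., 4.]])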
+ + """ + renorm = (combined_cascade * combined_sigma) + combined_mean # print(renorm.shape) out = np.sum(renorm, axis=0) # print(out.shape) From fff9c7ea8a13a8aee79ee5bd09ace5eb2d97dd9c Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Tue, 10 Aug 2021 16:11:59 +0200 Subject: [PATCH 05/17] changes to blending procedures - adjust weights from original BPS2006 method --- pysteps/blending/steps.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 27825d1a0..eb29362ff 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -90,7 +90,7 @@ def calculate_weights(correlations): def blend_cascades(cascades_norm, weights): """Calculate blended normalized cascades using STEPS weights following eq. 10 in :cite:`BPS2006`. - + Parameters ---------- cascades_norm : array-like From 1552a4772733c20a3caadc7c0be569e0b395c343 Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Wed, 11 Aug 2021 10:17:12 +0200 Subject: [PATCH 06/17] Determine spatial correlation of NWP model forecast --- pysteps/blending/skill_scores.py | 60 ++++++++++++++++++++++++++++++++ pysteps/blending/utils.py | 2 ++ 2 files changed, 62 insertions(+) create mode 100644 pysteps/blending/skill_scores.py diff --git a/pysteps/blending/skill_scores.py b/pysteps/blending/skill_scores.py new file mode 100644 index 000000000..4c9551a97 --- /dev/null +++ b/pysteps/blending/skill_scores.py @@ -0,0 +1,60 @@ +# -*- coding: utf-8 -*- +""" +pysteps.blending.skill_scores +============================== + +Methods for computing skill scores, needed for the blending weights, of two- +dimensional model fields with the latest observation field. + +.. autosummary:: + :toctree: ../generated/ + + spatial_correlation +""" + +import numpy as np + + +def spatial_correlation(obs, mod): + """Determine the spatial correlation between the cascade of the latest + available observed (radar) rainfall field and a time-synchronous cascade + derived from a model (generally NWP) field. Both fields are assumed to use + the same grid. + + + Parameters + ---------- + obs : array-like + Array of shape [cascade_level, y, x] with per cascade_level the + normalized cascade of the observed (radar) rainfall field. + mod : array-like + Array of shape [cascade_level, y, x] with per cascade_level the + normalized cascade of the model field. + + Returns + ------- + rho : array-like + Array of shape [n_cascade_levels] containing per cascade_level the + correlation between the normalized cascade of the observed (radar) + rainfall field and the normalized cascade of the model field. 
+ + References + ---------- + :cite:`BPS2006` + :cite:`SPN2013` + + """ + rho = [] + # Fill rho per cascade level, so loop through the cascade levels + for cascade_level in range(0,obs.shape[0]): + # Flatten both arrays + obs_1d = obs[cascade_level, :, :].flatten() + mod_1d = mod[cascade_level, :, :].flatten() + # Calculate the correlation between the two + cov = np.sum((mod_1d - np.mean(mod_1d))*(obs_1d - np.mean(obs_1d))) # Without 1/n, as this cancels out (same for stdx and -y) + std_obs = np.sqrt(np.sum((obs_1d - np.mean(obs_1d))**2.0)) + std_mod = np.sqrt(np.sum((mod_1d - np.mean(mod_1d))**2.0)) + rho.append(cov / (std_mod * std_obs)) + + return rho + diff --git a/pysteps/blending/utils.py b/pysteps/blending/utils.py index 1584505d6..b47e18e94 100644 --- a/pysteps/blending/utils.py +++ b/pysteps/blending/utils.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- """ pysteps.blending.utils ====================== @@ -108,3 +109,4 @@ def blend_optical_flows(flows, weights): combined_flows = np.sum(all_c_wn, axis=-1) # combined_flows [2, m, n] return combined_flows + From 912282b93a9b19cc22e4e59e50b21ecd9c9fdcc9 Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Wed, 11 Aug 2021 15:14:53 +0200 Subject: [PATCH 07/17] First attempt to make correlations and thus weights lead time dependent (in progress..) --- pysteps/blending/skill_scores.py | 102 +++++++++++++++++++++++++++++++ pysteps/blending/steps.py | 2 +- 2 files changed, 103 insertions(+), 1 deletion(-) diff --git a/pysteps/blending/skill_scores.py b/pysteps/blending/skill_scores.py index 4c9551a97..9a22e0878 100644 --- a/pysteps/blending/skill_scores.py +++ b/pysteps/blending/skill_scores.py @@ -10,6 +10,7 @@ :toctree: ../generated/ spatial_correlation + lt_dependent_cor_nwp """ import numpy as np @@ -58,3 +59,104 @@ def spatial_correlation(obs, mod): return rho + +def lt_dependent_cor_nwp(lt, correlations, clim_regr_file=None): + """Determine the correlation of the model field for lead time lt and + cascade k, by assuming that the correlation determined at t=0 regresses + towards the climatological values. + + + Parameters + ---------- + lt : int + The lead time of the forecast in minutes. + correlations : array-like + Array of shape [n_cascade_levels] containing per cascade_level the + correlation between the normalized cascade of the observed (radar) + rainfall field and the normalized cascade of the model field. + clim_regr_file : str, optional + The location of the file with the climatological correlation values + and regression parameters. + + Returns + ------- + rho : array-like + Array of shape [n_cascade_levels] containing, for lead time lt, per + cascade_level the correlation between the normalized cascade of the + observed (radar) rainfall field and the normalized cascade of the + model field. + + References + ---------- + :cite:`BPS2004` + :cite:`BPS2006` + """ + # Obtain the climatological values towards which the correlations will + # regress + clim_cor_values, regr_pars = clim_regr_values(len(correlations), + clim_regr_file + ) + # Determine the speed of the regression (eq. 24 in BPS2004) + qm = np.exp(-lt / regr_pars[0, :]) * (2 - np.exp(-lt / regr_pars[1, :])) + # Determine the correlation for lead time lt + rho = qm * correlations + (1 - qm) * clim_cor_values + + return rho + + +#TODO: Add a lt_dependent_cor_extrapolation for the nowcast + + +#TODO: Make sure the initial values also work for n_cascade_levels != 8. 
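+
+# A quick illustration of the regression above, using the BPS2004 default
+# parameters for the first cascade level (the initial skill rho_0 = 1.0 is
+# a made-up value for this sketch):
+#
+#   >>> import numpy as np
+#   >>> rho_0, clim_cor = 1.0, 0.848
+#   >>> for lt in (0, 60, 180):
+#   ...     qm = np.exp(-lt / 130.0) * (2 - np.exp(-lt / 155.0))
+#   ...     round(qm * rho_0 + (1 - qm) * clim_cor, 3)
+#   1.0
+#   0.975
+#   0.912
+#
+# qm equals 1 at lt = 0 (the t=0 correlation is returned unchanged) and
+# decays to 0 for long lead times, so the skill regresses towards its
+# climatological value.
+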
+def clim_regr_values(n_cascade_levels, clim_regr_file=None): + """Obtains the climatological correlation values and regression parameters + from a file called ... in ... If this file is not present yet, the values + from :cite:`BPS2004`. + + + Parameters + ---------- + n_cascade_levels : int + The number of cascade levels to use. + clim_regr_file : str, optional + Location of the file with the climatological correlation values + and regression parameters. + + Returns + ------- + clim_cor_values : array-like + Array of shape [n_cascade_levels] containing the + climatological values of the lag 1 and lag 2 auto-correlation + coefficients, obtained by calling a method implemented in + pysteps.blending.skill_scores.get_clim_skill_scores. + regr_pars : array-like + Array of shape [2, n_cascade_levels] containing the regression + parameters for the auto-correlation coefficients, obtained by calling + a method implemented in + pysteps.blending.skill_scores.get_clim_skill_scores. + + """ + if clim_regr_file is not None: + #TODO: Finalize this function when the I/O skill file system has been + # set up (work in progress by Lesley). + + # 1. Open the file + + # 2. Get the climatological correlation values and the regression + # parameters + clim_cor_values = ... + regr_pars = ... + + else: + # Get the values from BPS2004 + clim_cor_values = np.array( + [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, + 0.0052, 0.0040] + ) + regr_pars = np.array( + [[130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0], + [155.0, 220.0, 200.0, 75.0, 10e4, 10e4, 10e4, 10e4] + ] + ) + + return clim_cor_values, regr_pars diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index eb29362ff..d51582bf7 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -23,7 +23,7 @@ def calculate_ratios(correlations): - """Calculate explain variance ratios from correlation. + """Calculate explained variance ratios from correlation. Parameters ---------- From ce4658235b1cc487fe5e0216dde85682dabeaded Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Thu, 12 Aug 2021 16:01:46 +0200 Subject: [PATCH 08/17] Change back to original BPS2006 blending formulation and add regression of skill values to climatological values for weights determination --- pysteps/blending/skill_scores.py | 50 +++++++++++++++++++++- pysteps/blending/steps.py | 71 +++++++++++++++++++++++--------- pysteps/blending/utils.py | 28 ++++++++++++- 3 files changed, 126 insertions(+), 23 deletions(-) diff --git a/pysteps/blending/skill_scores.py b/pysteps/blending/skill_scores.py index 9a22e0878..672797a52 100644 --- a/pysteps/blending/skill_scores.py +++ b/pysteps/blending/skill_scores.py @@ -11,6 +11,8 @@ spatial_correlation lt_dependent_cor_nwp + lt_dependent_cor_extrapolation + clim_regr_values """ import numpy as np @@ -61,7 +63,7 @@ def spatial_correlation(obs, mod): def lt_dependent_cor_nwp(lt, correlations, clim_regr_file=None): - """Determine the correlation of the model field for lead time lt and + """Determine the correlation of a model field for lead time lt and cascade k, by assuming that the correlation determined at t=0 regresses towards the climatological values. 
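The hunk below adds `lt_dependent_cor_extrapolation`, which steps the AR(2) relation rho_t = phi_1 * rho_(t-1) + phi_2 * rho_(t-2) forward one lead time per call. A sketch of how it might be driven per time step (the PHI values here are made up for illustration, not taken from the patch):

```python
import numpy as np

from pysteps.blending.skill_scores import lt_dependent_cor_extrapolation

# Hypothetical AR(2) parameters for two cascade levels.
PHI = np.array([[1.2, -0.3], [0.9, -0.1]])

rho, rho_prev = None, None
for step in range(3):
    # Each call advances the recursion by one time step and returns the
    # current correlation plus the value to feed back on the next call.
    rho, rho_prev = lt_dependent_cor_extrapolation(PHI, rho, rho_prev)
    print(step, np.round(rho, 3))
```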
@@ -104,8 +106,52 @@ def lt_dependent_cor_nwp(lt, correlations, clim_regr_file=None): return rho -#TODO: Add a lt_dependent_cor_extrapolation for the nowcast +def lt_dependent_cor_extrapolation(PHI, correlations=None, correlations_prev=None): + """Determine the correlation of the extrapolation (nowcast) component for + lead time lt and cascade k, by assuming that the correlation determined at + t=0 regresses towards the climatological values. + + + Parameters + ---------- + PHI : array-like + Array of shape [n_cascade_levels, ar_order + 1] containing per + cascade level the autoregression parameters. + correlations : array-like, optional + Array of shape [n_cascade_levels] containing per cascade_level the + latest available correlation from the extrapolation component that can + be found from the AR-2 model. + correlations_prev : array-like, optional + Similar to correlations, but from the timestep before that. + Returns + ------- + rho : array-like + Array of shape [n_cascade_levels] containing, for lead time lt, per + cascade_level the correlation of the extrapolation component. + + References + ---------- + :cite:`BPS2004` + :cite:`BPS2006` + + """ + # Check if correlations_prev exists, if not, we set it to 1.0 + if correlations_prev is None: + correlations_prev = np.repeat(1.0, PHI.shape[0]) + # Same for correlations at first time step, we set it to + # phi1 / (1 - phi2), see BPS2004 + if correlations is None: + correlations = PHI[:, 0] / (1.0 - PHI[:, 1]) + + # Calculate the correlation for lead time lt + rho = PHI[:, 0] * correlations + PHI[:, 1] * correlations_prev + + # Finally, set the current correlations array as the previous one for the + # next time step + rho_prev = correlations + + return rho, rho_prev #TODO: Make sure the initial values also work for n_cascade_levels != 8. def clim_regr_values(n_cascade_levels, clim_regr_file=None): diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index d51582bf7..f20e96062 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -17,10 +17,15 @@ blend_means_sigmas recompose_cascade +References +---------- +:cite:`BPS2004` +:cite:`BPS2006` +:cite:`SPN2013` """ import numpy as np - +from pysteps.blending import utils def calculate_ratios(correlations): """Calculate explained variance ratios from correlation. @@ -63,8 +68,16 @@ def calculate_weights(correlations): containing the weights to be used in STEPS blending for each original component plus an addtional noise component, scale level, and optionally along [y, x] dimensions. + + Notes + ----- + The weights in the BPS method can sum op to more than 1.0. Hence, the + blended cascade has the be (re-)normalized (mu = 0, sigma = 1.0) first + before the blended cascade can be recomposed. """ # correlations: [component, scale, ...] + # Check if the correlations are positive, otherwise rho = 10e-5 + correlations = np.where(correlations < 10e-5, 10e-5, correlations) # Calculate weights for each source ratios = calculate_ratios(correlations) # ratios: [component, scale, ...] @@ -72,14 +85,10 @@ def calculate_weights(correlations): # total_ratios: [scale, ...] - the denominator of eq. 11 & 12 in BPS2006 weights = correlations * np.sqrt(ratios/total_ratios) # weights: [component, scale, ...] - # Calculate the weight of the noise component. This is different from - # BPS 2006, as the sum of the weights should equal 1 here. - noise_weight = 1.0 - np.sum(weights, axis=0) - # noise_weight: [scale, ...] 
- - # Original BPS2006 method in the following two lines - # total_square_weights = np.sum(np.square(weights), axis=0) - # noise_weight = np.sqrt(1.0 - total_square_weights) + # Calculate the weight of the noise component. + # Original BPS2006 method in the following two lines (eq. 13) + total_square_weights = np.sum(np.square(weights), axis=0) + noise_weight = np.sqrt(1.0 - total_square_weights) #TODO: determine the weights method and/or add different functions # Finally, add the noise_weights to the weights variable. @@ -87,6 +96,8 @@ def calculate_weights(correlations): return weights +#TODO: Make sure that where the radar rainfall data has no data, the NWP +# data is used. def blend_cascades(cascades_norm, weights): """Calculate blended normalized cascades using STEPS weights following eq. 10 in :cite:`BPS2006`. @@ -124,17 +135,17 @@ def blend_cascades(cascades_norm, weights): def blend_means_sigmas(means, sigmas, weights): """Calculate the blended means and sigmas, the normalization parameters - needed to recompose the cascade. This procedure is similar to the - blending of the normalized cascades and uses the same weights. + needed to recompose the cascade. This procedure uses the weights of the + blending of the normalized cascades and follows eq. 32 and 33 in BPS2004. Parameters ---------- means : array-like - Array of shape [number_components + 1, scale_level, ...] + Array of shape [number_components, scale_level, ...] with the mean for each component (NWP, nowcasts, noise). sigmas : array-like - Array of shape [number_components + 1, scale_level, ...] + Array of shape [number_components, scale_level, ...] with the standard deviation for each component. weights : array-like An array of shape [number_components + 1, scale_level, ...] @@ -154,7 +165,7 @@ def blend_means_sigmas(means, sigmas, weights): similar to combined_means, but containing the standard deviations. """ - # Check if the dimensions are the same + # Check if the dimensions are the same diff_dims = weights.ndim - means.ndim if diff_dims: for i in range(diff_dims): @@ -163,10 +174,22 @@ def blend_means_sigmas(means, sigmas, weights): if diff_dims: for i in range(diff_dims): sigmas = np.expand_dims(sigmas, axis=sigmas.ndim) + # Weight should have one component more (the noise component) than the + # means and sigmas. Check this + if weights.shape[0] - means.shape[0] != 1 or weights.shape[0] - sigmas.shape[0] != 1: + raise ValueError("The weights array does not have one (noise) component more than mu and sigma") + else: + # Throw away the last component, which is the noise component + weights = weights[:-1] + # Combine (blend) the means and sigmas - combined_means = np.sum((weights * means), axis=0) - combined_sigmas = np.sum((weights * sigmas), axis=0) - #TODO: substract covarainces to weigthed sigmas + combined_means = np.zeros(weights.shape[1]) + combined_sigmas = np.zeros(weights.shape[1]) + total_weight = np.sum((weights), axis=0) + for i in range(weights.shape[0]): + combined_means += (weights[i] / total_weight) * means[i] + combined_sigmas += (weights[i] / total_weight) * sigmas[i] + #TODO: substract covarainces to weigthed sigmas - still necessary? return combined_means, combined_sigmas @@ -193,10 +216,18 @@ def recompose_cascade(combined_cascade, combined_mean, combined_sigma): Returns ------- - out : TYPE - DESCRIPTION. - + out: array-like + A two-dimensional array containing the recomposed cascade. + + Notes + ----- + The combined_cascade is made with weights that do not have to sum up to + 1.0. 
Therefore, the combined_cascade is first normalized again using + pysteps.blending.steps.normalize_cascade. """ + # First, normalize the combined_cascade again + combined_cascade = utils.normalize_cascade(combined_cascade) + # Now, renormalize it with the blended sigma and mean values renorm = (combined_cascade * combined_sigma) + combined_mean # print(renorm.shape) out = np.sum(renorm, axis=0) diff --git a/pysteps/blending/utils.py b/pysteps/blending/utils.py index b47e18e94..f18da4bfb 100644 --- a/pysteps/blending/utils.py +++ b/pysteps/blending/utils.py @@ -9,7 +9,8 @@ :toctree: ../generated/ stack_cascades - recompose_cascade + normalize_cascade + blend_optical_flows """ import numpy as np @@ -55,6 +56,31 @@ def stack_cascades(R_d, donorm=True): return np.stack(R_c), np.stack(mu_c), np.stack(sigma_c) +def normalize_cascade(cascade): + """Normalizes a cascade (again). + + Parameters + ---------- + cascade : array-like + An array of shape [scale_level, y, x] + containing per scale level a cascade that has to be normalized (again). + + Returns + ------- + out : array-like + The normalized cascade with the same shape as cascade. + + """ + # Determine the mean and standard dev. of the combined cascade + mu = np.mean(cascade, axis=(1,2)) + sigma = np.std(cascade, axis=(1,2)) + # Normalize the cascade + out = [(cascade[i] - mu[i]) / sigma[i] for i in range(cascade.shape[0])] + out = np.stack(out) + + return out + + def blend_optical_flows(flows, weights): """Combine advection fields using given weights. From 1b15c4b530ae2f6275ebffe5bee3218eb552235f Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Thu, 12 Aug 2021 17:01:01 +0200 Subject: [PATCH 09/17] Reformat code with Black --- pysteps/blending/skill_scores.py | 99 +++++++++++++++--------------- pysteps/blending/steps.py | 92 +++++++++++++++------------- pysteps/blending/utils.py | 13 ++-- pysteps/tests/helpers.py | 102 +++++++++++++++---------------- setup.py | 2 +- 5 files changed, 157 insertions(+), 151 deletions(-) diff --git a/pysteps/blending/skill_scores.py b/pysteps/blending/skill_scores.py index 672797a52..3fb155d14 100644 --- a/pysteps/blending/skill_scores.py +++ b/pysteps/blending/skill_scores.py @@ -19,11 +19,11 @@ def spatial_correlation(obs, mod): - """Determine the spatial correlation between the cascade of the latest - available observed (radar) rainfall field and a time-synchronous cascade - derived from a model (generally NWP) field. Both fields are assumed to use + """Determine the spatial correlation between the cascade of the latest + available observed (radar) rainfall field and a time-synchronous cascade + derived from a model (generally NWP) field. Both fields are assumed to use the same grid. - + Parameters ---------- @@ -38,9 +38,9 @@ def spatial_correlation(obs, mod): ------- rho : array-like Array of shape [n_cascade_levels] containing per cascade_level the - correlation between the normalized cascade of the observed (radar) + correlation between the normalized cascade of the observed (radar) rainfall field and the normalized cascade of the model field. 
- + References ---------- :cite:`BPS2006` @@ -49,24 +49,26 @@ def spatial_correlation(obs, mod): """ rho = [] # Fill rho per cascade level, so loop through the cascade levels - for cascade_level in range(0,obs.shape[0]): + for cascade_level in range(0, obs.shape[0]): # Flatten both arrays obs_1d = obs[cascade_level, :, :].flatten() mod_1d = mod[cascade_level, :, :].flatten() # Calculate the correlation between the two - cov = np.sum((mod_1d - np.mean(mod_1d))*(obs_1d - np.mean(obs_1d))) # Without 1/n, as this cancels out (same for stdx and -y) - std_obs = np.sqrt(np.sum((obs_1d - np.mean(obs_1d))**2.0)) - std_mod = np.sqrt(np.sum((mod_1d - np.mean(mod_1d))**2.0)) + cov = np.sum( + (mod_1d - np.mean(mod_1d)) * (obs_1d - np.mean(obs_1d)) + ) # Without 1/n, as this cancels out (same for stdx and -y) + std_obs = np.sqrt(np.sum((obs_1d - np.mean(obs_1d)) ** 2.0)) + std_mod = np.sqrt(np.sum((mod_1d - np.mean(mod_1d)) ** 2.0)) rho.append(cov / (std_mod * std_obs)) - + return rho def lt_dependent_cor_nwp(lt, correlations, clim_regr_file=None): """Determine the correlation of a model field for lead time lt and - cascade k, by assuming that the correlation determined at t=0 regresses + cascade k, by assuming that the correlation determined at t=0 regresses towards the climatological values. - + Parameters ---------- @@ -74,7 +76,7 @@ def lt_dependent_cor_nwp(lt, correlations, clim_regr_file=None): The lead time of the forecast in minutes. correlations : array-like Array of shape [n_cascade_levels] containing per cascade_level the - correlation between the normalized cascade of the observed (radar) + correlation between the normalized cascade of the observed (radar) rainfall field and the normalized cascade of the model field. clim_regr_file : str, optional The location of the file with the climatological correlation values @@ -83,9 +85,9 @@ def lt_dependent_cor_nwp(lt, correlations, clim_regr_file=None): Returns ------- rho : array-like - Array of shape [n_cascade_levels] containing, for lead time lt, per - cascade_level the correlation between the normalized cascade of the - observed (radar) rainfall field and the normalized cascade of the + Array of shape [n_cascade_levels] containing, for lead time lt, per + cascade_level the correlation between the normalized cascade of the + observed (radar) rainfall field and the normalized cascade of the model field. References @@ -93,24 +95,22 @@ def lt_dependent_cor_nwp(lt, correlations, clim_regr_file=None): :cite:`BPS2004` :cite:`BPS2006` """ - # Obtain the climatological values towards which the correlations will + # Obtain the climatological values towards which the correlations will # regress - clim_cor_values, regr_pars = clim_regr_values(len(correlations), - clim_regr_file - ) + clim_cor_values, regr_pars = clim_regr_values(len(correlations), clim_regr_file) # Determine the speed of the regression (eq. 24 in BPS2004) qm = np.exp(-lt / regr_pars[0, :]) * (2 - np.exp(-lt / regr_pars[1, :])) # Determine the correlation for lead time lt rho = qm * correlations + (1 - qm) * clim_cor_values - + return rho def lt_dependent_cor_extrapolation(PHI, correlations=None, correlations_prev=None): - """Determine the correlation of the extrapolation (nowcast) component for - lead time lt and cascade k, by assuming that the correlation determined at + """Determine the correlation of the extrapolation (nowcast) component for + lead time lt and cascade k, by assuming that the correlation determined at t=0 regresses towards the climatological values. 
- + Parameters ---------- @@ -119,7 +119,7 @@ def lt_dependent_cor_extrapolation(PHI, correlations=None, correlations_prev=Non cascade level the autoregression parameters. correlations : array-like, optional Array of shape [n_cascade_levels] containing per cascade_level the - latest available correlation from the extrapolation component that can + latest available correlation from the extrapolation component that can be found from the AR-2 model. correlations_prev : array-like, optional Similar to correlations, but from the timestep before that. @@ -127,9 +127,9 @@ def lt_dependent_cor_extrapolation(PHI, correlations=None, correlations_prev=Non Returns ------- rho : array-like - Array of shape [n_cascade_levels] containing, for lead time lt, per + Array of shape [n_cascade_levels] containing, for lead time lt, per cascade_level the correlation of the extrapolation component. - + References ---------- :cite:`BPS2004` @@ -139,26 +139,27 @@ def lt_dependent_cor_extrapolation(PHI, correlations=None, correlations_prev=Non # Check if correlations_prev exists, if not, we set it to 1.0 if correlations_prev is None: correlations_prev = np.repeat(1.0, PHI.shape[0]) - # Same for correlations at first time step, we set it to + # Same for correlations at first time step, we set it to # phi1 / (1 - phi2), see BPS2004 if correlations is None: correlations = PHI[:, 0] / (1.0 - PHI[:, 1]) - + # Calculate the correlation for lead time lt rho = PHI[:, 0] * correlations + PHI[:, 1] * correlations_prev - + # Finally, set the current correlations array as the previous one for the # next time step rho_prev = correlations - + return rho, rho_prev -#TODO: Make sure the initial values also work for n_cascade_levels != 8. + +# TODO: Make sure the initial values also work for n_cascade_levels != 8. def clim_regr_values(n_cascade_levels, clim_regr_file=None): """Obtains the climatological correlation values and regression parameters from a file called ... in ... If this file is not present yet, the values - from :cite:`BPS2004`. - + from :cite:`BPS2004`. + Parameters ---------- @@ -176,33 +177,33 @@ def clim_regr_values(n_cascade_levels, clim_regr_file=None): coefficients, obtained by calling a method implemented in pysteps.blending.skill_scores.get_clim_skill_scores. regr_pars : array-like - Array of shape [2, n_cascade_levels] containing the regression - parameters for the auto-correlation coefficients, obtained by calling + Array of shape [2, n_cascade_levels] containing the regression + parameters for the auto-correlation coefficients, obtained by calling a method implemented in pysteps.blending.skill_scores.get_clim_skill_scores. """ if clim_regr_file is not None: - #TODO: Finalize this function when the I/O skill file system has been + # TODO: Finalize this function when the I/O skill file system has been # set up (work in progress by Lesley). - + # 1. Open the file - + # 2. Get the climatological correlation values and the regression # parameters clim_cor_values = ... regr_pars = ... 
    else:
        # Get the values from BPS2004
        clim_cor_values = np.array(
            [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040]
        )
        regr_pars = np.array(
            [
                [130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0],
                [155.0, 220.0, 200.0, 75.0, 10e4, 10e4, 10e4, 10e4],
            ]
        )

    return clim_cor_values, regr_pars
diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py
index f20e96062..11f92b2c0 100644
--- a/pysteps/blending/steps.py
+++ b/pysteps/blending/steps.py
 import numpy as np
 from pysteps.blending import utils
+

 def calculate_ratios(correlations):
     """Calculate explained variance ratios from correlation.

     Parameters
     ----------
     correlations : array-like
         Array of shape [component, scale_level, ...]
         containing correlation (skills) for each component (NWP and nowcast),
         scale level, and optionally along [y, x] dimensions.

     Returns
     -------
     out : numpy array
         An array containing the ratios of explained variance for each
         component, scale level, ...
     """
     # correlations: [component, scale, ...]
     square_corrs = np.square(correlations)
     # Calculate the ratio of the explained variance to the unexplained
     # variance of the nowcast and NWP model components
     out = square_corrs / (1 - square_corrs)
     # out: [component, scale, ...]
     return out

 def calculate_weights(correlations):
     """Calculate blending weights for STEPS blending from correlation.

     Parameters
     ----------
     correlations : array-like
         Array of shape [component, scale_level, ...]
         containing correlation (skills) for each component (NWP and nowcast),
         scale level, and optionally along [y, x] dimensions.

     Returns
     -------
     weights : array-like
         Array of shape [component+1, scale_level, ...]
         containing the weights to be used in STEPS blending for
         each original component plus an additional noise component, scale
         level, and optionally along [y, x] dimensions.

     Notes
     -----
     The weights in the BPS method can sum up to more than 1.0. Hence, the
     blended cascade has to be (re-)normalized (mu = 0, sigma = 1.0) first
     before the blended cascade can be recomposed.
     """
     # correlations: [component, scale, ...]
     # Check if the correlations are positive, otherwise rho = 10e-5
     correlations = np.where(correlations < 10e-5, 10e-5, correlations)
     # Calculate weights for each source
     ratios = calculate_ratios(correlations)
     # ratios: [component, scale, ...]
     total_ratios = np.sum(ratios, axis=0)
     # total_ratios: [scale, ...] - the denominator of eq. 11 & 12 in BPS2006
     weights = correlations * np.sqrt(ratios / total_ratios)
     # weights: [component, scale, ...]
     # Calculate the weight of the noise component.
     # Original BPS2006 method in the following two lines (eq. 13)
     total_square_weights = np.sum(np.square(weights), axis=0)
     noise_weight = np.sqrt(1.0 - total_square_weights)
     # TODO: determine the weights method and/or add different functions

     # Finally, add the noise_weights to the weights variable.
+ noise_weight = np.sqrt(1.0 - total_square_weights) + # TODO: determine the weights method and/or add different functions + + # Finally, add the noise_weights to the weights variable. weights = np.concatenate((weights, noise_weight[None, ...]), axis=0) return weights -#TODO: Make sure that where the radar rainfall data has no data, the NWP +# TODO: Make sure that where the radar rainfall data has no data, the NWP # data is used. def blend_cascades(cascades_norm, weights): - """Calculate blended normalized cascades using STEPS weights following eq. + """Calculate blended normalized cascades using STEPS weights following eq. 10 in :cite:`BPS2006`. - + Parameters ---------- cascades_norm : array-like Array of shape [number_components + 1, scale_level, ...] - with normalized cascades components for each component - (NWP, nowcasts, noise) and scale level, obtained by calling a method + with normalized cascades components for each component + (NWP, nowcasts, noise) and scale level, obtained by calling a method implemented in pysteps.blending.utils.stack_cascades weights : array-like An array of shape [number_components + 1, scale_level, ...] containing the weights to be used in this routine - for each component plus noise, scale level, and optionally [y, x] + for each component plus noise, scale level, and optionally [y, x] dimensions, obtained by calling a method implemented in pysteps.blending.steps.calculate_weights Returns ------- combined_cascade : array-like - An array of shape [scale_level, y, x] - containing per scale level (cascade) the weighted combination of - cascades from multiple components (NWP, nowcasts and noise) to be used + An array of shape [scale_level, y, x] + containing per scale level (cascade) the weighted combination of + cascades from multiple components (NWP, nowcasts and noise) to be used in STEPS blending. """ # cascade_norm component, scales, y, x @@ -134,10 +135,10 @@ def blend_cascades(cascades_norm, weights): def blend_means_sigmas(means, sigmas, weights): - """Calculate the blended means and sigmas, the normalization parameters + """Calculate the blended means and sigmas, the normalization parameters needed to recompose the cascade. This procedure uses the weights of the blending of the normalized cascades and follows eq. 32 and 33 in BPS2004. - + Parameters ---------- @@ -150,22 +151,22 @@ def blend_means_sigmas(means, sigmas, weights): weights : array-like An array of shape [number_components + 1, scale_level, ...] containing the weights to be used in this routine - for each component plus noise, scale level, and optionally [y, x] + for each component plus noise, scale level, and optionally [y, x] dimensions, obtained by calling a method implemented in pysteps.blending.steps.calculate_weights Returns ------- combined_means : array-like - An array of shape [scale_level, ...] - containing per scale level (cascade) the weighted combination of + An array of shape [scale_level, ...] + containing per scale level (cascade) the weighted combination of means from multiple components (NWP, nowcasts and noise). combined_sigmas : array-like - An array of shape [scale_level, ...] + An array of shape [scale_level, ...] similar to combined_means, but containing the standard deviations. 
""" - # Check if the dimensions are the same + # Check if the dimensions are the same diff_dims = weights.ndim - means.ndim if diff_dims: for i in range(diff_dims): @@ -174,14 +175,19 @@ def blend_means_sigmas(means, sigmas, weights): if diff_dims: for i in range(diff_dims): sigmas = np.expand_dims(sigmas, axis=sigmas.ndim) - # Weight should have one component more (the noise component) than the + # Weight should have one component more (the noise component) than the # means and sigmas. Check this - if weights.shape[0] - means.shape[0] != 1 or weights.shape[0] - sigmas.shape[0] != 1: - raise ValueError("The weights array does not have one (noise) component more than mu and sigma") + if ( + weights.shape[0] - means.shape[0] != 1 + or weights.shape[0] - sigmas.shape[0] != 1 + ): + raise ValueError( + "The weights array does not have one (noise) component more than mu and sigma" + ) else: # Throw away the last component, which is the noise component weights = weights[:-1] - + # Combine (blend) the means and sigmas combined_means = np.zeros(weights.shape[1]) combined_sigmas = np.zeros(weights.shape[1]) @@ -189,28 +195,28 @@ def blend_means_sigmas(means, sigmas, weights): for i in range(weights.shape[0]): combined_means += (weights[i] / total_weight) * means[i] combined_sigmas += (weights[i] / total_weight) * sigmas[i] - #TODO: substract covarainces to weigthed sigmas - still necessary? + # TODO: substract covarainces to weigthed sigmas - still necessary? return combined_means, combined_sigmas def recompose_cascade(combined_cascade, combined_mean, combined_sigma): - """ Recompose the cascades into a transformed rain rate field. - + """Recompose the cascades into a transformed rain rate field. + Parameters ---------- combined_cascade : array-like - An array of shape [scale_level, y, x] - containing per scale level (cascade) the weighted combination of - cascades from multiple components (NWP, nowcasts and noise) to be used + An array of shape [scale_level, y, x] + containing per scale level (cascade) the weighted combination of + cascades from multiple components (NWP, nowcasts and noise) to be used in STEPS blending. combined_mean : array-like - An array of shape [scale_level, ...] + An array of shape [scale_level, ...] similar to combined_cascade, but containing the normalization parameter mean. combined_sigma : array-like - An array of shape [scale_level, ...] + An array of shape [scale_level, ...] similar to combined_cascade, but containing the normalization parameter standard deviation. @@ -218,7 +224,7 @@ def recompose_cascade(combined_cascade, combined_mean, combined_sigma): ------- out: array-like A two-dimensional array containing the recomposed cascade. - + Notes ----- The combined_cascade is made with weights that do not have to sum up to diff --git a/pysteps/blending/utils.py b/pysteps/blending/utils.py index f18da4bfb..05ee2121c 100644 --- a/pysteps/blending/utils.py +++ b/pysteps/blending/utils.py @@ -57,12 +57,12 @@ def stack_cascades(R_d, donorm=True): def normalize_cascade(cascade): - """Normalizes a cascade (again). + """Normalizes a cascade (again). Parameters ---------- cascade : array-like - An array of shape [scale_level, y, x] + An array of shape [scale_level, y, x] containing per scale level a cascade that has to be normalized (again). Returns @@ -72,12 +72,12 @@ def normalize_cascade(cascade): """ # Determine the mean and standard dev. 
of the combined cascade - mu = np.mean(cascade, axis=(1,2)) - sigma = np.std(cascade, axis=(1,2)) + mu = np.mean(cascade, axis=(1, 2)) + sigma = np.std(cascade, axis=(1, 2)) # Normalize the cascade - out = [(cascade[i] - mu[i]) / sigma[i] for i in range(cascade.shape[0])] + out = [(cascade[i] - mu[i]) / sigma[i] for i in range(cascade.shape[0])] out = np.stack(out) - + return out @@ -135,4 +135,3 @@ def blend_optical_flows(flows, weights): combined_flows = np.sum(all_c_wn, axis=-1) # combined_flows [2, m, n] return combined_flows - diff --git a/pysteps/tests/helpers.py b/pysteps/tests/helpers.py index c7501b57d..86c59b663 100644 --- a/pysteps/tests/helpers.py +++ b/pysteps/tests/helpers.py @@ -37,76 +37,76 @@ def get_precipitation_fields( **importer_kwargs, ): """ - Get a precipitation field from the archive to be used as reference. + Get a precipitation field from the archive to be used as reference. - Source: bom - Reference time: 2018/06/16 10000 UTC + Source: bom + Reference time: 2018/06/16 10000 UTC - Source: fmi - Reference time: 2016/09/28 1600 UTC + Source: fmi + Reference time: 2016/09/28 1600 UTC - Source: knmi - Reference time: 2010/08/26 0000 UTC + Source: knmi + Reference time: 2010/08/26 0000 UTC - Source: mch - Reference time: 2015/05/15 1630 UTC + Source: mch + Reference time: 2015/05/15 1630 UTC - Source: opera - Reference time: 2018/08/24 1800 UTC + Source: opera + Reference time: 2018/08/24 1800 UTC - Source: saf - Reference time: 2018/06/01 0700 UTC + Source: saf + Reference time: 2018/06/01 0700 UTC - Source: mrms - Reference time: 2019/06/10 0000 UTC + Source: mrms + Reference time: 2019/06/10 0000 UTC - Parameters - ---------- + Parameters + ---------- - num_prev_files: int, optional - Number of previous times (files) to return with respect to the - reference time. + num_prev_files: int, optional + Number of previous times (files) to return with respect to the + reference time. - num_next_files: int, optional - Number of future times (files) to return with respect to the - reference time. + num_next_files: int, optional + Number of future times (files) to return with respect to the + reference time. - return_raw: bool, optional - Do not preprocess the precipitation fields. False by default. - The pre-processing steps are: 1) Convert to mm/h, - 2) Mask invalid values, 3) Log-transform the data [dBR]. + return_raw: bool, optional + Do not preprocess the precipitation fields. False by default. + The pre-processing steps are: 1) Convert to mm/h, + 2) Mask invalid values, 3) Log-transform the data [dBR]. -<<<<<<< HEAD -======= - metadata: bool, optional - If True, also return file metadata. + <<<<<<< HEAD + ======= + metadata: bool, optional + If True, also return file metadata. - clip: scalars (left, right, bottom, top), optional - The extent of the bounding box in data coordinates to be used to clip - the data. + clip: scalars (left, right, bottom, top), optional + The extent of the bounding box in data coordinates to be used to clip + the data. ->>>>>>> master - upscale: float or None, optional - Upscale fields in space during the pre-processing steps. - If it is None, the precipitation field is not modified. - If it is a float, represents the length of the space window that is - used to upscale the fields. + >>>>>>> master + upscale: float or None, optional + Upscale fields in space during the pre-processing steps. + If it is None, the precipitation field is not modified. 
+ If it is a float, represents the length of the space window that is + used to upscale the fields. - source: {"bom", "fmi" , "knmi", "mch", "opera", "saf", "mrms"}, optional - Name of the data source to be used. + source: {"bom", "fmi" , "knmi", "mch", "opera", "saf", "mrms"}, optional + Name of the data source to be used. - log_transform: bool - Whether to transform the output to dB. + log_transform: bool + Whether to transform the output to dB. - Other Parameters - ---------------- + Other Parameters + ---------------- - importer_kwargs : dict - Additional keyword arguments passed to the importer. + importer_kwargs : dict + Additional keyword arguments passed to the importer. - Returns - ------- - data_array : xr.DataArray + Returns + ------- + data_array : xr.DataArray """ if source == "bom": diff --git a/setup.py b/setup.py index 481900cf4..5542775c6 100644 --- a/setup.py +++ b/setup.py @@ -67,7 +67,7 @@ "matplotlib", "jsonschema", "xarray", - "bottleneck" + "bottleneck", ] setup( From add0931e6f0aef0ea33baab4c65b407b08675e69 Mon Sep 17 00:00:00 2001 From: Ruben Imhoff Date: Fri, 13 Aug 2021 11:09:27 +0200 Subject: [PATCH 10/17] Skill score script imports climatological correlation-values from file now --- pysteps/blending/skill_scores.py | 82 +++++++++++++++++++++----------- 1 file changed, 54 insertions(+), 28 deletions(-) diff --git a/pysteps/blending/skill_scores.py b/pysteps/blending/skill_scores.py index 3fb155d14..3057cdfb0 100644 --- a/pysteps/blending/skill_scores.py +++ b/pysteps/blending/skill_scores.py @@ -16,6 +16,7 @@ """ import numpy as np +from pysteps.blending import clim def spatial_correlation(obs, mod): @@ -154,20 +155,20 @@ def lt_dependent_cor_extrapolation(PHI, correlations=None, correlations_prev=Non return rho, rho_prev -# TODO: Make sure the initial values also work for n_cascade_levels != 8. -def clim_regr_values(n_cascade_levels, clim_regr_file=None): +def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=None): """Obtains the climatological correlation values and regression parameters - from a file called ... in ... If this file is not present yet, the values - from :cite:`BPS2004`. + from a file called NWP_weights_window.bin in the outdir_path. If this file + is not present yet, the values from :cite:`BPS2004` are used. Parameters ---------- n_cascade_levels : int The number of cascade levels to use. - clim_regr_file : str, optional - Location of the file with the climatological correlation values - and regression parameters. + outdir_path: str + Path to folder where the historical weights are stored. + skill_kwargs : dict, optional + Dictionary containing e.g. the nmodels and window_length parameters. Returns ------- @@ -178,32 +179,57 @@ def clim_regr_values(n_cascade_levels, clim_regr_file=None): pysteps.blending.skill_scores.get_clim_skill_scores. regr_pars : array-like Array of shape [2, n_cascade_levels] containing the regression - parameters for the auto-correlation coefficients, obtained by calling - a method implemented in - pysteps.blending.skill_scores.get_clim_skill_scores. + parameters. These are fixed values that should be hard-coded in this + function. Unless changed by the user, the standard values from + `BPS2004` are used. + + Notes + ----- + The literature climatological values assume 8 cascade levels. In case + less or more cascade levels are used, the script will handle this by taking + the first n values or extending the array with a small value. This is not + ideal, but will be fixed once the clim_regr_file is made. 
Hence, this + requires a warm-up period of the model. + In addition, the regression parameter values (eq. 24 in BPS2004) are hard- + coded and can only be optimized by the user after (re)fitting of the + equation. """ - if clim_regr_file is not None: - # TODO: Finalize this function when the I/O skill file system has been - # set up (work in progress by Lesley). - - # 1. Open the file - - # 2. Get the climatological correlation values and the regression - # parameters - clim_cor_values = ... - regr_pars = ... - - else: - # Get the values from BPS2004 + # First, obtain climatological skill values + try: + clim_cor_values = clim.calc_clim_weights( + outdir_path, n_cascade_levels, **skill_kwargs + ) + except FileNotFoundError: + # The climatological skill values file does not exist yet, so we'll + # use the default values from BPS2004. clim_cor_values = np.array( [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040] ) - regr_pars = np.array( - [ - [130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0], - [155.0, 220.0, 200.0, 75.0, 10e4, 10e4, 10e4, 10e4], - ] + + # Check if clim_cor_values has only one dimension, otherwise it has + # returned the skill values for multiple models + if clim_cor_values.ndim != 1: + raise IndexError( + "The climatological cor. values of multiple models were returned, but only one model should be specified. Make sure to pass the argument nmodels in the function" ) + # Also check whether the number of cascade_levels is correct + if clim_cor_values.shape[0] > n_cascade_levels: + clim_cor_values = clim_cor_values[0:n_cascade_levels] + elif clim_cor_values.shape[0] < n_cascade_levels: + # Get the number of cascade levels that is missing + n_extra_lev = n_cascade_levels - clim_cor_values.shape[0] + # Append the array with correlation values of 10e-4 + clim_cor_values = np.append(clim_cor_values, np.repeat(10e-4, n_extra_lev)) + + # Get the regression parameters (L in eq. 24 in BPS2004) - Hard coded, + # change to own values when present. + regr_pars = np.array( + [ + [130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0], + [155.0, 220.0, 200.0, 75.0, 10e4, 10e4, 10e4, 10e4], + ] + ) + return clim_cor_values, regr_pars From 79a4e61964f8b9a6fb59b6d164f80c0bda949f79 Mon Sep 17 00:00:00 2001 From: RubenImhoff Date: Fri, 13 Aug 2021 12:05:09 +0200 Subject: [PATCH 11/17] Small changes to skill score script --- pysteps/blending/skill_scores.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pysteps/blending/skill_scores.py b/pysteps/blending/skill_scores.py index 3057cdfb0..8046dfe11 100644 --- a/pysteps/blending/skill_scores.py +++ b/pysteps/blending/skill_scores.py @@ -65,7 +65,7 @@ def spatial_correlation(obs, mod): return rho -def lt_dependent_cor_nwp(lt, correlations, clim_regr_file=None): +def lt_dependent_cor_nwp(lt, correlations, outdir_path, skill_kwargs=dict()): """Determine the correlation of a model field for lead time lt and cascade k, by assuming that the correlation determined at t=0 regresses towards the climatological values. @@ -79,9 +79,10 @@ def lt_dependent_cor_nwp(lt, correlations, clim_regr_file=None): Array of shape [n_cascade_levels] containing per cascade_level the correlation between the normalized cascade of the observed (radar) rainfall field and the normalized cascade of the model field. - clim_regr_file : str, optional - The location of the file with the climatological correlation values - and regression parameters. + outdir_path: str + Path to folder where the historical weights are stored. 
+ skill_kwargs : dict, optional + Dictionary containing e.g. the nmodels and window_length parameters. Returns ------- @@ -98,7 +99,9 @@ def lt_dependent_cor_nwp(lt, correlations, clim_regr_file=None): """ # Obtain the climatological values towards which the correlations will # regress - clim_cor_values, regr_pars = clim_regr_values(len(correlations), clim_regr_file) + clim_cor_values, regr_pars = clim_regr_values( + len(correlations), outdir_path, skill_kwargs=None + ) # Determine the speed of the regression (eq. 24 in BPS2004) qm = np.exp(-lt / regr_pars[0, :]) * (2 - np.exp(-lt / regr_pars[1, :])) # Determine the correlation for lead time lt @@ -155,7 +158,7 @@ def lt_dependent_cor_extrapolation(PHI, correlations=None, correlations_prev=Non return rho, rho_prev -def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=None): +def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): """Obtains the climatological correlation values and regression parameters from a file called NWP_weights_window.bin in the outdir_path. If this file is not present yet, the values from :cite:`BPS2004` are used. From 694c9d3d1f95d279f5f34120ed1b0970b4ba210c Mon Sep 17 00:00:00 2001 From: RubenImhoff Date: Mon, 16 Aug 2021 11:19:39 +0200 Subject: [PATCH 12/17] Add skill score tests and an interface --- pysteps/blending/skill_scores.py | 1 + pysteps/blending/steps.py | 7 +++++++ 2 files changed, 8 insertions(+) diff --git a/pysteps/blending/skill_scores.py b/pysteps/blending/skill_scores.py index 8046dfe11..f8225959f 100644 --- a/pysteps/blending/skill_scores.py +++ b/pysteps/blending/skill_scores.py @@ -228,6 +228,7 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): # Get the regression parameters (L in eq. 24 in BPS2004) - Hard coded, # change to own values when present. + # TODO: Regression pars for different cascade levels regr_pars = np.array( [ [130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0], diff --git a/pysteps/blending/steps.py b/pysteps/blending/steps.py index 11f92b2c0..2c053dc14 100644 --- a/pysteps/blending/steps.py +++ b/pysteps/blending/steps.py @@ -11,6 +11,7 @@ .. autosummary:: :toctree: ../generated/ + forecast calculate_ratios calculate_weights blend_cascades @@ -28,6 +29,12 @@ from pysteps.blending import utils +def forecast(): + # TODO: .. + + return ... + + def calculate_ratios(correlations): """Calculate explained variance ratios from correlation. 
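
Taken together, the functions committed up to this point already form a minimal blending chain: normalize and stack the cascades, derive weights from correlations, blend, and recompose. The sketch below is illustrative only and not part of the patch series; the two components, the linearly decaying correlations and the random cascades are invented for the example, and the weights are broadcast to the spatial dimensions by hand because calculate_weights returns them per component and scale level only.

    import numpy as np

    from pysteps.blending import steps

    # Two components (e.g. one nowcast and one NWP model), 8 cascade levels
    # and a small grid; the correlations are synthetic.
    n_components, n_levels, shape = 2, 8, (64, 64)
    correlations = np.array([np.linspace(0.9, 0.1, n_levels)] * n_components)

    # Weights per component plus one extra noise component (eq. 10 in BPS2006).
    weights = steps.calculate_weights(correlations)
    assert weights.shape == (n_components + 1, n_levels)

    # Dummy normalized cascades for each component plus noise, with zero mean
    # and unit standard deviation per component and scale level.
    cascades_norm = np.random.randn(n_components + 1, n_levels, *shape)
    means = np.zeros((n_components, n_levels))
    sigmas = np.ones((n_components, n_levels))

    # Blend the cascades and the normalization statistics, then recompose.
    combined_cascade = steps.blend_cascades(cascades_norm, weights[..., None, None])
    combined_means, combined_sigmas = steps.blend_means_sigmas(means, sigmas, weights)
    R_blended = steps.recompose_cascade(combined_cascade, combined_means, combined_sigmas)
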
From 6e3db59a8a21999a8548b8e5de34ecb9ca14030e Mon Sep 17 00:00:00 2001 From: RubenImhoff Date: Mon, 16 Aug 2021 11:20:10 +0200 Subject: [PATCH 13/17] Add skill score tests and an interface --- pysteps/blending/__init__.py | 4 + pysteps/blending/interface.py | 56 ++++ pysteps/tests/test_blending_skill_scores.py | 282 ++++++++++++++++++++ 3 files changed, 342 insertions(+) create mode 100644 pysteps/blending/__init__.py create mode 100644 pysteps/blending/interface.py create mode 100644 pysteps/tests/test_blending_skill_scores.py diff --git a/pysteps/blending/__init__.py b/pysteps/blending/__init__.py new file mode 100644 index 000000000..420a11bd0 --- /dev/null +++ b/pysteps/blending/__init__.py @@ -0,0 +1,4 @@ +# -*- coding: utf-8 -*- +"""Methods for blending NWP model(s) with nowcasts.""" + +from pysteps.blending.interface import get_method diff --git a/pysteps/blending/interface.py b/pysteps/blending/interface.py new file mode 100644 index 000000000..0486188c7 --- /dev/null +++ b/pysteps/blending/interface.py @@ -0,0 +1,56 @@ +# -*- coding: utf-8 -*- +""" +pysteps.blending.interface +========================== + +Interface for the blending module. It returns a callable function for computing +blended nowcasts with NWP models. + +.. autosummary:: + :toctree: ../generated/ + + get_method +""" + +from pysteps.blending import steps + +_blending_methods = dict() +_blending_methods["steps"] = steps.forecast + + +def get_method(name): + """Return a callable function for computing nowcasts. + + Description: + Return a callable function for computing deterministic or ensemble + precipitation nowcasts. + + Implemented methods: + + +-----------------+-------------------------------------------------------+ + | Name | Description | + +=================+=======================================================+ + +-----------------+-------------------------------------------------------+ + | steps | the STEPS stochastic nowcasting blending method | + | | described in :cite:`Seed2003`, :cite:`BPS2006` and | + | | :cite:`SPN2013`. The blending weights approach | + | | currently follows :cite:`BPS2006`. 
| + +-----------------+-------------------------------------------------------+ + """ + if isinstance(name, str): + name = name.lower() + else: + raise TypeError( + "Only strings supported for the method's names.\n" + + "Available names:" + + str(list(_blending_methods.keys())) + ) from None + + try: + return _blending_methods[name] + except KeyError: + raise ValueError( + "Unknown blending method {}\n".format(name) + + "The available methods are:" + + str(list(_blending_methods.keys())) + ) from None diff --git a/pysteps/tests/test_blending_skill_scores.py b/pysteps/tests/test_blending_skill_scores.py new file mode 100644 index 000000000..ced452fbc --- /dev/null +++ b/pysteps/tests/test_blending_skill_scores.py @@ -0,0 +1,282 @@ +# -*- coding: utf-8 -*- + +import numpy as np +import pytest +from numpy.testing import assert_array_almost_equal + +from pysteps.blending.skill_scores import ( + spatial_correlation, + lt_dependent_cor_nwp, + lt_dependent_cor_extrapolation, + clim_regr_values, +) + +# TODO: Fix tests for xarray fields + +# Set the climatological correlations values +clim_cor_values_8lev = np.array( + [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040] +) +clim_cor_values_6lev = np.array([0.848, 0.537, 0.237, 0.065, 0.020, 0.0044]) +clim_cor_values_9lev = np.array( + [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040, 10e-4] +) + +# Set the regression values +regr_pars_8lev = np.array( + [ + [130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0], + [155.0, 220.0, 200.0, 75.0, 10e4, 10e4, 10e4, 10e4], + ] +) +regr_pars_6lev = np.array( + [ + [130.0, 165.0, 120.0, 55.0, 50.0, 15.0], + [155.0, 220.0, 200.0, 75.0, 10e4, 10e4], + ] +) +regr_pars_9lev = np.array( + [ + [130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0, 10.0], + [155.0, 220.0, 200.0, 75.0, 10e4, 10e4, 10e4, 10e4, 10e4], + ] +) + +# Set the dummy observation and model values +dummy_2d_array = np.array([[1, 2], [3, 4]]) +obs_8lev = np.repeat(dummy_2d_array[None, :, :], 8, axis=0) +obs_6lev = np.repeat(dummy_2d_array[None, :, :], 6, axis=0) +obs_9lev = np.repeat(dummy_2d_array[None, :, :], 9, axis=0) +mod_8lev = np.repeat(dummy_2d_array[None, :, :], 8, axis=0) +mod_6lev = np.repeat(dummy_2d_array[None, :, :], 6, axis=0) +mod_9lev = np.repeat(dummy_2d_array[None, :, :], 9, axis=0) + +# Gives some dummy values to PHI +dummy_phi = np.array([0.472650, 0.523825, 0.103454]) +PHI_8lev = np.repeat(dummy_phi[None, :], 8, axis=0) +PHI_6lev = np.repeat(dummy_phi[None, :], 6, axis=0) +PHI_9lev = np.repeat(dummy_phi[None, :], 9, axis=0) + +# Test function arguments +skill_scores_arg_names = ( + "obs", + "mod", + "lt", + "PHI", + "cor_prev", + "clim_cor_values", + "regr_pars", + "n_cascade_levels", + "expected_cor_t0", + "expected_cor_nwp_lt", + "expected_cor_nowcast_lt", +) + +# Test function values +skill_scores_arg_values = [ + ( + obs_8lev, + mod_8lev, + 60, + PHI_8lev, + None, + clim_cor_values_8lev, + regr_pars_8lev, + 8, + np.repeat(1.0, 8), + np.array( + [ + 0.97455941, + 0.9356775, + 0.81972779, + 0.55202975, + 0.31534738, + 0.02264599, + 0.02343133, + 0.00647032, + ] + ), + np.array( + [ + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + ] + ), + ), + ( + obs_6lev, + mod_6lev, + 60, + PHI_6lev, + None, + clim_cor_values_6lev, + regr_pars_6lev, + 6, + np.repeat(1.0, 6), + np.array( + [0.97455941, 0.9356775, 0.81972779, 0.55202975, 0.31534738, 0.02264599] + ), + np.array([0.996475, 0.996475, 0.996475, 0.996475, 0.996475, 0.996475]), + ), + ( + obs_9lev, + mod_9lev, + 
60, + PHI_9lev, + None, + clim_cor_values_9lev, + regr_pars_9lev, + 9, + np.repeat(1.0, 9), + np.array( + [ + 0.97455941, + 0.9356775, + 0.81972779, + 0.55202975, + 0.31534738, + 0.02264599, + 0.02343133, + 0.00647032, + 0.00347776, + ] + ), + np.array( + [ + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + ] + ), + ), + ( + obs_8lev, + mod_8lev, + 0, + PHI_8lev, + None, + clim_cor_values_8lev, + regr_pars_8lev, + 8, + np.repeat(1.0, 8), + np.repeat(1.0, 8), + np.array( + [ + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + 0.996475, + ] + ), + ), +] + + +# The test +@pytest.mark.parametrize(skill_scores_arg_names, skill_scores_arg_values) + + +# TODO: Also test the pysteps.blending.clim.calc_clim_weights functionality in +# The test function to be used +def test_blending_skill_scores( + obs, + mod, + lt, + PHI, + cor_prev, + clim_cor_values, + regr_pars, + n_cascade_levels, + expected_cor_t0, + expected_cor_nwp_lt, + expected_cor_nowcast_lt, +): + """Tests if the skill_score functions behave correctly. A dummy gridded + model and observation field should be given for n_cascade_levels, which + leads to a given spatial correlation per cascade level. Then, the function + tests if the correlation regresses towards the climatological values given + lead time lt for the NWP fields or given the PHI-values for the + extrapolation field. + + """ + # Calculate the spatial correlation of the given model field + correlations_t0 = spatial_correlation(obs, mod) + + # Check if the field has the same number of cascade levels as the model + # field and as the given n_cascade_levels + assert ( + correlations_t0.shape[0] == mod.shape[0] + ), "Number of cascade levels should be the same as in the model field" + assert ( + correlations_t0.shape[0] == n_cascade_levels + ), "Number of cascade levels should be the same as n_cascade_levels" + + # Check if the returned values are as expected + assert_array_almost_equal( + correlations_t0, + expected_cor_t0, + "Returned spatial correlation is not the same as the expected value", + ) + + # Test if the NWP correlation regresses towards the correct value given + # a lead time in minutes + # First, check if the climatological values are returned correctly + correlations_clim, regr_clim = clim_regr_values(n_cascade_levels, "random_path") + assert ( + correlations_clim.shape[0] == n_cascade_levels + ), "Number of cascade levels should be the same as n_cascade_levels" + assert_array_almost_equal( + correlations_clim, + clim_cor_values, + "Not the correct climatological correlations were returned", + ) + assert_array_almost_equal( + regr_clim, regr_pars, "Not the correct regression parameters were returned" + ) + + # Then, check the regression of the correlation values + correlations_nwp_lt = lt_dependent_cor_nwp(lt, correlations_t0, "random_path") + assert ( + correlations_nwp_lt.shape[0] == mod.shape[0] + ), "Number of cascade levels should be the same as in the model field" + assert ( + correlations_nwp_lt.shape[0] == n_cascade_levels + ), "Number of cascade levels should be the same as n_cascade_levels" + assert_array_almost_equal( + correlations_nwp_lt, + expected_cor_nwp_lt, + "Correlations of NWP not equal to the expected correlations", + ) + + # Finally, make sure nowcast correlation regresses towards the correct + # value given some PHI-values. 
+ correlations_nowcast_lt = lt_dependent_cor_extrapolation( + PHI, correlations_t0, cor_prev + ) + assert ( + correlations_nowcast_lt.shape[0] == mod.shape[0] + ), "Number of cascade levels should be the same as in the model field" + assert ( + correlations_nowcast_lt.shape[0] == n_cascade_levels + ), "Number of cascade levels should be the same as n_cascade_levels" + assert_array_almost_equal( + correlations_nowcast_lt, + expected_cor_nowcast_lt, + "Correlations of nowcast not equal to the expected correlations", + ) From 1fe5ff3501c3b8f21723099d8c1c8b6793bcdbdc Mon Sep 17 00:00:00 2001 From: RubenImhoff Date: Mon, 16 Aug 2021 11:55:09 +0200 Subject: [PATCH 14/17] Small change to docstring --- pysteps/blending/utils.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pysteps/blending/utils.py b/pysteps/blending/utils.py index 05ee2121c..041e9e1b6 100644 --- a/pysteps/blending/utils.py +++ b/pysteps/blending/utils.py @@ -82,7 +82,8 @@ def normalize_cascade(cascade): def blend_optical_flows(flows, weights): - """Combine advection fields using given weights. + """Combine advection fields using given weights. Following :cite:`BPS2006` + the second level of the cascade is used for the weights Parameters ---------- From c6368ab173991ed34c86f516004ac9033b20efd6 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Wed, 18 Aug 2021 14:37:01 +0200 Subject: [PATCH 15/17] Functions to store and compute climatological weights (#231) * Implement the functions get_default_weights, save_weights, calc_clim_weights. These functions are used to evolve the weights in the scale- and skill-dependent blending with NWP in the STEPS blending algorithm. The current weights, based on the correlations per cascade level, are regressed towards these climatological weights in the course of the forecast. These functions save the current and compute the climatological weights (a running mean of the weights of the past n days, where typically n=30). First daily averages are stored and these are then averaged over the running window of n days. * Add tests for pysteps climatological weight io and calculations. * Add path_workdir to outputs section in pystepsrc file and use it as a default path to store/retrieve blending weights. * Minor changes to docstrings, changes to skill scores and testing scripts * Completed documentation for blending clim module, cleanup. Co-authored-by: RubenImhoff --- .pre-commit-config.yaml | 2 +- pysteps/blending/clim.py | 188 ++++++++++++++++++++ pysteps/blending/skill_scores.py | 43 +++-- pysteps/pystepsrc | 3 +- pysteps/tests/test_blending_clim.py | 148 +++++++++++++++ pysteps/tests/test_blending_skill_scores.py | 31 ++-- 6 files changed, 385 insertions(+), 30 deletions(-) create mode 100644 pysteps/blending/clim.py create mode 100644 pysteps/tests/test_blending_clim.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c80571951..f35ad636b 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/psf/black - rev: 21.6b0 + rev: 21.7b0 hooks: - id: black language_version: python3 \ No newline at end of file diff --git a/pysteps/blending/clim.py b/pysteps/blending/clim.py new file mode 100644 index 000000000..fbb03e1ff --- /dev/null +++ b/pysteps/blending/clim.py @@ -0,0 +1,188 @@ +""" +pysteps.blending.clim +===================== + +Module with methods to read, write and compute past and climatological model weights. + +.. 
autosummary:: + :toctree: ../generated/ + + get_default_weights + save_weights + calc_clim_weights +""" + +import numpy as np +from os.path import exists, join +import pickle +from pysteps import rcparams + + +def get_default_weights(n_cascade_levels=8, n_models=1): + """ + Get the default weights as given in BPS2004. + Take subset of n_cascade_levels or add entries with small values (1e-4) if + n_cascade_levels differs from 8. + + Parameters + ---------- + n_cascade_levels: int, optional + Number of cascade levels. Defaults to 8. + n_models: int, optional + Number of NWP models. Defaults to 1. + + Returns + ------- + default_weights: array-like + Array of shape [model, scale_level] containing the climatological weights. + + """ + + default_weights = np.array( + [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040] + ) + n_weights = default_weights.shape[0] + if n_cascade_levels < n_weights: + default_weights = default_weights[0:n_cascade_levels] + elif n_cascade_levels > n_weights: + default_weights = np.append( + default_weights, np.repeat(1e-4, n_cascade_levels - n_weights) + ) + return np.resize(default_weights, (n_models, n_cascade_levels)) + + +def save_weights( + current_weights, + validtime, + outdir_path=rcparams.outputs["path_workdir"], + window_length=30, +): + """ + Add the current NWP weights to update today's daily average weight. If the + day is over, update the list of daily average weights covering a rolling + window. + + Parameters + ---------- + current_weights: array-like + Array of shape [model, scale_level, ...] + containing the current weights of the different NWP models per cascade + level. + validtime: datetime + Datetime object containing the date and time at for which the current + weights are valid. + outdir_path: string, optional + Path to folder where the historical weights are stored. Defaults to + path_workdir from rcparams. + window_length: int, optional + Length of window (in days) of daily weights that should be retained. + Defaults to 30. + + Returns + ------- + None + + """ + + n_cascade_levels = current_weights.shape[1] + + # Load weights_today, a dictionary containing {mean_weights, n, last_validtime} + weights_today_file = join(outdir_path, "NWP_weights_today.pkl") + if exists(weights_today_file): + with open(weights_today_file, "rb") as f: + weights_today = pickle.load(f) + else: + weights_today = { + "mean_weights": np.copy(current_weights), + "n": 0, + "last_validtime": validtime, + } + + # Load the past weights which is an array with dimensions day x model x scale_level + past_weights_file = join(outdir_path, "NWP_weights_window.npy") + past_weights = None + if exists(past_weights_file): + past_weights = np.load(past_weights_file) + # First check if we have started a new day wrt the last written weights, in which + # case we should update the daily weights file and reset daily statistics. + if weights_today["last_validtime"].date() < validtime.date(): + # Append weights to the list of the past X daily averages. + if past_weights is not None: + past_weights = np.append( + past_weights, [weights_today["mean_weights"]], axis=0 + ) + else: + past_weights = np.array([weights_today["mean_weights"]]) + print(past_weights.shape) + # Remove oldest if the number of entries exceeds the window length. + if past_weights.shape[0] > window_length: + past_weights = np.delete(past_weights, 0, axis=0) + # FIXME also write out last_validtime.date() in this file? + # In that case it will need pickling or netcdf. 
+ # Write out the past weights within the rolling window. + np.save(past_weights_file, past_weights) + # Reset statistics for today. + weights_today["n"] = 0 + weights_today["mean_weights"] = 0 + + # Reset today's weights if needed and/or compute online average from the + # current weights using numerically stable algorithm + weights_today["n"] += 1 + weights_today["mean_weights"] += ( + current_weights - weights_today["mean_weights"] + ) / weights_today["n"] + weights_today["last_validtime"] = validtime + with open(weights_today_file, "wb") as f: + pickle.dump(weights_today, f) + + return None + + +def calc_clim_weights( + n_cascade_levels=8, + outdir_path=rcparams.outputs["path_workdir"], + n_models=1, + window_length=30, +): + """ + Return the climatological weights based on the daily average weights in the + rolling window. This is done using a geometric mean. + + Parameters + ---------- + n_cascade_levels: int, optional + Number of cascade levels. + outdir_path: string, optional + Path to folder where the historical weights are stored. Defaults to + path_workdir from rcparams. + n_models: int, optional + Number of NWP models. Defaults to 1. + window_length: int, optional + Length of window (in days) over which to compute the climatological + weights. Defaults to 30. + + Returns + ------- + climatological_mean_weights: array-like + Array of shape [model, scale_level, ...] containing the climatological + (geometric) mean weights. + + """ + past_weights_file = join(outdir_path, "NWP_weights_window.npy") + # past_weights has dimensions date x model x scale_level x .... + if exists(past_weights_file): + past_weights = np.load(past_weights_file) + else: + past_weights = None + # check if there's enough data to compute the climatological skill + if not past_weights or past_weights.shape[0] < window_length: + return get_default_weights(n_cascade_levels, n_models) + # reduce window if necessary + else: + past_weights = past_weights[-window_length:] + + # Calculate climatological weights from the past_weights using the + # geometric mean. + geomean_weights = np.exp(np.log(past_weights).mean(axis=0)) + + return geomean_weights diff --git a/pysteps/blending/skill_scores.py b/pysteps/blending/skill_scores.py index f8225959f..d53ac3548 100644 --- a/pysteps/blending/skill_scores.py +++ b/pysteps/blending/skill_scores.py @@ -65,7 +65,7 @@ def spatial_correlation(obs, mod): return rho -def lt_dependent_cor_nwp(lt, correlations, outdir_path, skill_kwargs=dict()): +def lt_dependent_cor_nwp(lt, correlations, skill_kwargs=dict()): """Determine the correlation of a model field for lead time lt and cascade k, by assuming that the correlation determined at t=0 regresses towards the climatological values. @@ -79,10 +79,9 @@ def lt_dependent_cor_nwp(lt, correlations, outdir_path, skill_kwargs=dict()): Array of shape [n_cascade_levels] containing per cascade_level the correlation between the normalized cascade of the observed (radar) rainfall field and the normalized cascade of the model field. - outdir_path: str - Path to folder where the historical weights are stored. skill_kwargs : dict, optional - Dictionary containing e.g. the nmodels and window_length parameters. + Dictionary containing e.g. the outdir_path, nmodels and window_length + parameters. 
Returns ------- @@ -99,9 +98,7 @@ def lt_dependent_cor_nwp(lt, correlations, outdir_path, skill_kwargs=dict()): """ # Obtain the climatological values towards which the correlations will # regress - clim_cor_values, regr_pars = clim_regr_values( - len(correlations), outdir_path, skill_kwargs=None - ) + clim_cor_values, regr_pars = clim_regr_values(n_cascade_levels=len(correlations)) # Determine the speed of the regression (eq. 24 in BPS2004) qm = np.exp(-lt / regr_pars[0, :]) * (2 - np.exp(-lt / regr_pars[1, :])) # Determine the correlation for lead time lt @@ -158,7 +155,8 @@ def lt_dependent_cor_extrapolation(PHI, correlations=None, correlations_prev=Non return rho, rho_prev -def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): +# TODO: Make sure the method can also handle multiple model inputs.. +def clim_regr_values(n_cascade_levels, skill_kwargs=dict()): """Obtains the climatological correlation values and regression parameters from a file called NWP_weights_window.bin in the outdir_path. If this file is not present yet, the values from :cite:`BPS2004` are used. @@ -168,10 +166,9 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): ---------- n_cascade_levels : int The number of cascade levels to use. - outdir_path: str - Path to folder where the historical weights are stored. skill_kwargs : dict, optional - Dictionary containing e.g. the nmodels and window_length parameters. + Dictionary containing e.g. the outdir_path, nmodels and window_length + parameters. Returns ------- @@ -201,16 +198,18 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): # First, obtain climatological skill values try: clim_cor_values = clim.calc_clim_weights( - outdir_path, n_cascade_levels, **skill_kwargs + n_cascade_levels=n_cascade_levels, **skill_kwargs ) except FileNotFoundError: # The climatological skill values file does not exist yet, so we'll # use the default values from BPS2004. clim_cor_values = np.array( - [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040] + [[0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040]] ) - # Check if clim_cor_values has only one dimension, otherwise it has + clim_cor_values = clim_cor_values[0] + + # Check if clim_cor_values has only one model, otherwise it has # returned the skill values for multiple models if clim_cor_values.ndim != 1: raise IndexError( @@ -224,11 +223,10 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): # Get the number of cascade levels that is missing n_extra_lev = n_cascade_levels - clim_cor_values.shape[0] # Append the array with correlation values of 10e-4 - clim_cor_values = np.append(clim_cor_values, np.repeat(10e-4, n_extra_lev)) + clim_cor_values = np.append(clim_cor_values, np.repeat(1e-4, n_extra_lev)) # Get the regression parameters (L in eq. 24 in BPS2004) - Hard coded, # change to own values when present. 
- # TODO: Regression pars for different cascade levels regr_pars = np.array( [ [130.0, 165.0, 120.0, 55.0, 50.0, 15.0, 15.0, 10.0], @@ -236,4 +234,17 @@ def clim_regr_values(n_cascade_levels, outdir_path, skill_kwargs=dict()): ] ) + # Check whether the number of cascade_levels is correct + if regr_pars.shape[1] > n_cascade_levels: + regr_pars = regr_pars[:, 0:n_cascade_levels] + elif regr_pars.shape[1] < n_cascade_levels: + # Get the number of cascade levels that is missing + n_extra_lev = n_cascade_levels - regr_pars.shape[1] + # Append the array with correlation values of 10e-4 + regr_pars = np.append( + regr_pars, + [np.repeat(10.0, n_extra_lev), np.repeat(10e4, n_extra_lev)], + axis=1, + ) + return clim_cor_values, regr_pars diff --git a/pysteps/pystepsrc b/pysteps/pystepsrc index 7e2b2c9d7..ff8fcb6cc 100644 --- a/pysteps/pystepsrc +++ b/pysteps/pystepsrc @@ -4,7 +4,8 @@ "silent_import": false, "outputs": { // path_outputs : path where to save results (figures, forecasts, etc) - "path_outputs": "./" + "path_outputs": "./", + "path_workdir": "./tmp/" }, "plot": { // "motion_plot" : "streamplot" or "quiver" diff --git a/pysteps/tests/test_blending_clim.py b/pysteps/tests/test_blending_clim.py new file mode 100644 index 000000000..e790984c7 --- /dev/null +++ b/pysteps/tests/test_blending_clim.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- +import numpy as np +import pytest + +from pysteps.blending.clim import save_weights, calc_clim_weights +import random +from datetime import datetime, timedelta +from os.path import join, exists +import pickle +from numpy.testing import assert_array_equal + +random.seed(12356) +n_cascade_levels = 7 +model_names = ["alaro13", "arome13"] +default_start_weights = [0.8, 0.5] +""" Helper functions """ + + +def generate_random_weights(n_cascade_levels, n_models=1): + """ + Generate random weights which decay exponentially with scale. + """ + start_weights = np.array([random.uniform(0.5, 0.99) for i in range(n_models)]) + powers = np.arange(1, n_cascade_levels + 1) + return pow(start_weights[:, np.newaxis], powers) + + +def generate_fixed_weights(n_cascade_levels, n_models=1): + """ + Generate weights starting at default_start_weights which decay exponentially with scale. 
+ """ + start_weights = np.resize(default_start_weights, n_models) + powers = np.arange(1, n_cascade_levels + 1) + return pow(start_weights[:, np.newaxis], powers) + + +""" Test arguments """ + +clim_arg_names = ("startdatestr", "enddatestr", "n_models", "expected_weights_today") + +test_enddates = ["20210701235500", "20210702000000", "20200930235500"] + +clim_arg_values = [ + ( + "20210701230000", + "20210701235500", + 1, + { + "mean_weights": generate_fixed_weights(n_cascade_levels), + "n": 12, + "last_validtime": datetime.strptime(test_enddates[0], "%Y%m%d%H%M%S"), + }, + ), + ( + "20210701235500", + "20210702000000", + 1, + { + "mean_weights": generate_fixed_weights(n_cascade_levels), + "n": 1, + "last_validtime": datetime.strptime(test_enddates[1], "%Y%m%d%H%M%S"), + }, + ), + ( + "20200801000000", + "20200930235500", + 1, + { + "mean_weights": generate_fixed_weights(n_cascade_levels), + "n": 288, + "last_validtime": datetime.strptime(test_enddates[2], "%Y%m%d%H%M%S"), + }, + ), + ( + "20210701230000", + "20210701235500", + 2, + { + "mean_weights": generate_fixed_weights(n_cascade_levels, 2), + "n": 12, + "last_validtime": datetime.strptime(test_enddates[0], "%Y%m%d%H%M%S"), + }, + ), + ( + "20210701230000", + "20210702000000", + 2, + { + "mean_weights": generate_fixed_weights(n_cascade_levels, 2), + "n": 1, + "last_validtime": datetime.strptime(test_enddates[1], "%Y%m%d%H%M%S"), + }, + ), + ( + "20200801000000", + "20200930235500", + 2, + { + "mean_weights": generate_fixed_weights(n_cascade_levels, 2), + "n": 288, + "last_validtime": datetime.strptime(test_enddates[2], "%Y%m%d%H%M%S"), + }, + ), +] + + +@pytest.mark.parametrize(clim_arg_names, clim_arg_values) +def test_save_weights( + startdatestr, enddatestr, n_models, expected_weights_today, tmpdir +): + """Test if the weights are saved correctly and the daily average is computed""" + + # get validtime + currentdate = datetime.strptime(startdatestr, "%Y%m%d%H%M%S") + enddate = datetime.strptime(enddatestr, "%Y%m%d%H%M%S") + timestep = timedelta(minutes=5) + + outdir_path = tmpdir + + while currentdate <= enddate: + current_weights = generate_fixed_weights(n_cascade_levels, n_models) + print("Saving weights: ", current_weights, currentdate, outdir_path) + save_weights(current_weights, currentdate, outdir_path, window_length=2) + currentdate += timestep + + weights_today_file = join(outdir_path, "NWP_weights_today.pkl") + assert exists(weights_today_file) + with open(weights_today_file, "rb") as f: + weights_today = pickle.load(f) + + # Check type + assert type(weights_today) == type({}) + assert "mean_weights" in weights_today + assert "n" in weights_today + assert "last_validtime" in weights_today + assert_array_equal( + weights_today["mean_weights"], expected_weights_today["mean_weights"] + ) + assert weights_today["n"] == expected_weights_today["n"] + assert weights_today["last_validtime"] == expected_weights_today["last_validtime"] + + +if __name__ == "__main__": + save_weights( + generate_fixed_weights(n_cascade_levels, 1), + datetime.strptime("20200801000000", "%Y%m%d%H%M%S"), + "/tmp/", + ) diff --git a/pysteps/tests/test_blending_skill_scores.py b/pysteps/tests/test_blending_skill_scores.py index ced452fbc..0bf4f1a98 100644 --- a/pysteps/tests/test_blending_skill_scores.py +++ b/pysteps/tests/test_blending_skill_scores.py @@ -19,7 +19,7 @@ ) clim_cor_values_6lev = np.array([0.848, 0.537, 0.237, 0.065, 0.020, 0.0044]) clim_cor_values_9lev = np.array( - [0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040, 10e-4] + 
[0.848, 0.537, 0.237, 0.065, 0.020, 0.0044, 0.0052, 0.0040, 1e-4] ) # Set the regression values @@ -191,8 +191,6 @@ # The test @pytest.mark.parametrize(skill_scores_arg_names, skill_scores_arg_values) - -# TODO: Also test the pysteps.blending.clim.calc_clim_weights functionality in # The test function to be used def test_blending_skill_scores( obs, @@ -216,7 +214,7 @@ def test_blending_skill_scores( """ # Calculate the spatial correlation of the given model field - correlations_t0 = spatial_correlation(obs, mod) + correlations_t0 = np.array(spatial_correlation(obs, mod)) # Check if the field has the same number of cascade levels as the model # field and as the given n_cascade_levels @@ -231,27 +229,32 @@ def test_blending_skill_scores( assert_array_almost_equal( correlations_t0, expected_cor_t0, - "Returned spatial correlation is not the same as the expected value", + decimal=3, + err_msg="Returned spatial correlation is not the same as the expected value", ) # Test if the NWP correlation regresses towards the correct value given # a lead time in minutes # First, check if the climatological values are returned correctly - correlations_clim, regr_clim = clim_regr_values(n_cascade_levels, "random_path") + correlations_clim, regr_clim = clim_regr_values(n_cascade_levels=n_cascade_levels) assert ( correlations_clim.shape[0] == n_cascade_levels ), "Number of cascade levels should be the same as n_cascade_levels" assert_array_almost_equal( correlations_clim, clim_cor_values, - "Not the correct climatological correlations were returned", + decimal=3, + err_msg="Not the correct climatological correlations were returned", ) assert_array_almost_equal( - regr_clim, regr_pars, "Not the correct regression parameters were returned" + regr_clim, + regr_pars, + decimal=3, + err_msg="Not the correct regression parameters were returned", ) # Then, check the regression of the correlation values - correlations_nwp_lt = lt_dependent_cor_nwp(lt, correlations_t0, "random_path") + correlations_nwp_lt = lt_dependent_cor_nwp(lt=lt, correlations=correlations_t0) assert ( correlations_nwp_lt.shape[0] == mod.shape[0] ), "Number of cascade levels should be the same as in the model field" @@ -261,14 +264,17 @@ def test_blending_skill_scores( assert_array_almost_equal( correlations_nwp_lt, expected_cor_nwp_lt, - "Correlations of NWP not equal to the expected correlations", + decimal=3, + err_msg="Correlations of NWP not equal to the expected correlations", ) # Finally, make sure nowcast correlation regresses towards the correct # value given some PHI-values. 
- correlations_nowcast_lt = lt_dependent_cor_extrapolation( + correlations_nowcast_lt, __ = lt_dependent_cor_extrapolation( PHI, correlations_t0, cor_prev ) + + print(correlations_nowcast_lt) assert ( correlations_nowcast_lt.shape[0] == mod.shape[0] ), "Number of cascade levels should be the same as in the model field" @@ -278,5 +284,6 @@ def test_blending_skill_scores( assert_array_almost_equal( correlations_nowcast_lt, expected_cor_nowcast_lt, - "Correlations of nowcast not equal to the expected correlations", + decimal=3, + err_msg="Correlations of nowcast not equal to the expected correlations", ) From 81e88fe888da5b2503df69591159983ba8f23dc7 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Wed, 25 Aug 2021 10:59:32 +0200 Subject: [PATCH 16/17] Add utilities to decompose, store and load NWP cascades for use in blending (#232) * First version of NWP decomposition * Added saving to netCDF * Completed functions for saving and loading decomposed NWP data * Added example files for the decomposed NWP functions * Added compatibility with numpy datetime * Use default output path_workdir for tmp files in blending/utils.py. * Update documentation of NWP decomposition functions in utils.py Co-authored-by: Wout Dewettinck Co-authored-by: wdewettin <87696913+wdewettin@users.noreply.github.com> --- .gitignore | 3 + examples/read_netcdf.py | 9 ++ examples/test_decomposition_NWP.py | 71 +++++++++ pysteps/blending/utils.py | 229 +++++++++++++++++++++++++++++ pysteps/pystepsrc | 2 +- 5 files changed, 313 insertions(+), 1 deletion(-) create mode 100644 examples/read_netcdf.py create mode 100644 examples/test_decomposition_NWP.py diff --git a/.gitignore b/.gitignore index f26dba4ad..6f9756a5c 100644 --- a/.gitignore +++ b/.gitignore @@ -85,3 +85,6 @@ venv.bak/ # mypy .mypy_cache/ + +# example data +examples/radar/ diff --git a/examples/read_netcdf.py b/examples/read_netcdf.py new file mode 100644 index 000000000..b7c61c5e1 --- /dev/null +++ b/examples/read_netcdf.py @@ -0,0 +1,9 @@ +import os, netCDF4 +import numpy as np +from pysteps import rcparams +from pysteps.blending.utils import load_NWP + +NWP_output = rcparams.outputs["path_workdir"] + "cascade_alaro13_01_20170131110000.nc" +start_time = np.datetime64("2017-01-31T11:20") + +print(load_NWP(NWP_output, start_time, 4)) diff --git a/examples/test_decomposition_NWP.py b/examples/test_decomposition_NWP.py new file mode 100644 index 000000000..525bdeb66 --- /dev/null +++ b/examples/test_decomposition_NWP.py @@ -0,0 +1,71 @@ +import matplotlib.pyplot as plt +import numpy as np + +from datetime import datetime +from pprint import pprint +from pysteps import io, nowcasts, rcparams +from pysteps.motion.lucaskanade import dense_lucaskanade +from pysteps.postprocessing.ensemblestats import excprob +from pysteps.utils import conversion, dimension, transformation +from pysteps.visualization import plot_precip_field +from pysteps.blending.utils import decompose_NWP +from pysteps.cascade.bandpass_filters import filter_gaussian + +num_cascade_levels = 8 + +############################################################################### +# Read precipitation field +# ------------------------ +# +# First thing, the sequence of Swiss radar composites is imported, converted and +# transformed into units of dBR. 
+ + +date = datetime.strptime("201701311200", "%Y%m%d%H%M") +data_source = "mch" + +# Load data source config +root_path = rcparams.data_sources[data_source]["root_path"] +path_fmt = rcparams.data_sources[data_source]["path_fmt"] +fn_pattern = rcparams.data_sources[data_source]["fn_pattern"] +fn_ext = rcparams.data_sources[data_source]["fn_ext"] +importer_name = rcparams.data_sources[data_source]["importer"] +importer_kwargs = rcparams.data_sources[data_source]["importer_kwargs"] +timestep = rcparams.data_sources[data_source]["timestep"] + +# Find the radar files in the archive +fns = io.find_by_date( + date, root_path, path_fmt, fn_pattern, fn_ext, timestep, num_prev_files=12 +) + +# Read the data from the archive +importer = io.get_method(importer_name, "importer") +R_NWP, _, metadata = io.read_timeseries(fns, importer, legacy=True, **importer_kwargs) + +# Convert to rain rate +R_NWP, metadata = conversion.to_rainrate(R_NWP, metadata) + +# Log-transform the data +R_NWP, metadata = transformation.dB_transform( + R_NWP, metadata, threshold=0.1, zerovalue=-15.0 +) + +# Fill in the missing data with the threshold value +R_NWP[~np.isfinite(R_NWP)] = metadata["zerovalue"] + +# Find the location to save the NWP files +NWP_output = rcparams.outputs["path_workdir"] + +# Define the start time of the NWP forecast +analysis_time = metadata["timestamps"][0] + +# Decompose the NWP and save to netCDF file +decompose_NWP( + R_NWP, + "alaro13_01", + analysis_time, + 5, + metadata["timestamps"], + num_cascade_levels, + NWP_output, +) diff --git a/pysteps/blending/utils.py b/pysteps/blending/utils.py index 041e9e1b6..df9d9ce54 100644 --- a/pysteps/blending/utils.py +++ b/pysteps/blending/utils.py @@ -11,9 +11,17 @@ stack_cascades normalize_cascade blend_optical_flows + decompose_NWP + load_NWP """ import numpy as np +from datetime import datetime, timedelta +from pysteps.cascade import get_method as cascade_get_method +from pysteps.cascade.bandpass_filters import filter_gaussian +from pysteps import utils, rcparams +import os +import netCDF4 def stack_cascades(R_d, donorm=True): @@ -136,3 +144,224 @@ def blend_optical_flows(flows, weights): combined_flows = np.sum(all_c_wn, axis=-1) # combined_flows [2, m, n] return combined_flows + + +def decompose_NWP( + R_NWP, + NWP_model, + analysis_time, + timestep, + valid_times, + num_cascade_levels=8, + output_path=rcparams.outputs["path_workdir"], + decomp_method="fft", + fft_method="numpy", + domain="spatial", + normalize=True, + compute_stats=True, + compact_output=True, +): + """Decomposes the NWP forecast data into cascades and saves it in + a netCDF file + + Parameters + ---------- + R_NWP: array-like + Array of dimension (n_timesteps, x, y) containing the precipitation forecast + from some NWP model. + NWP_model: str + The name of the NWP model + analysis_time: numpy.datetime64 + The analysis time of the NWP forecast. The analysis time is assumed to be a + numpy.datetime64 type as imported by the pysteps importer + timestep: int + Timestep in minutes between subsequent NWP forecast fields + valid_times: array_like + Array containing the valid times of the NWP forecast fields. The times are + assumed to be numpy.datetime64 types as imported by the pysteps importer + num_cascade_levels: int, optional + The number of frequency bands to use. Must be greater than 2. Defaults to 8. + output_path: str, optional + The location where to save the file with the NWP cascade. Defaults to the + path_workdir specified in the rcparams file. 
+ + Other Parameters + ---------------- + decomp_method: str, optional + A string defining the decomposition method to use. Defaults to "fft". + fft_method: str or tuple, optional + A string or a (function,kwargs) tuple defining the FFT method to use + (see :py:func:`pysteps.utils.interface.get_method`). + Defaults to "numpy". This option is not used if input_domain and + output_domain are both set to "spectral". + domain: {"spatial", "spectral"}, optional + If "spatial", the output cascade levels are transformed back to the + spatial domain by using the inverse FFT. If "spectral", the cascade is + kept in the spectral domain. Defaults to "spatial". + normalize: bool, optional + If True, normalize the cascade levels to zero mean and unit variance. + Requires that compute_stats is True. Implies that compute_stats is True. + Defaults to False. + compute_stats: bool, optional + If True, the output dictionary contains the keys "means" and "stds" + for the mean and standard deviation of each output cascade level. + Defaults to False. + compact_output: bool, optional + Applicable if output_domain is "spectral". If set to True, only the + parts of the Fourier spectrum with non-negligible filter weights are + stored. Defaults to False. + + + Returns + ------- + Nothing + """ + + # Make a NetCDF file + date_string = np.datetime_as_string(analysis_time, "s") + outfn = os.path.join( + output_path, + "cascade_" + + NWP_model + + "_" + + date_string[:4] + + date_string[5:7] + + date_string[8:10] + + date_string[11:13] + + date_string[14:16] + + date_string[17:19] + + ".nc", + ) + ncf = netCDF4.Dataset(outfn, "w", format="NETCDF4") + + # Express times relative to the zero time + zero_time = np.datetime64("1970-01-01T00:00:00", "ns") + valid_times = np.array(valid_times) - zero_time + analysis_time = analysis_time - zero_time + + # Set attributes of decomposition method + ncf.domain = domain + ncf.normalized = int(normalize) + ncf.compact_output = int(compact_output) + ncf.analysis_time = int(analysis_time) + ncf.timestep = int(timestep) + + # Create dimensions + time_dim = ncf.createDimension("time", R_NWP.shape[0]) + casc_dim = ncf.createDimension("cascade_levels", num_cascade_levels) + x_dim = ncf.createDimension("x", R_NWP.shape[1]) + y_dim = ncf.createDimension("y", R_NWP.shape[2]) + + # Create variables (decomposed cascade, means and standard deviations) + R_d = ncf.createVariable( + "pr_decomposed", np.float64, ("time", "cascade_levels", "x", "y") + ) + means = ncf.createVariable("means", np.float64, ("time", "cascade_levels")) + stds = ncf.createVariable("stds", np.float64, ("time", "cascade_levels")) + v_times = ncf.createVariable("valid_times", np.float64, ("time",)) + v_times.units = "nanoseconds since 1970-01-01 00:00:00" + + # The valid times are saved as an array of floats, because netCDF files can't handle datetime types + v_times[:] = np.array([np.float64(valid_times[i]) for i in range(len(valid_times))]) + + # Decompose the NWP data + filter_g = filter_gaussian(R_NWP.shape[1:], num_cascade_levels) + fft = utils.get_method(fft_method, shape=R_NWP.shape[1:], n_threads=1) + decomp_method, _ = cascade_get_method(decomp_method) + + for i in range(R_NWP.shape[0]): + R_ = decomp_method( + R_NWP[i, :, :], + filter_g, + fft_method=fft, + output_domain=domain, + normalize=normalize, + compute_stats=compute_stats, + compact_output=compact_output, + ) + + # Save data to netCDF file + R_d[i, :, :, :] = R_["cascade_levels"] + means[i, :] = R_["means"] + stds[i, :] = R_["stds"] + + # Close the file + 
ncf.close() + + +def load_NWP(input_nc_path, start_time, n_timesteps): + """Loads the decomposed NWP data from the netCDF files + + Parameters + ---------- + input_nc_path: str + Path to the saved netCDF file containing the decomposed NWP data. + start_time: numpy.datetime64 + The start time of the nowcasting. Assumed to be a numpy.datetime64 type + n_timesteps: int + Number of time steps to forecast + + Returns + ------- + R_d: list + A list of dictionaries with each element in the list corresponding to + a different time step. Each dictionary has the same structure as the + output of the decomposition function + """ + + # Open the file + ncf = netCDF4.Dataset(input_nc_path, "r", format="NETCDF4") + + # Initialise the decomposition dictionary + decomp_dict = dict() + decomp_dict["domain"] = ncf.domain + decomp_dict["normalized"] = bool(ncf.normalized) + decomp_dict["compact_output"] = bool(ncf.compact_output) + + # Convert the start time and the timestep to datetime64 and timedelta64 type + zero_time = np.datetime64("1970-01-01T00:00:00", "ns") + analysis_time = np.timedelta64(int(ncf.analysis_time), "ns") + zero_time + + timestep = ncf.timestep + timestep = np.timedelta64(timestep, "m") + + valid_times = ncf.variables["valid_times"][:] + valid_times = np.array( + [np.timedelta64(int(valid_times[i]), "ns") for i in range(len(valid_times))] + ) + valid_times = valid_times + zero_time + + # Add the valid times to the output + decomp_dict["valid_times"] = valid_times + + # Find the indices corresponding with the required start and end time + start_i = (start_time - analysis_time) // timestep + assert analysis_time + start_i * timestep == start_time + end_i = start_i + n_timesteps + 1 + + # Initialise the list of dictionaries which will serve as the output (cf: the STEPS function) + R_d = list() + + for i in range(start_i, end_i): + decomp_dict_ = decomp_dict.copy() + + cascade_levels = ncf.variables["pr_decomposed"][i, :, :, :] + + # In the netcdf file this is saved as a masked array, so we're checking if there is no mask + assert not cascade_levels.mask + + means = ncf.variables["means"][i, :] + assert not means.mask + + stds = ncf.variables["stds"][i, :] + assert not stds.mask + + # Save the values in the dictionary as normal arrays with the filled method + decomp_dict_["cascade_levels"] = np.ma.filled(cascade_levels) + decomp_dict_["means"] = np.ma.filled(means) + decomp_dict_["stds"] = np.ma.filled(stds) + + # Append the output list + R_d.append(decomp_dict_) + + return R_d diff --git a/pysteps/pystepsrc b/pysteps/pystepsrc index 4d7f2f878..051827c64 100644 --- a/pysteps/pystepsrc +++ b/pysteps/pystepsrc @@ -5,7 +5,7 @@ "outputs": { // path_outputs : path where to save results (figures, forecasts, etc) "path_outputs": "./", - "path_workdir": "./tmp/" + "path_workdir": "./" }, "plot": { // "motion_plot" : "streamplot" or "quiver" From e9059cbd9557fa9963ac046ea02915446079d203 Mon Sep 17 00:00:00 2001 From: Lesley De Cruz Date: Thu, 26 Aug 2021 18:07:18 +0200 Subject: [PATCH 17/17] Avoid shadowing of pysteps.blending.utils by pysteps.utils --- pysteps/blending/utils.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pysteps/blending/utils.py b/pysteps/blending/utils.py index df9d9ce54..e08b15c05 100644 --- a/pysteps/blending/utils.py +++ b/pysteps/blending/utils.py @@ -19,7 +19,8 @@ from datetime import datetime, timedelta from pysteps.cascade import get_method as cascade_get_method from pysteps.cascade.bandpass_filters import filter_gaussian -from pysteps import 
utils, rcparams +from pysteps import rcparams +from pysteps.utils import get_method as utils_get_method import os import netCDF4 @@ -266,7 +267,7 @@ def decompose_NWP( # Decompose the NWP data filter_g = filter_gaussian(R_NWP.shape[1:], num_cascade_levels) - fft = utils.get_method(fft_method, shape=R_NWP.shape[1:], n_threads=1) + fft = utils_get_method(fft_method, shape=R_NWP.shape[1:], n_threads=1) decomp_method, _ = cascade_get_method(decomp_method) for i in range(R_NWP.shape[0]):
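
With the climatological-weights module in place, the intended update cycle is: every blending run stores its weights with save_weights, which maintains today's running mean plus a rolling window of daily means, and calc_clim_weights later reduces that window with a geometric mean, falling back to the BPS2004 defaults while the window is still filling. The usage sketch below is not part of the patches; the model count, cascade levels and random weights are invented, and a temporary directory stands in for rcparams.outputs["path_workdir"].

    import tempfile
    from datetime import datetime, timedelta

    import numpy as np

    from pysteps.blending.clim import calc_clim_weights, get_default_weights, save_weights

    outdir = tempfile.mkdtemp()  # stand-in for the configured path_workdir

    # Before anything has been stored, the BPS2004 defaults are returned,
    # truncated or padded to the requested number of cascade levels.
    w0 = calc_clim_weights(n_cascade_levels=6, outdir_path=outdir, n_models=2)
    assert np.allclose(w0, get_default_weights(6, 2))

    # One hour of 5-minute blending cycles, each updating today's running mean
    # stored in NWP_weights_today.pkl in the working directory.
    validtime = datetime(2021, 7, 1, 12, 0)
    for _ in range(12):
        current_weights = np.random.uniform(0.01, 0.9, (2, 6))
        save_weights(current_weights, validtime, outdir, window_length=30)
        validtime += timedelta(minutes=5)
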