Skip to content

Commit

Permalink
Refactor utils.interpolate module (#210)
Browse files Browse the repository at this point in the history
* Rename to iwdinterp2d; adapt docstrings, variable names, and interface

* Fix interface

* Initial wrapper to scipy's method

* Aesthetics

* Use mode=N-D to do multivariate interpolation

* Fix black check

By upgrading black to latest version

* Support parameters of the original function

* Refactor idw method

* Aesthetics

* Implement checks as a decorator

* Add tests

* Aesthetics

* Remove unnecessary test

* Remove unnecessary test

* Test for wrong number of dimensions

* Chunking of the dst grid is done in the preamble

* Fix chunking

* Simplify idw

* Add LRU caching of interpolator classes

* Small adjustments

* Change default interp method to idw

* Adapt test

* Adapt examples

* Silence some warnings

And other super minor changes

* Fix black check

* Fix tests

* Aesthetics

* Fix docstrings

Or at least try...

* Fix univariate interpolation

And include test

* Fix black check

* Super minor changes

* Update precommit config

* Define RBF in docstring

* Add function to append extra kwrds to docstrings

* Rename decorator

* Test idw with k=1 or k=None

* Test that all outputs are finite nuumberrs

* Update pysteps/utils/interpolate.py

Co-authored-by: Loris Foresti <39999237+loforest@users.noreply.github.com>

* Improve variable naming

* Workaround for TypeError issue with Pillow 8.3

* Add abbreviation for inverse distance weighting

* Fix typo

* Docstring polishing in idwinterp2d

* Small refactoring

* Docstring polishing

* Improve docstrings

* Adapt variable naming

* Inputs must be ndarrays

* Fix typo

* Make the offset contant a user-selectable parameter

Co-authored-by: Andres Perez Hortal <16256571+aperezhortal@users.noreply.github.com>
Co-authored-by: Loris Foresti <39999237+loforest@users.noreply.github.com>
Co-authored-by: Seppo Pulkkinen <pulkkins@gmail.com>
  • Loading branch information
4 people committed Jul 4, 2021
1 parent 76324d8 commit 69061dd
Show file tree
Hide file tree
Showing 15 changed files with 484 additions and 221 deletions.
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/psf/black
rev: 20.8b1
rev: 21.6b0
hooks:
- id: black
language_version: python3
11 changes: 2 additions & 9 deletions examples/LK_buffer_mask.py
Original file line number Diff line number Diff line change
Expand Up @@ -169,16 +169,9 @@
# By comparing the velocity of the motion fields, we can easily notice
# the negative bias that is introduced by the the erroneous interpretation of
# velocities near the maximum range of the radars.
# Please note that we are setting a small shape parameter ``epsilon`` for the
# interpolation routine. This will produce a smoother motion field.

interp_kwargs = {"epsilon": 5} # use a small shape parameter for interpolation
UV1 = dense_lucaskanade(
R, dense=True, fd_kwargs=fd_kwargs1, interp_kwargs=interp_kwargs
)
UV2 = dense_lucaskanade(
R, dense=True, fd_kwargs=fd_kwargs2, interp_kwargs=interp_kwargs
)
UV1 = dense_lucaskanade(R, dense=True, fd_kwargs=fd_kwargs1)
UV2 = dense_lucaskanade(R, dense=True, fd_kwargs=fd_kwargs2)

V1 = np.sqrt(UV1[0] ** 2 + UV1[1] ** 2)
V2 = np.sqrt(UV2[0] ** 2 + UV2[1] ** 2)
Expand Down
2 changes: 1 addition & 1 deletion examples/optical_flow_methods_convergence.py
Original file line number Diff line number Diff line change
Expand Up @@ -290,7 +290,7 @@ def plot_optflow_method_convergence(input_precip, optflow_method_name, motion_ty
axis=0
)

cmap = get_cmap("jet")
cmap = get_cmap("jet").copy()
cmap.set_under("grey", alpha=0.25)
cmap.set_over("none")

Expand Down
169 changes: 156 additions & 13 deletions pysteps/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,34 @@
postprocess_import
check_input_frames
prepare_interpolator
memoize
"""
import inspect
import uuid
from collections import defaultdict
from functools import wraps

import numpy as np


def _add_extra_kwrds_to_docstrings(target_func, extra_kwargs_doc_text):
"""
Update the functions docstrings by replacing the `{extra_kwargs_doc}` occurences in
the docstring by the `extra_kwargs_doc_text` value.
"""
# Clean up indentation from docstrings for the
# docstrings to be merged correctly.
extra_kwargs_doc = inspect.cleandoc(extra_kwargs_doc_text)
target_func.__doc__ = inspect.cleandoc(target_func.__doc__)

# Add extra kwargs docstrings
target_func.__doc__ = target_func.__doc__.format_map(
defaultdict(str, extra_kwargs_doc=extra_kwargs_doc)
)
return target_func


def postprocess_import(fillna=np.nan, dtype="double"):
"""
Postprocess the imported precipitation data.
Expand Down Expand Up @@ -82,19 +102,7 @@ def _import_with_postprocessing(*args, **kwargs):
By default, np.nan is used.
"""

# Clean up indentation from docstrings for the
# docstrings to be merged correctly.
extra_kwargs_doc = inspect.cleandoc(extra_kwargs_doc)
_import_with_postprocessing.__doc__ = inspect.cleandoc(
_import_with_postprocessing.__doc__
)

# Add extra kwargs docstrings
_import_with_postprocessing.__doc__ = (
_import_with_postprocessing.__doc__.format_map(
defaultdict(str, extra_kwargs_doc=extra_kwargs_doc)
)
)
_add_extra_kwrds_to_docstrings(_import_with_postprocessing, extra_kwargs_doc)

return _import_with_postprocessing

Expand Down Expand Up @@ -140,3 +148,138 @@ def new_function(*args, **kwargs):
return new_function

return _check_input_frames


def prepare_interpolator(nchunks=4):
"""
Check that all the inputs have the correct shape, and that all values are
finite. It also split the destination grid in `nchunks` parts, and process each
part independently.
"""

def _preamble_interpolation(interpolator):
@wraps(interpolator)
def _interpolator_with_preamble(xy_coord, values, xgrid, ygrid, **kwargs):
nonlocal nchunks # https://stackoverflow.com/questions/5630409/

values = values.copy()
xy_coord = xy_coord.copy()

input_ndims = values.ndim
input_nvars = 1 if input_ndims == 1 else values.shape[1]
input_nsamples = values.shape[0]

coord_ndims = xy_coord.ndim
coord_nsamples = xy_coord.shape[0]

grid_shape = (ygrid.size, xgrid.size)

if np.any(~np.isfinite(values)):
raise ValueError("argument 'values' contains non-finite values")
if np.any(~np.isfinite(xy_coord)):
raise ValueError("argument 'xy_coord' contains non-finite values")

if input_ndims > 2:
raise ValueError(
"argument 'values' must have 1 (n) or 2 dimensions (n, m), "
f"but it has {input_ndims}"
)
if not coord_ndims == 2:
raise ValueError(
"argument 'xy_coord' must have 2 dimensions (n, 2), "
f"but it has {coord_ndims}"
)

if not input_nsamples == coord_nsamples:
raise ValueError(
"the number of samples in argument 'values' does not match the "
f"number of coordinates {input_nsamples}!={coord_nsamples}"
)

# only one sample, return uniform output
if input_nsamples == 1:
output_array = np.ones((input_nvars,) + grid_shape)
for n, v in enumerate(values[0, ...]):
output_array[n, ...] *= v
return output_array.squeeze()

# all equal elements, return uniform output
if values.max() == values.min():
return np.ones((input_nvars,) + grid_shape) * values.ravel()[0]

# split grid in n chunks
nchunks = int(kwargs.get("nchunks", nchunks) ** 0.5)
if nchunks > 1:
subxgrids = np.array_split(xgrid, nchunks)
subxgrids = [x for x in subxgrids if x.size > 0]
subygrids = np.array_split(ygrid, nchunks)
subygrids = [y for y in subygrids if y.size > 0]

# generate a unique identifier to be used for caching
# intermediate results
kwargs["hkey"] = uuid.uuid1().int
else:
subxgrids = [xgrid]
subygrids = [ygrid]

interpolated = np.zeros((input_nvars,) + grid_shape)
indx = 0
for subxgrid in subxgrids:
deltax = subxgrid.size
indy = 0
for subygrid in subygrids:
deltay = subygrid.size
interpolated[
:, indy : (indy + deltay), indx : (indx + deltax)
] = interpolator(xy_coord, values, subxgrid, subygrid, **kwargs)
indy += deltay
indx += deltax

return interpolated.squeeze()

extra_kwargs_doc = """
nchunks: int, optional
Split and process the destination grid in nchunks.
Useful for large grids to limit the memory footprint.
"""

_add_extra_kwrds_to_docstrings(_interpolator_with_preamble, extra_kwargs_doc)

return _interpolator_with_preamble

return _preamble_interpolation


def memoize(maxsize=10):
"""
Add a Least Recently Used (LRU) cache to any function.
Caching is purely based on the optional keyword argument 'hkey', which needs
to be a hashable.
Parameters
----------
maxsize: int, optional
The maximum number of elements stored in the LRU cache.
"""

def _memoize(func):
cache = dict()
hkeys = []

@wraps(func)
def _func_with_cache(*args, **kwargs):
hkey = kwargs.pop("hkey", None)
if hkey in cache:
return cache[hkey]
result = func(*args, **kwargs)
if hkey is not None:
cache[hkey] = result
hkeys.append(hkey)
if len(hkeys) > maxsize:
cache.pop(hkeys.pop(0))

return result

return _func_with_cache

return _memoize
12 changes: 5 additions & 7 deletions pysteps/io/exporters.py
Original file line number Diff line number Diff line change
Expand Up @@ -510,14 +510,14 @@ def initialize_forecast_exporter_netcdf(
pr = pyproj.Proj(metadata["projection"])
lon, lat = pr(x_2d.flatten(), y_2d.flatten(), inverse=True)

var_lon = ncf.createVariable("lon", np.float, dimensions=("y", "x"))
var_lon = ncf.createVariable("lon", float, dimensions=("y", "x"))
var_lon[:] = lon.reshape(shape)
var_lon.standard_name = "longitude"
var_lon.long_name = "longitude coordinate"
# TODO(exporters): Don't hard-code the unit.
var_lon.units = "degrees_east"

var_lat = ncf.createVariable("lat", np.float, dimensions=("y", "x"))
var_lat = ncf.createVariable("lat", float, dimensions=("y", "x"))
var_lat[:] = lat.reshape(shape)
var_lat.standard_name = "latitude"
var_lat.long_name = "latitude coordinate"
Expand All @@ -533,22 +533,20 @@ def initialize_forecast_exporter_netcdf(
) = _convert_proj4_to_grid_mapping(metadata["projection"])
# skip writing the grid mapping if a matching name was not found
if grid_mapping_var_name is not None:
var_gm = ncf.createVariable(grid_mapping_var_name, np.int, dimensions=())
var_gm = ncf.createVariable(grid_mapping_var_name, int, dimensions=())
var_gm.grid_mapping_name = grid_mapping_name
for i in grid_mapping_params.items():
var_gm.setncattr(i[0], i[1])

if incremental == "member" or n_ens_gt_one:
var_ens_num = ncf.createVariable(
"ens_number", np.int, dimensions=("ens_number",)
)
var_ens_num = ncf.createVariable("ens_number", int, dimensions=("ens_number",))
if incremental != "member":
var_ens_num[:] = list(range(1, n_ens_members + 1))
var_ens_num.long_name = "ensemble member"
var_ens_num.standard_name = "realization"
var_ens_num.units = ""

var_time = ncf.createVariable("time", np.int, dimensions=("time",))
var_time = ncf.createVariable("time", int, dimensions=("time",))
if incremental != "timestep":
var_time[:] = [i * timestep * 60 for i in range(1, n_timesteps + 1)]
var_time.long_name = "forecast time"
Expand Down
16 changes: 8 additions & 8 deletions pysteps/io/importers.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@
| ypixelsize | grid resolution in y-direction |
+------------------+----------------------------------------------------------+
| cartesian_unit | the physical unit of the cartesian x- and y-coordinates: |
| | e.g. 'm' or 'km' |
| | e.g. 'm' or 'km' |
+------------------+----------------------------------------------------------+
| yorigin | a string specifying the location of the first element in |
| | the data raster w.r.t. y-axis: |
Expand Down Expand Up @@ -934,12 +934,12 @@ def import_mch_gif(filename, product, unit, accutime, **kwargs):
metadata = geodata

# import gif file
B = Image.open(filename)
img = Image.open(filename)

if product.lower() in ["azc", "rzc", "precip"]:

# convert 8-bit GIF colortable to RGB values
Brgb = B.convert("RGB")
img_rgb = img.convert("RGB")

# load lookup table
if product.lower() == "azc":
Expand All @@ -954,12 +954,12 @@ def import_mch_gif(filename, product, unit, accutime, **kwargs):
lut = dict(zip(zip(lut[:, 1], lut[:, 2], lut[:, 3]), lut[:, -1]))

# apply lookup table conversion
precip = np.zeros(len(Brgb.getdata()))
for i, dn in enumerate(Brgb.getdata()):
precip = np.zeros(len(img_rgb.getdata()))
for i, dn in enumerate(img_rgb.getdata()):
precip[i] = lut.get(dn, np.nan)

# convert to original shape
width, height = B.size
width, height = img.size
precip = precip.reshape(height, width)

# set values outside observational range to NaN,
Expand All @@ -970,7 +970,7 @@ def import_mch_gif(filename, product, unit, accutime, **kwargs):
elif product.lower() in ["aqc", "cpc", "acquire ", "combiprecip"]:

# convert digital numbers to physical values
B = np.array(B, dtype=int)
img = np.array(img).astype(int)

# build lookup table [mm/5min]
lut = np.zeros(256)
Expand All @@ -985,7 +985,7 @@ def import_mch_gif(filename, product, unit, accutime, **kwargs):
lut[i] = (10.0 ** ((i - 71.5) / 20.0) / a) ** (1.0 / b)

# apply lookup table
precip = lut[B]
precip = lut[img]

else:
raise ValueError("unknown product %s" % product)
Expand Down
2 changes: 1 addition & 1 deletion pysteps/motion/lucaskanade.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def dense_lucaskanade(
lk_kwargs=None,
fd_method="shitomasi",
fd_kwargs=None,
interp_method="rbfinterp2d",
interp_method="idwinterp2d",
interp_kwargs=None,
dense=True,
nr_std_outlier=3,
Expand Down
4 changes: 2 additions & 2 deletions pysteps/nowcasts/sseps.py
Original file line number Diff line number Diff line change
Expand Up @@ -230,9 +230,9 @@ def forecast(
)

if np.isscalar(win_size):
win_size = (np.int(win_size), np.int(win_size))
win_size = (int(win_size), int(win_size))
else:
win_size = tuple([np.int(win_size[i]) for i in range(2)])
win_size = tuple([int(win_size[i]) for i in range(2)])

timestep = metadata["accutime"]
kmperpixel = metadata["xpixelsize"] / 1000
Expand Down
33 changes: 33 additions & 0 deletions pysteps/tests/test_decorators.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# -*- coding: utf-8 -*-
import time

from pysteps.decorators import memoize


def test_memoize():
@memoize(maxsize=1)
def _slow_function(x, **kwargs):
time.sleep(1)
return x

for i in range(2):
out = _slow_function(i, hkey=i)
assert out == i

# cached result
t0 = time.monotonic()
out = _slow_function(1, hkey=1)
assert time.monotonic() - t0 < 1
assert out == 1

# maxsize exceeded
t0 = time.monotonic()
out = _slow_function(0, hkey=0)
assert time.monotonic() - t0 >= 1
assert out == 0

# no hash
t0 = time.monotonic()
out = _slow_function(1)
assert time.monotonic() - t0 >= 1
assert out == 1
2 changes: 1 addition & 1 deletion pysteps/tests/test_nowcasts_steps.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@
(5, 6, 2, "sprog", None, "spatial", 3, 8.35),
(5, 6, 2, "obs", None, "spatial", 3, 8.30),
(5, 6, 2, None, "cdf", "spatial", 3, 0.60),
(5, 6, 2, None, "mean", "spatial", 3, 1.30),
(5, 6, 2, None, "mean", "spatial", 3, 1.35),
(5, 6, 2, "incremental", "cdf", "spectral", 3, 0.60),
]

Expand Down

0 comments on commit 69061dd

Please sign in to comment.