Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Fix exceptions when Series.interpolate's order parameter is missing or invalid #25246

Merged
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v0.25.0.rst
Expand Up @@ -139,7 +139,7 @@ Indexing
Missing
^^^^^^^

-
- Fixed misleading exception message in :meth:`Series.missing` if argument ``order`` is required, but omitted (:issue:`10633`, :issue:`24014`).
-
-

Expand Down
30 changes: 12 additions & 18 deletions pandas/core/internals/blocks.py
Expand Up @@ -1115,24 +1115,18 @@ def check_int_bool(self, inplace):
fill_value=fill_value,
coerce=coerce,
downcast=downcast)
# try an interp method
try:
m = missing.clean_interp_method(method, **kwargs)
except ValueError:
m = None

if m is not None:
r = check_int_bool(self, inplace)
if r is not None:
return r
return self._interpolate(method=m, index=index, values=values,
axis=axis, limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value, inplace=inplace,
downcast=downcast, **kwargs)

raise ValueError("invalid method '{0}' to interpolate.".format(method))
# validate the interp method
m = missing.clean_interp_method(method, **kwargs)

gfyoung marked this conversation as resolved.
Show resolved Hide resolved
r = check_int_bool(self, inplace)
if r is not None:
return r
return self._interpolate(method=m, index=index, values=values,
axis=axis, limit=limit,
limit_direction=limit_direction,
limit_area=limit_area,
fill_value=fill_value, inplace=inplace,
downcast=downcast, **kwargs)

def _interpolate_with_fill(self, method='pad', axis=0, inplace=False,
limit=None, fill_value=None, coerce=False,
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/missing.py
Expand Up @@ -293,9 +293,10 @@ def _interpolate_scipy_wrapper(x, y, new_x, method, fill_value=None,
bounds_error=bounds_error)
new_y = terp(new_x)
elif method == 'spline':
# GH #10633
if not order:
raise ValueError("order needs to be specified and greater than 0")
# GH #10633, #24014
if isna(order) or (order <= 0):
raise ValueError("order needs to be specified and greater than 0; "
"got order: {}".format(order))
terp = interpolate.UnivariateSpline(x, y, k=order, **kwargs)
new_y = terp(new_x)
else:
Expand Down
97 changes: 61 additions & 36 deletions pandas/tests/series/test_missing.py
Expand Up @@ -854,8 +854,23 @@ def test_series_pad_backfill_limit(self):
assert_series_equal(result, expected)


class TestSeriesInterpolateData():
@pytest.fixture(params=['linear', 'index', 'values', 'nearest', 'slinear',
'zero', 'quadratic', 'cubic', 'barycentric', 'krogh',
'polynomial', 'spline', 'piecewise_polynomial',
'from_derivatives', 'pchip', 'akima', ])
def nontemporal_method(request):
""" Fixture that returns an (method name, required kwargs) pair.

This fixture does not include method 'time' as a parameterization; that
method requires a Series with a DatetimeIndex, and is generally tested
separately from these non-temporal methods.
"""
method = request.param
kwargs = dict(order=1) if method in ('spline', 'polynomial') else dict()
return method, kwargs


class TestSeriesInterpolateData():
def test_interpolate(self, datetime_series, string_series):
ts = Series(np.arange(len(datetime_series), dtype=float),
datetime_series.index)
Expand All @@ -875,12 +890,12 @@ def test_interpolate(self, datetime_series, string_series):
time_interp = ord_ts_copy.interpolate(method='time')
tm.assert_series_equal(time_interp, ord_ts)

# try time interpolation on a non-TimeSeries
# Only raises ValueError if there are NaNs.
non_ts = string_series.copy()
non_ts[0] = np.NaN
msg = ("time-weighted interpolation only works on Series or DataFrames"
" with a DatetimeIndex")
def test_interpolate_time_raises_for_non_timeseries(self):
# When method='time' is used on a non-TimeSeries that contains a null
# value, a ValueError should be raised.
non_ts = Series([0, 1, 2, np.NaN])
msg = ("time-weighted interpolation only works on Series.* "
"with a DatetimeIndex")
with pytest.raises(ValueError, match=msg):
non_ts.interpolate(method='time')

Expand Down Expand Up @@ -1061,21 +1076,35 @@ def test_interp_limit(self):
result = s.interpolate(method='linear', limit=2)
assert_series_equal(result, expected)

# GH 9217, make sure limit is an int and greater than 0
methods = ['linear', 'time', 'index', 'values', 'nearest', 'zero',
'slinear', 'quadratic', 'cubic', 'barycentric', 'krogh',
'polynomial', 'spline', 'piecewise_polynomial', None,
'from_derivatives', 'pchip', 'akima']
s = pd.Series([1, 2, np.nan, np.nan, 5])
msg = (r"Limit must be greater than 0|"
"time-weighted interpolation only works on Series or"
r" DataFrames with a DatetimeIndex|"
r"invalid method '(polynomial|spline|None)' to interpolate|"
"Limit must be an integer")
for limit in [-1, 0, 1., 2.]:
for method in methods:
with pytest.raises(ValueError, match=msg):
s.interpolate(limit=limit, method=method)
@pytest.mark.parametrize("limit", [-1, 0])
def test_interpolate_invalid_nonpositive_limit(self, nontemporal_method,
limit):
# GH 9217: make sure limit is greater than zero.
s = pd.Series([1, 2, np.nan, 4])
method, kwargs = nontemporal_method
with pytest.raises(ValueError, match="Limit must be greater than 0"):
s.interpolate(limit=limit, method=method, **kwargs)

def test_interpolate_invalid_float_limit(self, nontemporal_method):
# GH 9217: make sure limit is an integer.
s = pd.Series([1, 2, np.nan, 4])
method, kwargs = nontemporal_method
limit = 2.0
with pytest.raises(ValueError, match="Limit must be an integer"):
s.interpolate(limit=limit, method=method, **kwargs)

@pytest.mark.parametrize("invalid_method", [None, 'nonexistent_method'])
def test_interp_invalid_method(self, invalid_method):
s = Series([1, 3, np.nan, 12, np.nan, 25])

msg = "method must be one of.* Got '{}' instead".format(invalid_method)
with pytest.raises(ValueError, match=msg):
s.interpolate(method=invalid_method)

# When an invalid method and invalid limit (such as -1) are
# provided, the error message reflects the invalid method.
with pytest.raises(ValueError, match=msg):
s.interpolate(method=invalid_method, limit=-1)

def test_interp_limit_forward(self):
s = Series([1, 3, np.nan, np.nan, np.nan, 11])
Expand Down Expand Up @@ -1276,11 +1305,20 @@ def test_interp_limit_no_nans(self):
@td.skip_if_no_scipy
@pytest.mark.parametrize("method", ['polynomial', 'spline'])
def test_no_order(self, method):
# see GH-10633, GH-24014
s = Series([0, 1, np.nan, 3])
msg = "invalid method '{}' to interpolate".format(method)
msg = "You must specify the order of the spline or polynomial"
with pytest.raises(ValueError, match=msg):
s.interpolate(method=method)

@td.skip_if_no_scipy
@pytest.mark.parametrize('order', [-1, -1.0, 0, 0.0, np.nan])
def test_interpolate_spline_invalid_order(self, order):
s = Series([0, 1, np.nan, 3])
msg = "order needs to be specified and greater than 0"
with pytest.raises(ValueError, match=msg):
s.interpolate(method='spline', order=order)

@td.skip_if_no_scipy
def test_spline(self):
s = Series([1, 2, np.nan, 4, 5, np.nan, 7])
Expand Down Expand Up @@ -1313,19 +1351,6 @@ def test_spline_interpolation(self):
expected1 = s.interpolate(method='spline', order=1)
assert_series_equal(result1, expected1)

@td.skip_if_no_scipy
def test_spline_error(self):
# see gh-10633
s = pd.Series(np.arange(10) ** 2)
s[np.random.randint(0, 9, 3)] = np.nan
msg = "invalid method 'spline' to interpolate"
with pytest.raises(ValueError, match=msg):
s.interpolate(method='spline')

msg = "order needs to be specified and greater than 0"
with pytest.raises(ValueError, match=msg):
s.interpolate(method='spline', order=0)

def test_interp_timedelta64(self):
# GH 6424
df = Series([1, np.nan, 3],
Expand Down