Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

BUG: Series.interpolate with dt64/td64 raises #51005

Merged
merged 5 commits into from
Feb 3, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,7 @@ Datetimelike
- Bug in :func:`to_datetime` was raising ``ValueError`` when parsing mixed-offset :class:`Timestamp` with ``errors='ignore'`` (:issue:`50585`)
- Bug in :func:`to_datetime` was incorrectly handling floating-point inputs within 1 ``unit`` of the overflow boundaries (:issue:`50183`)
- Bug in :func:`to_datetime` with unit of "Y" or "M" giving incorrect results, not matching pointwise :class:`Timestamp` results (:issue:`50870`)
- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with datetime or timedelta dtypes incorrectly raising ``ValueError`` (:issue:`11312`)
- Bug in :func:`to_datetime` was not returning input with ``errors='ignore'`` when input was out-of-bounds (:issue:`50587`)
-

Expand Down
33 changes: 33 additions & 0 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -1599,6 +1599,7 @@ def values_for_json(self) -> np.ndarray:

def interpolate(
self,
*,
method: FillnaOptions = "pad",
axis: int = 0,
inplace: bool = False,
Expand Down Expand Up @@ -1974,6 +1975,38 @@ class DatetimeLikeBlock(NDArrayBackedExtensionBlock):
def values_for_json(self) -> np.ndarray:
    """
    Return the ndarray held in ``self.values._ndarray``.

    NOTE(review): presumably this is the raw backing array handed to the
    JSON serializer -- confirm against the caller.
    """
    backing_array = self.values
    return backing_array._ndarray

def interpolate(
    self,
    *,
    method: FillnaOptions = "pad",
    index: Index | None = None,
    axis: int = 0,
    inplace: bool = False,
    limit: int | None = None,
    fill_value=None,
    **kwargs,
):
    """
    Fill missing entries of this block's datetime-like values.

    ``method="linear"`` interpolates the backing ``_ndarray`` through
    ``missing.interpolate_array_2d``; every other method is delegated to the
    array's own ``fillna``.

    Parameters
    ----------
    method : str, default "pad"
        Fill strategy, forwarded to whichever routine handles it.
    index : Index or None, default None
        Forwarded to ``missing.interpolate_array_2d``; only used when
        ``method="linear"``.
    axis : int, default 0
        Axis to fill along.
    inplace : bool, default False
        For ``method="linear"``, operate directly on the backing ndarray
        instead of on a copy. Not consulted on the ``fillna`` path.
    limit : int or None, default None
        Forwarded unchanged to the underlying routine.
    fill_value : object, default None
        Passed to ``fillna`` as ``value``; not used when ``method="linear"``.
    **kwargs
        Accepted for signature compatibility; not used in this method.

    Returns
    -------
    The result of ``self.make_block_same_class`` applied to the filled values.
    """
    arr = self.values

    # error: Non-overlapping equality check (left operand type:
    # "Literal['backfill', 'bfill', 'ffill', 'pad']", right operand type:
    # "Literal['linear']")  [comparison-overlap]
    if method != "linear":  # type: ignore[comparison-overlap]
        # NDArrayBackedExtensionArray.fillna assumes axis=1, so for a 2D
        # array filled along axis 0 we transpose going in and coming out.
        swap_axes = arr.ndim == 2 and axis == 0
        source = arr.T if swap_axes else arr
        filled = source.fillna(value=fill_value, method=method, limit=limit)
        if swap_axes:
            filled = filled.T
        return self.make_block_same_class(filled)

    # TODO: GH#50950 implement for arbitrary EAs
    buffer = arr._ndarray
    if not inplace:
        buffer = buffer.copy()
    # interpolate_array_2d writes into ``buffer``; its return value is unused.
    missing.interpolate_array_2d(
        buffer, method=method, limit=limit, index=index, axis=axis
    )
    interpolated = type(arr)._simple_new(buffer, dtype=arr.dtype)
    return self.make_block_same_class(interpolated)


class DatetimeTZBlock(DatetimeLikeBlock):
"""implement a datetime64 block with a tz attribute"""
Expand Down
11 changes: 10 additions & 1 deletion pandas/core/missing.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
import numpy as np

from pandas._libs import (
NaT,
algos,
lib,
)
Expand Down Expand Up @@ -457,6 +458,11 @@ def _interpolate_1d(
# sort preserve_nans and convert to list
preserve_nans = sorted(preserve_nans)

is_datetimelike = needs_i8_conversion(yvalues.dtype)

if is_datetimelike:
yvalues = yvalues.view("i8")

if method in NP_METHODS:
# np.interp requires sorted X values, #21037

Expand All @@ -476,7 +482,10 @@ def _interpolate_1d(
**kwargs,
)

yvalues[preserve_nans] = np.nan
if is_datetimelike:
yvalues[preserve_nans] = NaT.value
else:
yvalues[preserve_nans] = np.nan
return


Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/frame/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,36 @@

from pandas import (
DataFrame,
NaT,
Series,
date_range,
)
import pandas._testing as tm


class TestDataFrameInterpolate:
def test_interpolate_datetimelike_values(self, frame_or_series):
# GH#11312, GH#51005
orig = Series(date_range("2012-01-01", periods=5))
ser = orig.copy()
ser[2] = NaT

res = frame_or_series(ser).interpolate()
expected = frame_or_series(orig)
tm.assert_equal(res, expected)

# datetime64tz cast
ser_tz = ser.dt.tz_localize("US/Pacific")
res_tz = frame_or_series(ser_tz).interpolate()
expected_tz = frame_or_series(orig.dt.tz_localize("US/Pacific"))
tm.assert_equal(res_tz, expected_tz)

# timedelta64 cast
ser_td = ser - ser[0]
res_td = frame_or_series(ser_td).interpolate()
expected_td = frame_or_series(orig - orig[0])
tm.assert_equal(res_td, expected_td)

def test_interpolate_inplace(self, frame_or_series, using_array_manager, request):
# GH#44749
if using_array_manager and frame_or_series is DataFrame:
Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/series/methods/test_interpolate.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,18 @@ def interp_methods_ind(request):


class TestSeriesInterpolateData:
@pytest.mark.xfail(reason="EA.fillna does not handle 'linear' method")
def test_interpolate_period_values(self):
    """Interpolating a Period series with an interior NaT (expected to fail)."""
    dates = Series(date_range("2012-01-01", periods=5))
    with_gap = dates.copy()
    with_gap[2] = pd.NaT

    # period cast
    result = with_gap.dt.to_period("D").interpolate()
    expected = dates.dt.to_period("D")
    tm.assert_series_equal(result, expected)

def test_interpolate(self, datetime_series):
ts = Series(np.arange(len(datetime_series), dtype=float), datetime_series.index)

Expand Down