diff --git a/doc/source/whatsnew/v1.4.0.rst b/doc/source/whatsnew/v1.4.0.rst index 4a4e7dd6d15d7..82415319e96ce 100644 --- a/doc/source/whatsnew/v1.4.0.rst +++ b/doc/source/whatsnew/v1.4.0.rst @@ -693,6 +693,7 @@ Missing - Bug in :meth:`DataFrame.fillna` with limit and no method ignores axis='columns' or ``axis = 1`` (:issue:`40989`) - Bug in :meth:`DataFrame.fillna` not replacing missing values when using a dict-like ``value`` and duplicate column names (:issue:`43476`) - Bug in constructing a :class:`DataFrame` with a dictionary ``np.datetime64`` as a value and ``dtype='timedelta64[ns]'``, or vice-versa, incorrectly casting instead of raising (:issue:`??`) +- Bug in :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` with ``inplace=True`` not writing to the underlying array(s) in-place (:issue:`44749`) - MultiIndex diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f93f386261005..f4b301c25c603 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -1079,7 +1079,7 @@ def interpolate( data = self.values if inplace else self.values.copy() data = cast(np.ndarray, data) # bc overridden by ExtensionBlock - interp_values = missing.interpolate_array_2d( + missing.interpolate_array_2d( data, method=method, axis=axis, @@ -1091,7 +1091,7 @@ def interpolate( **kwargs, ) - nb = self.make_block_same_class(interp_values) + nb = self.make_block_same_class(data) return nb._maybe_downcast([nb], downcast) def take_nd( diff --git a/pandas/core/missing.py b/pandas/core/missing.py index ede0878f15caa..8a3d892876b5c 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -217,11 +217,13 @@ def interpolate_array_2d( coerce: bool = False, downcast: str | None = None, **kwargs, -) -> np.ndarray: +) -> None: """ Wrapper to dispatch to either interpolate_2d or _interpolate_2d_with_fill. - Returned ndarray has same dtype as 'data'. + Notes + ----- + Alters 'data' in-place. """ try: m = clean_fill_method(method) @@ -240,11 +242,10 @@ def interpolate_array_2d( limit=limit, limit_area=limit_area, ) - interp_values = data else: assert index is not None # for mypy - interp_values = _interpolate_2d_with_fill( + _interpolate_2d_with_fill( data=data, index=index, axis=axis, @@ -255,7 +256,7 @@ def interpolate_array_2d( fill_value=fill_value, **kwargs, ) - return interp_values + return def _interpolate_2d_with_fill( @@ -268,13 +269,15 @@ def _interpolate_2d_with_fill( limit_area: str | None = None, fill_value: Any | None = None, **kwargs, -) -> np.ndarray: +) -> None: """ Column-wise application of _interpolate_1d. Notes ----- - The signature does differs from _interpolate_1d because it only + Alters 'data' in-place. + + The signature does differ from _interpolate_1d because it only includes what is needed for Block.interpolate. """ # validate the interp method @@ -314,12 +317,10 @@ def _interpolate_2d_with_fill( indices = _index_to_interp_indices(index, method) - def func(yvalues: np.ndarray) -> np.ndarray: - # process 1-d slices in the axis direction, returning it + def func(yvalues: np.ndarray) -> None: + # process 1-d slices in the axis direction - # should the axis argument be handled below in apply_along_axis? - # i.e. not an arg to _interpolate_1d - return _interpolate_1d( + _interpolate_1d( indices=indices, yvalues=yvalues, method=method, @@ -332,7 +333,8 @@ def func(yvalues: np.ndarray) -> np.ndarray: ) # interp each column independently - return np.apply_along_axis(func, axis, data) + np.apply_along_axis(func, axis, data) + return def _index_to_interp_indices(index: Index, method: str) -> np.ndarray: @@ -370,23 +372,25 @@ def _interpolate_1d( **kwargs, ): """ - Logic for the 1-d interpolation. The result should be 1-d, inputs + Logic for the 1-d interpolation. The input indices and yvalues will each be 1-d arrays of the same length. Bounds_error is currently hardcoded to False since non-scipy ones don't take it as an argument. + + Notes + ----- + Fills 'yvalues' in-place. """ invalid = isna(yvalues) valid = ~invalid if not valid.any(): - result = np.empty(indices.shape, dtype=np.float64) - result.fill(np.nan) - return result + return if valid.all(): - return yvalues + return # These are sets of index pointers to invalid values... i.e. {0, 1, etc... all_nans = set(np.flatnonzero(invalid)) @@ -432,17 +436,15 @@ def _interpolate_1d( # sort preserve_nans and convert to list preserve_nans = sorted(preserve_nans) - result = yvalues.copy() - if method in NP_METHODS: # np.interp requires sorted X values, #21037 indexer = np.argsort(indices[valid]) - result[invalid] = np.interp( + yvalues[invalid] = np.interp( indices[invalid], indices[valid][indexer], yvalues[valid][indexer] ) else: - result[invalid] = _interpolate_scipy_wrapper( + yvalues[invalid] = _interpolate_scipy_wrapper( indices[valid], yvalues[valid], indices[invalid], @@ -453,8 +455,8 @@ def _interpolate_1d( **kwargs, ) - result[preserve_nans] = np.nan - return result + yvalues[preserve_nans] = np.nan + return def _interpolate_scipy_wrapper( diff --git a/pandas/tests/frame/methods/test_interpolate.py b/pandas/tests/frame/methods/test_interpolate.py index 93225ff1050a3..37fb0754baffd 100644 --- a/pandas/tests/frame/methods/test_interpolate.py +++ b/pandas/tests/frame/methods/test_interpolate.py @@ -12,6 +12,23 @@ class TestDataFrameInterpolate: + def test_interpolate_inplace(self, frame_or_series, using_array_manager, request): + # GH#44749 + if using_array_manager and frame_or_series is DataFrame: + mark = pytest.mark.xfail(reason=".values-based in-place check is invalid") + request.node.add_marker(mark) + + obj = frame_or_series([1, np.nan, 2]) + orig = obj.values + + obj.interpolate(inplace=True) + expected = frame_or_series([1, 1.5, 2]) + tm.assert_equal(obj, expected) + + # check we operated *actually* inplace + assert np.shares_memory(orig, obj.values) + assert orig.squeeze()[1] == 1.5 + def test_interp_basic(self): df = DataFrame( {