From 74614cb66a3bc011c57e0befcaa2cd69e6d31a6c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 22 Feb 2018 11:07:35 -0600 Subject: [PATCH 01/18] ENH: ExtensionArray.fillna --- pandas/core/arrays/base.py | 84 +++++++++++++++++++ pandas/core/internals.py | 38 ++++----- pandas/tests/extension/base/missing.py | 80 ++++++++++++++++++ .../extension/category/test_categorical.py | 5 +- .../tests/extension/decimal/test_decimal.py | 75 ++++++++--------- pandas/tests/extension/json/test_json.py | 8 +- 6 files changed, 225 insertions(+), 65 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index cec881394a021..8dc4ddbff7d5c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1,4 +1,6 @@ """An interface for extending pandas with custom arrays.""" +import itertools + import numpy as np from pandas.errors import AbstractMethodError @@ -216,6 +218,88 @@ def isna(self): """ raise AbstractMethodError(self) + def tolist(self): + # type: () -> list + """Convert the array to a list of scalars.""" + return list(self) + + def fillna(self, value=None, method=None, limit=None): + """ Fill NA/NaN values using the specified method. + + Parameters + ---------- + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap + value : scalar, array-like + If a scalar value is passed it is used to fill all missing values. + Alternatively, an array-like 'value' can be given. It's expected + that the array-like have the same length as 'self'. + limit : int, default None + (Not implemented yet for ExtensionArray!) + If method is specified, this is the maximum number of consecutive + NaN values to forward/backward fill. In other words, if there is + a gap with more than this number of consecutive NaNs, it will only + be partially filled. If method is not specified, this is the + maximum number of entries along the entire axis where NaNs will be + filled. + + Returns + ------- + filled : ExtensionArray with NA/NaN filled + """ + from pandas.api.types import is_scalar + from pandas.util._validators import validate_fillna_kwargs + + value, method = validate_fillna_kwargs(value, method) + + if not is_scalar(value): + if len(value) != len(self): + raise ValueError("Length of 'value' does not match. Got ({}) " + " expected {}".format(len(value), len(self))) + else: + value = itertools.cycle([value]) + + if limit is not None: + msg = ("Specifying 'limit' for 'fillna' has not been implemented " + "yet for {} typed data".format(self.dtype)) + raise NotImplementedError(msg) + + mask = self.isna() + + if mask.any(): + # ffill / bfill + if method is not None: + if method == 'backfill': + data = reversed(self) + mask = reversed(mask) + last_valid = self[len(self) - 1] + else: + last_valid = self[0] + data = self + + new_values = [] + + for is_na, val in zip(mask, data): + if is_na: + new_values.append(last_valid) + else: + new_values.append(val) + last_valid = val + + if method in {'bfill', 'backfill'}: + new_values = list(reversed(new_values)) + else: + # fill with value + new_values = [ + val if is_na else original + for is_na, original, val in zip(mask, self, value) + ] + else: + new_values = self + return type(self)(new_values) + # ------------------------------------------------------------------------ # Indexing methods # ------------------------------------------------------------------------ diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 00ef8f9cef598..da7329e6ced23 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -1963,6 +1963,23 @@ def concat_same_type(self, to_concat, placement=None): return self.make_block_same_class(values, ndim=self.ndim, placement=placement) + def fillna(self, value, limit=None, inplace=False, downcast=None, + mgr=None): + values = self.values if inplace else self.values.copy() + values = values.fillna(value=value, limit=limit) + return [self.make_block_same_class(values=values, + placement=self.mgr_locs, + ndim=self.ndim)] + + def interpolate(self, method='pad', axis=0, inplace=False, limit=None, + fill_value=None, **kwargs): + + values = self.values if inplace else self.values.copy() + return self.make_block_same_class( + values=values.fillna(value=fill_value, method=method, + limit=limit), + placement=self.mgr_locs) + class NumericBlock(Block): __slots__ = () @@ -2522,27 +2539,6 @@ def _try_coerce_result(self, result): return result - def fillna(self, value, limit=None, inplace=False, downcast=None, - mgr=None): - # we may need to upcast our fill to match our dtype - if limit is not None: - raise NotImplementedError("specifying a limit for 'fillna' has " - "not been implemented yet") - - values = self.values if inplace else self.values.copy() - values = self._try_coerce_result(values.fillna(value=value, - limit=limit)) - return [self.make_block(values=values)] - - def interpolate(self, method='pad', axis=0, inplace=False, limit=None, - fill_value=None, **kwargs): - - values = self.values if inplace else self.values.copy() - return self.make_block_same_class( - values=values.fillna(fill_value=fill_value, method=method, - limit=limit), - placement=self.mgr_locs) - def shift(self, periods, axis=0, mgr=None): return self.make_block_same_class(values=self.values.shift(periods), placement=self.mgr_locs) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 3ae82fa1ca432..086bd0c3b95fa 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -1,4 +1,5 @@ import numpy as np +import pytest import pandas as pd import pandas.util.testing as tm @@ -45,3 +46,82 @@ def test_dropna_frame(self, data_missing): result = df.dropna() expected = df.iloc[:0] self.assert_frame_equal(result, expected) + + def test_fillna_limit_raises(self, data_missing): + ser = pd.Series(data_missing) + fill_value = data_missing[1] + xpr = "Specifying 'limit' for 'fillna'.*{}".format(data_missing.dtype) + + with tm.assert_raises_regex(NotImplementedError, xpr): + ser.fillna(fill_value, limit=2) + + def test_fillna_series(self, data_missing): + fill_value = data_missing[1] + ser = pd.Series(data_missing) + + result = ser.fillna(fill_value) + expected = pd.Series(type(data_missing)([fill_value, fill_value])) + self.assert_series_equal(result, expected) + + # Fill with a series + result = ser.fillna(expected) + self.assert_series_equal(result, expected) + + # Fill with a series not affecting the missing values + result = ser.fillna(ser) + self.assert_series_equal(result, ser) + + @pytest.mark.xfail(reason="Too magical?") + def test_fillna_series_with_dict(self, data_missing): + fill_value = data_missing[1] + ser = pd.Series(data_missing) + expected = pd.Series(type(data_missing)([fill_value, fill_value])) + + # Fill with a dict + result = ser.fillna({0: fill_value}) + self.assert_series_equal(result, expected) + + # Fill with a dict not affecting the missing values + result = ser.fillna({1: fill_value}) + ser = pd.Series(data_missing) + self.assert_series_equal(result, ser) + + @pytest.mark.parametrize('method', ['ffill', 'bfill']) + def test_fillna_series_method(self, data_missing, method): + fill_value = data_missing[1] + + if method == 'ffill': + data_missing = type(data_missing)(data_missing[::-1]) + + result = pd.Series(data_missing).fillna(method=method) + expected = pd.Series(type(data_missing)([fill_value, fill_value])) + + self.assert_series_equal(result, expected) + + def test_fillna_frame(self, data_missing): + fill_value = data_missing[1] + + result = pd.DataFrame({ + "A": data_missing, + "B": [1, 2] + }).fillna(fill_value) + + expected = pd.DataFrame({ + "A": type(data_missing)([fill_value, fill_value]), + "B": [1, 2], + }) + + self.assert_frame_equal(result, expected) + + def test_fillna_fill_other(self, data): + result = pd.DataFrame({ + "A": data, + "B": [np.nan] * len(data) + }).fillna({"B": 0.0}) + + expected = pd.DataFrame({ + "A": data, + "B": [0.0] * len(result), + }) + + self.assert_frame_equal(result, expected) diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 8f413b4a19730..ddd8d01b841c7 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -69,7 +69,10 @@ def test_getitem_scalar(self): class TestMissing(base.BaseMissingTests): - pass + + @pytest.mark.skip(reason="Backwards compatability") + def test_fillna_limit_raises(self): + """Has a different error message.""" class TestMethods(base.BaseMethodsTests): diff --git a/pandas/tests/extension/decimal/test_decimal.py b/pandas/tests/extension/decimal/test_decimal.py index 7b4d079ecad87..01ae092bc1521 100644 --- a/pandas/tests/extension/decimal/test_decimal.py +++ b/pandas/tests/extension/decimal/test_decimal.py @@ -35,68 +35,59 @@ def na_value(): return decimal.Decimal("NaN") -class TestDtype(base.BaseDtypeTests): - pass +class BaseDecimal(object): + @staticmethod + def assert_series_equal(left, right, *args, **kwargs): + # tm.assert_series_equal doesn't handle Decimal('NaN'). + # We will ensure that the NA values match, and then + # drop those values before moving on. + left_na = left.isna() + right_na = right.isna() -class TestInterface(base.BaseInterfaceTests): - pass + tm.assert_series_equal(left_na, right_na) + tm.assert_series_equal(left[~left_na], right[~right_na], + *args, **kwargs) + @staticmethod + def assert_frame_equal(left, right, *args, **kwargs): + # TODO(EA): select_dtypes + decimals = (left.dtypes == 'decimal').index -class TestConstructors(base.BaseConstructorsTests): - pass + for col in decimals: + BaseDecimal.assert_series_equal(left[col], right[col], + *args, **kwargs) + left = left.drop(columns=decimals) + right = right.drop(columns=decimals) + tm.assert_frame_equal(left, right, *args, **kwargs) -class TestReshaping(base.BaseReshapingTests): - def test_align(self, data, na_value): - # Have to override since assert_series_equal doesn't - # compare Decimal(NaN) properly. - a = data[:3] - b = data[2:5] - r1, r2 = pd.Series(a).align(pd.Series(b, index=[1, 2, 3])) +class TestDtype(BaseDecimal, base.BaseDtypeTests): + pass - # NaN handling - e1 = pd.Series(type(data)(list(a) + [na_value])) - e2 = pd.Series(type(data)([na_value] + list(b))) - tm.assert_series_equal(r1.iloc[:3], e1.iloc[:3]) - assert r1[3].is_nan() - assert e1[3].is_nan() - tm.assert_series_equal(r2.iloc[1:], e2.iloc[1:]) - assert r2[0].is_nan() - assert e2[0].is_nan() +class TestInterface(BaseDecimal, base.BaseInterfaceTests): + pass - def test_align_frame(self, data, na_value): - # Override for Decimal(NaN) comparison - a = data[:3] - b = data[2:5] - r1, r2 = pd.DataFrame({'A': a}).align( - pd.DataFrame({'A': b}, index=[1, 2, 3]) - ) - # Assumes that the ctor can take a list of scalars of the type - e1 = pd.DataFrame({'A': type(data)(list(a) + [na_value])}) - e2 = pd.DataFrame({'A': type(data)([na_value] + list(b))}) +class TestConstructors(BaseDecimal, base.BaseConstructorsTests): + pass - tm.assert_frame_equal(r1.iloc[:3], e1.iloc[:3]) - assert r1.loc[3, 'A'].is_nan() - assert e1.loc[3, 'A'].is_nan() - tm.assert_frame_equal(r2.iloc[1:], e2.iloc[1:]) - assert r2.loc[0, 'A'].is_nan() - assert e2.loc[0, 'A'].is_nan() +class TestReshaping(BaseDecimal, base.BaseReshapingTests): + pass -class TestGetitem(base.BaseGetitemTests): +class TestGetitem(BaseDecimal, base.BaseGetitemTests): pass -class TestMissing(base.BaseMissingTests): +class TestMissing(BaseDecimal, base.BaseMissingTests): pass -class TestMethods(base.BaseMethodsTests): +class TestMethods(BaseDecimal, base.BaseMethodsTests): @pytest.mark.parametrize('dropna', [True, False]) @pytest.mark.xfail(reason="value_counts not implemented yet.") def test_value_counts(self, all_data, dropna): @@ -112,7 +103,7 @@ def test_value_counts(self, all_data, dropna): tm.assert_series_equal(result, expected) -class TestCasting(base.BaseCastingTests): +class TestCasting(BaseDecimal, base.BaseCastingTests): pass diff --git a/pandas/tests/extension/json/test_json.py b/pandas/tests/extension/json/test_json.py index e0721bb1d8d1a..16d5e4415a79f 100644 --- a/pandas/tests/extension/json/test_json.py +++ b/pandas/tests/extension/json/test_json.py @@ -60,7 +60,13 @@ class TestGetitem(base.BaseGetitemTests): class TestMissing(base.BaseMissingTests): - pass + @pytest.mark.xfail(reason="Setting a dict as a scalar") + def test_fillna_series(self): + """We treat dictionaries as a mapping in fillna, not a scalar.""" + + @pytest.mark.xfail(reason="Setting a dict as a scalar") + def test_fillna_frame(self): + """We treat dictionaries as a mapping in fillna, not a scalar.""" class TestMethods(base.BaseMethodsTests): From 67a19ba26cbe8a205a65a41a0278832caef095a0 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Feb 2018 06:42:24 -0600 Subject: [PATCH 02/18] REF: cast to object --- pandas/core/arrays/base.py | 40 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8dc4ddbff7d5c..63aa5fe5e25aa 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -251,51 +251,35 @@ def fillna(self, value=None, method=None, limit=None): """ from pandas.api.types import is_scalar from pandas.util._validators import validate_fillna_kwargs + from pandas.core.missing import pad_1d, backfill_1d value, method = validate_fillna_kwargs(value, method) + mask = self.isna() + if not is_scalar(value): if len(value) != len(self): raise ValueError("Length of 'value' does not match. Got ({}) " " expected {}".format(len(value), len(self))) - else: - value = itertools.cycle([value]) + value = value[mask] if limit is not None: msg = ("Specifying 'limit' for 'fillna' has not been implemented " "yet for {} typed data".format(self.dtype)) raise NotImplementedError(msg) - mask = self.isna() - if mask.any(): # ffill / bfill - if method is not None: - if method == 'backfill': - data = reversed(self) - mask = reversed(mask) - last_valid = self[len(self) - 1] - else: - last_valid = self[0] - data = self - - new_values = [] - - for is_na, val in zip(mask, data): - if is_na: - new_values.append(last_valid) - else: - new_values.append(val) - last_valid = val - - if method in {'bfill', 'backfill'}: - new_values = list(reversed(new_values)) + if method == 'pad': + values = self.astype(object) + new_values = pad_1d(values, mask=mask) + elif method == 'backfill': + values = self.astype(object) + new_values = backfill_1d(values, mask=mask) else: # fill with value - new_values = [ - val if is_na else original - for is_na, original, val in zip(mask, self, value) - ] + new_values = self.copy() + new_values[mask] = value else: new_values = self return type(self)(new_values) From 280ac9487c1000005644b4a51cbfa313b0a58f6e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Tue, 27 Feb 2018 06:56:36 -0600 Subject: [PATCH 03/18] Revert "REF: cast to object" This reverts commit 67a19ba26cbe8a205a65a41a0278832caef095a0. --- pandas/core/arrays/base.py | 40 ++++++++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 12 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 63aa5fe5e25aa..8dc4ddbff7d5c 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -251,35 +251,51 @@ def fillna(self, value=None, method=None, limit=None): """ from pandas.api.types import is_scalar from pandas.util._validators import validate_fillna_kwargs - from pandas.core.missing import pad_1d, backfill_1d value, method = validate_fillna_kwargs(value, method) - mask = self.isna() - if not is_scalar(value): if len(value) != len(self): raise ValueError("Length of 'value' does not match. Got ({}) " " expected {}".format(len(value), len(self))) - value = value[mask] + else: + value = itertools.cycle([value]) if limit is not None: msg = ("Specifying 'limit' for 'fillna' has not been implemented " "yet for {} typed data".format(self.dtype)) raise NotImplementedError(msg) + mask = self.isna() + if mask.any(): # ffill / bfill - if method == 'pad': - values = self.astype(object) - new_values = pad_1d(values, mask=mask) - elif method == 'backfill': - values = self.astype(object) - new_values = backfill_1d(values, mask=mask) + if method is not None: + if method == 'backfill': + data = reversed(self) + mask = reversed(mask) + last_valid = self[len(self) - 1] + else: + last_valid = self[0] + data = self + + new_values = [] + + for is_na, val in zip(mask, data): + if is_na: + new_values.append(last_valid) + else: + new_values.append(val) + last_valid = val + + if method in {'bfill', 'backfill'}: + new_values = list(reversed(new_values)) else: # fill with value - new_values = self.copy() - new_values[mask] = value + new_values = [ + val if is_na else original + for is_na, original, val in zip(mask, self, value) + ] else: new_values = self return type(self)(new_values) From 69a0f9b76b874b7680728c0dd6cf22a309ab9511 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Feb 2018 11:49:02 -0600 Subject: [PATCH 04/18] Simpler implementation --- pandas/core/arrays/base.py | 44 ++++++++------------------ pandas/tests/extension/base/missing.py | 15 --------- 2 files changed, 13 insertions(+), 46 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8dc4ddbff7d5c..3f481651fe874 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -1,6 +1,4 @@ """An interface for extending pandas with custom arrays.""" -import itertools - import numpy as np from pandas.errors import AbstractMethodError @@ -251,53 +249,37 @@ def fillna(self, value=None, method=None, limit=None): """ from pandas.api.types import is_scalar from pandas.util._validators import validate_fillna_kwargs + from pandas.core.missing import pad_1d, backfill_1d value, method = validate_fillna_kwargs(value, method) + mask = self.isna() + if not is_scalar(value): if len(value) != len(self): raise ValueError("Length of 'value' does not match. Got ({}) " " expected {}".format(len(value), len(self))) - else: - value = itertools.cycle([value]) + value = value[mask] if limit is not None: msg = ("Specifying 'limit' for 'fillna' has not been implemented " "yet for {} typed data".format(self.dtype)) raise NotImplementedError(msg) - mask = self.isna() - if mask.any(): - # ffill / bfill if method is not None: - if method == 'backfill': - data = reversed(self) - mask = reversed(mask) - last_valid = self[len(self) - 1] - else: - last_valid = self[0] - data = self - - new_values = [] - - for is_na, val in zip(mask, data): - if is_na: - new_values.append(last_valid) - else: - new_values.append(val) - last_valid = val - - if method in {'bfill', 'backfill'}: - new_values = list(reversed(new_values)) + # ffill / bfill + func = pad_1d if method == 'pad' else backfill_1d + idx = np.arange(len(self), dtype=np.float64) + idx[mask] = np.nan + idx = func(idx, mask=mask).astype(np.int64) + new_values = self.take(idx) else: # fill with value - new_values = [ - val if is_na else original - for is_na, original, val in zip(mask, self, value) - ] + new_values = self.copy() + new_values[mask] = value else: - new_values = self + new_values = self.copy() return type(self)(new_values) # ------------------------------------------------------------------------ diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 086bd0c3b95fa..3f87194d971dc 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -71,21 +71,6 @@ def test_fillna_series(self, data_missing): result = ser.fillna(ser) self.assert_series_equal(result, ser) - @pytest.mark.xfail(reason="Too magical?") - def test_fillna_series_with_dict(self, data_missing): - fill_value = data_missing[1] - ser = pd.Series(data_missing) - expected = pd.Series(type(data_missing)([fill_value, fill_value])) - - # Fill with a dict - result = ser.fillna({0: fill_value}) - self.assert_series_equal(result, expected) - - # Fill with a dict not affecting the missing values - result = ser.fillna({1: fill_value}) - ser = pd.Series(data_missing) - self.assert_series_equal(result, ser) - @pytest.mark.parametrize('method', ['ffill', 'bfill']) def test_fillna_series_method(self, data_missing, method): fill_value = data_missing[1] From 4d6846bb8601fe6797f5b86b0829a3dc1dcd66f1 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Feb 2018 13:43:38 -0600 Subject: [PATCH 05/18] Support limit --- pandas/core/arrays/base.py | 31 ++++++++++--------- pandas/core/arrays/categorical.py | 8 ++--- pandas/tests/extension/base/missing.py | 12 +++---- .../extension/category/test_categorical.py | 6 ++-- 4 files changed, 29 insertions(+), 28 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 3f481651fe874..6906a9069526e 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -226,16 +226,15 @@ def fillna(self, value=None, method=None, limit=None): Parameters ---------- - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - Method to use for filling holes in reindexed Series - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap value : scalar, array-like If a scalar value is passed it is used to fill all missing values. Alternatively, an array-like 'value' can be given. It's expected that the array-like have the same length as 'self'. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap limit : int, default None - (Not implemented yet for ExtensionArray!) If method is specified, this is the maximum number of consecutive NaN values to forward/backward fill. In other words, if there is a gap with more than this number of consecutive NaNs, it will only @@ -250,6 +249,8 @@ def fillna(self, value=None, method=None, limit=None): from pandas.api.types import is_scalar from pandas.util._validators import validate_fillna_kwargs from pandas.core.missing import pad_1d, backfill_1d + from pandas.core.dtypes.common import _ensure_platform_int + from pandas._libs.tslib import iNaT value, method = validate_fillna_kwargs(value, method) @@ -261,18 +262,20 @@ def fillna(self, value=None, method=None, limit=None): " expected {}".format(len(value), len(self))) value = value[mask] - if limit is not None: - msg = ("Specifying 'limit' for 'fillna' has not been implemented " - "yet for {} typed data".format(self.dtype)) - raise NotImplementedError(msg) - if mask.any(): if method is not None: # ffill / bfill + # The basic idea is to create an array of integer positions. + # Internally, we use iNaT and the datetime filling routines + # to avoid floating-point NaN. Once filled, we take on `self` + # to get the actual values. func = pad_1d if method == 'pad' else backfill_1d - idx = np.arange(len(self), dtype=np.float64) - idx[mask] = np.nan - idx = func(idx, mask=mask).astype(np.int64) + idx = np.arange(len(self), dtype='int64') + idx[mask] = iNaT + idx = _ensure_platform_int(func(idx, mask=mask, + limit=limit, + dtype='datetime64[ns]')) + idx[idx == iNaT] = -1 # missing value marker for take. new_values = self.take(idx) else: # fill with value @@ -280,7 +283,7 @@ def fillna(self, value=None, method=None, limit=None): new_values[mask] = value else: new_values = self.copy() - return type(self)(new_values) + return new_values # ------------------------------------------------------------------------ # Indexing methods diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index c6eeabf0148d0..abba9f8a90528 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1587,16 +1587,16 @@ def fillna(self, value=None, method=None, limit=None): Parameters ---------- - method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None - Method to use for filling holes in reindexed Series - pad / ffill: propagate last valid observation forward to next valid - backfill / bfill: use NEXT valid observation to fill gap value : scalar, dict, Series If a scalar value is passed it is used to fill all missing values. Alternatively, a Series or dict can be used to fill in different values for each index. The value should not be a list. The value(s) passed should either be in the categories or should be NaN. + method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None + Method to use for filling holes in reindexed Series + pad / ffill: propagate last valid observation forward to next valid + backfill / bfill: use NEXT valid observation to fill gap limit : int, default None (Not implemented yet for Categorical!) If method is specified, this is the maximum number of consecutive diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 3f87194d971dc..24e037ccea09c 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -47,13 +47,11 @@ def test_dropna_frame(self, data_missing): expected = df.iloc[:0] self.assert_frame_equal(result, expected) - def test_fillna_limit_raises(self, data_missing): - ser = pd.Series(data_missing) - fill_value = data_missing[1] - xpr = "Specifying 'limit' for 'fillna'.*{}".format(data_missing.dtype) - - with tm.assert_raises_regex(NotImplementedError, xpr): - ser.fillna(fill_value, limit=2) + def test_fillna_limit(self, data_missing): + arr = data_missing.take([1, 0, 0, 0, 1]) + result = pd.Series(arr).fillna(method='ffill', limit=2) + expected = pd.Series(data_missing.take([1, 1, 1, 0, 1])) + self.assert_series_equal(result, expected) def test_fillna_series(self, data_missing): fill_value = data_missing[1] diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index ddd8d01b841c7..7a380f85b24c3 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -70,9 +70,9 @@ def test_getitem_scalar(self): class TestMissing(base.BaseMissingTests): - @pytest.mark.skip(reason="Backwards compatability") - def test_fillna_limit_raises(self): - """Has a different error message.""" + @pytest.mark.skip(reason="Not implemented") + def test_fillna_limit(self): + pass class TestMethods(base.BaseMethodsTests): From f3b81dcd0b47d809aaa5a826131ab96764405f4f Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Feb 2018 13:47:29 -0600 Subject: [PATCH 06/18] Test backfill with limit --- pandas/tests/extension/base/missing.py | 8 +++++++- pandas/tests/extension/category/test_categorical.py | 6 +++++- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py index 24e037ccea09c..bf404ac01bf2b 100644 --- a/pandas/tests/extension/base/missing.py +++ b/pandas/tests/extension/base/missing.py @@ -47,12 +47,18 @@ def test_dropna_frame(self, data_missing): expected = df.iloc[:0] self.assert_frame_equal(result, expected) - def test_fillna_limit(self, data_missing): + def test_fillna_limit_pad(self, data_missing): arr = data_missing.take([1, 0, 0, 0, 1]) result = pd.Series(arr).fillna(method='ffill', limit=2) expected = pd.Series(data_missing.take([1, 1, 1, 0, 1])) self.assert_series_equal(result, expected) + def test_fillna_limit_backfill(self, data_missing): + arr = data_missing.take([1, 0, 0, 0, 1]) + result = pd.Series(arr).fillna(method='backfill', limit=2) + expected = pd.Series(data_missing.take([1, 0, 1, 1, 1])) + self.assert_series_equal(result, expected) + def test_fillna_series(self, data_missing): fill_value = data_missing[1] ser = pd.Series(data_missing) diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py index 7a380f85b24c3..b6dd181c1d8f3 100644 --- a/pandas/tests/extension/category/test_categorical.py +++ b/pandas/tests/extension/category/test_categorical.py @@ -71,7 +71,11 @@ def test_getitem_scalar(self): class TestMissing(base.BaseMissingTests): @pytest.mark.skip(reason="Not implemented") - def test_fillna_limit(self): + def test_fillna_limit_pad(self): + pass + + @pytest.mark.skip(reason="Not implemented") + def test_fillna_limit_backfill(self): pass From 70efbb8703d579d495b4f83e43b1b26d27070603 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 28 Feb 2018 16:26:26 -0600 Subject: [PATCH 07/18] BUG: ensure array-like for indexer --- pandas/core/arrays/base.py | 1 + pandas/tests/extension/decimal/array.py | 1 + 2 files changed, 2 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6906a9069526e..7f7a79e160338 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -322,6 +322,7 @@ def take(self, indexer, allow_fill=True, fill_value=None): .. code-block:: python def take(self, indexer, allow_fill=True, fill_value=None): + indexer = np.asarray(indexer) mask = indexer == -1 result = self.data.take(indexer) result[mask] = np.nan # NA for this type diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py index 8b2eaadeca99e..e99001a3742a7 100644 --- a/pandas/tests/extension/decimal/array.py +++ b/pandas/tests/extension/decimal/array.py @@ -67,6 +67,7 @@ def isna(self): return np.array([x.is_nan() for x in self.values]) def take(self, indexer, allow_fill=True, fill_value=None): + indexer = np.asarray(indexer) mask = indexer == -1 indexer = _ensure_platform_int(indexer) From 8fc3851cc0beb445c8fb4c446cdefb30c97b6eb2 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 1 Mar 2018 13:54:16 -0600 Subject: [PATCH 08/18] Refactor tolist Moved to a new method in dtypes.cast. --- pandas/core/arrays/base.py | 5 ---- pandas/core/base.py | 8 ++---- pandas/core/dtypes/cast.py | 32 +++++++++++++++++++++++- pandas/tests/dtypes/test_cast.py | 15 ++++++++++- pandas/tests/extension/base/interface.py | 6 +++++ 5 files changed, 53 insertions(+), 13 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 7f7a79e160338..b3380d530c333 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -216,11 +216,6 @@ def isna(self): """ raise AbstractMethodError(self) - def tolist(self): - # type: () -> list - """Convert the array to a list of scalars.""" - return list(self) - def fillna(self, value=None, method=None, limit=None): """ Fill NA/NaN values using the specified method. diff --git a/pandas/core/base.py b/pandas/core/base.py index 280b8849792e3..fcac9bbf51f81 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -8,11 +8,11 @@ from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass +from pandas.core.dtypes.cast import tolist from pandas.core.dtypes.common import ( is_object_dtype, is_list_like, is_scalar, - is_datetimelike, is_extension_type, is_extension_array_dtype) @@ -826,11 +826,7 @@ def tolist(self): -------- numpy.ndarray.tolist """ - - if is_datetimelike(self): - return [com._maybe_box_datetimelike(x) for x in self._values] - else: - return self._values.tolist() + return tolist(self._values) def __iter__(self): """ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index b1d0dc2a2442e..f8790c0b25cb0 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -11,7 +11,8 @@ from .common import (_ensure_object, is_bool, is_integer, is_float, is_complex, is_datetimetz, is_categorical_dtype, is_datetimelike, - is_extension_type, is_object_dtype, + is_extension_type, is_extension_array_dtype, + is_object_dtype, is_datetime64tz_dtype, is_datetime64_dtype, is_datetime64_ns_dtype, is_timedelta64_dtype, is_timedelta64_ns_dtype, @@ -1222,3 +1223,32 @@ def construct_1d_object_array_from_listlike(values): result = np.empty(len(values), dtype='object') result[:] = values return result + + +def tolist(values): + """Convert an array-like to a list of scalar types. + + Parameters + ---------- + values : ndarray or ExtensionArray + + Returns + ------- + list + Each element of the list is a Python scalar (stor, int float) + or a pandas scalar (Timestamp / Timedelta / Interval / Period) or + the scalar type for 3rd party Extension Arrays. + + See Also + -------- + Series.tolist + numpy.ndarray.tolist + """ + from pandas.core.common import _maybe_box_datetimelike + + if is_datetimelike(values): + return [_maybe_box_datetimelike(x) for x in values] + elif is_extension_array_dtype(values): + return list(values) + else: + return values.tolist() diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index 96a9e3227b40b..acc5657b1d952 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -23,7 +23,8 @@ maybe_convert_scalar, find_common_type, construct_1d_object_array_from_listlike, - construct_1d_arraylike_from_scalar) + construct_1d_arraylike_from_scalar, + tolist) from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, @@ -324,6 +325,18 @@ def test_maybe_convert_objects_copy(self): out = maybe_convert_objects(values, copy=True) assert values is not out + @pytest.mark.parametrize('values, expected', [ + (pd.date_range('2017', periods=1), [pd.Timestamp('2017')]), + (pd.period_range('2017', periods=1, freq='D'), + [pd.Period('2017', freq='D')]), + (pd.timedelta_range(0, periods=1), [pd.Timedelta('0')]), + (pd.interval_range(0, periods=1), [pd.Interval(0, 1)]), + (np.array([0, 1]), [0, 1]), + ]) + def test_tolist(self, values, expected): + result = tolist(values) + assert result == expected + class TestCommonTypes(object): diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index e1596f0675f32..bf1c31742182d 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -2,6 +2,7 @@ import pandas as pd from pandas.compat import StringIO +from pandas.core.dtypes.cast import tolist from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes.dtypes import ExtensionDtype @@ -53,3 +54,8 @@ def test_is_extension_array_dtype(self, data): assert is_extension_array_dtype(data.dtype) assert is_extension_array_dtype(pd.Series(data)) assert isinstance(data.dtype, ExtensionDtype) + + def test_tolist(self, data): + result = tolist(data) + expected = list(data) + assert result == expected From 39096e52cdf6cd245e5fff22fd2b6f16021882b9 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 1 Mar 2018 14:33:43 -0600 Subject: [PATCH 09/18] Test Series[ea].tolist Moved to test_casting --- pandas/tests/extension/base/casting.py | 5 +++++ pandas/tests/extension/base/interface.py | 6 ------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py index adc690939b36c..74fe8f196a089 100644 --- a/pandas/tests/extension/base/casting.py +++ b/pandas/tests/extension/base/casting.py @@ -11,3 +11,8 @@ def test_astype_object_series(self, all_data): ser = pd.Series({"A": all_data}) result = ser.astype(object) assert isinstance(result._data.blocks[0], ObjectBlock) + + def test_tolist(self, data): + result = pd.Series(data).tolist() + expected = list(data) + assert result == expected diff --git a/pandas/tests/extension/base/interface.py b/pandas/tests/extension/base/interface.py index bf1c31742182d..e1596f0675f32 100644 --- a/pandas/tests/extension/base/interface.py +++ b/pandas/tests/extension/base/interface.py @@ -2,7 +2,6 @@ import pandas as pd from pandas.compat import StringIO -from pandas.core.dtypes.cast import tolist from pandas.core.dtypes.common import is_extension_array_dtype from pandas.core.dtypes.dtypes import ExtensionDtype @@ -54,8 +53,3 @@ def test_is_extension_array_dtype(self, data): assert is_extension_array_dtype(data.dtype) assert is_extension_array_dtype(pd.Series(data)) assert isinstance(data.dtype, ExtensionDtype) - - def test_tolist(self, data): - result = tolist(data) - expected = list(data) - assert result == expected From e902f182521da9bf17d683ed9d07cbdfdaf72359 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 1 Mar 2018 15:52:51 -0600 Subject: [PATCH 10/18] Fixed Categorical unwrapping --- doc/source/whatsnew/v0.23.0.txt | 1 + pandas/core/arrays/categorical.py | 9 ++++----- pandas/tests/categorical/test_dtypes.py | 5 +++++ 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt index fb19fd81fe7c7..39b510954b66b 100644 --- a/doc/source/whatsnew/v0.23.0.txt +++ b/doc/source/whatsnew/v0.23.0.txt @@ -779,6 +779,7 @@ Categorical - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`) - Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`) - Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19565`) +- Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`) Datetimelike ^^^^^^^^^^^^ diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index abba9f8a90528..a99e3e2900fe4 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -13,7 +13,8 @@ from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.cast import ( maybe_infer_to_datetimelike, - coerce_indexer_dtype) + coerce_indexer_dtype, + tolist) from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.common import ( _ensure_int64, @@ -475,9 +476,7 @@ def tolist(self): (for str, int, float) or a pandas scalar (for Timestamp/Timedelta/Interval/Period) """ - if is_datetimelike(self.categories): - return [com._maybe_box_datetimelike(x) for x in self] - return np.array(self).tolist() + return tolist(self) @property def base(self): @@ -1712,7 +1711,7 @@ def __len__(self): def __iter__(self): """Returns an Iterator over the values of this Categorical.""" - return iter(self.get_values()) + return iter(self.get_values().tolist()) def _tidy_repr(self, max_vals=10, footer=True): """ a short repr displaying only max_vals and an optional (but default diff --git a/pandas/tests/categorical/test_dtypes.py b/pandas/tests/categorical/test_dtypes.py index 8973d1196f6a9..92e7b03e4ba71 100644 --- a/pandas/tests/categorical/test_dtypes.py +++ b/pandas/tests/categorical/test_dtypes.py @@ -161,3 +161,8 @@ def test_astype_category(self, dtype_ordered, cat_ordered): result = cat.astype('category') expected = cat tm.assert_categorical_equal(result, expected) + + def test_iter_python_types(self): + # GH-19909 + cat = Categorical([1, 2]) + assert isinstance(list(cat)[0], int) From 17126e6000a0941283bfba22703a51d074707640 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 2 Mar 2018 06:49:57 -0600 Subject: [PATCH 11/18] updated --- pandas/core/arrays/categorical.py | 2 +- pandas/tests/categorical/test_dtypes.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a99e3e2900fe4..eeeed71e96077 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1711,7 +1711,7 @@ def __len__(self): def __iter__(self): """Returns an Iterator over the values of this Categorical.""" - return iter(self.get_values().tolist()) + return iter(tolist(self)) def _tidy_repr(self, max_vals=10, footer=True): """ a short repr displaying only max_vals and an optional (but default diff --git a/pandas/tests/categorical/test_dtypes.py b/pandas/tests/categorical/test_dtypes.py index 92e7b03e4ba71..82e5a7c4e56f8 100644 --- a/pandas/tests/categorical/test_dtypes.py +++ b/pandas/tests/categorical/test_dtypes.py @@ -1,11 +1,11 @@ # -*- coding: utf-8 -*- - import pytest import numpy as np import pandas.util.testing as tm from pandas.core.dtypes.dtypes import CategoricalDtype +from pandas.compat import long from pandas import Categorical, Index, CategoricalIndex, Series @@ -164,5 +164,6 @@ def test_astype_category(self, dtype_ordered, cat_ordered): def test_iter_python_types(self): # GH-19909 + # TODO(Py2): Remove long cat = Categorical([1, 2]) - assert isinstance(list(cat)[0], int) + assert isinstance(list(cat)[0], (int, long)) From 1106ef258b9905fe225b967fe77206e981a1ac53 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 2 Mar 2018 07:08:00 -0600 Subject: [PATCH 12/18] Back to getvalues list --- pandas/core/arrays/categorical.py | 2 +- pandas/core/dtypes/cast.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index eeeed71e96077..a99e3e2900fe4 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1711,7 +1711,7 @@ def __len__(self): def __iter__(self): """Returns an Iterator over the values of this Categorical.""" - return iter(tolist(self)) + return iter(self.get_values().tolist()) def _tidy_repr(self, max_vals=10, footer=True): """ a short repr displaying only max_vals and an optional (but default diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index f8790c0b25cb0..4b1c44acb4187 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1235,7 +1235,7 @@ def tolist(values): Returns ------- list - Each element of the list is a Python scalar (stor, int float) + Each element of the list is a Python scalar (str, int, float) or a pandas scalar (Timestamp / Timedelta / Interval / Period) or the scalar type for 3rd party Extension Arrays. From 744c381c7f7885bdb619ee5b2774b20ec7303f3e Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 2 Mar 2018 08:20:34 -0600 Subject: [PATCH 13/18] Simplified --- pandas/core/arrays/categorical.py | 5 ++--- pandas/tests/categorical/test_dtypes.py | 9 ++++++++- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index a99e3e2900fe4..f97b288c60e9e 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -13,8 +13,7 @@ from pandas.core.dtypes.missing import isna, notna from pandas.core.dtypes.cast import ( maybe_infer_to_datetimelike, - coerce_indexer_dtype, - tolist) + coerce_indexer_dtype) from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.core.dtypes.common import ( _ensure_int64, @@ -476,7 +475,7 @@ def tolist(self): (for str, int, float) or a pandas scalar (for Timestamp/Timedelta/Interval/Period) """ - return tolist(self) + return list(self) @property def base(self): diff --git a/pandas/tests/categorical/test_dtypes.py b/pandas/tests/categorical/test_dtypes.py index 82e5a7c4e56f8..00e99db628c2a 100644 --- a/pandas/tests/categorical/test_dtypes.py +++ b/pandas/tests/categorical/test_dtypes.py @@ -6,7 +6,7 @@ import pandas.util.testing as tm from pandas.core.dtypes.dtypes import CategoricalDtype from pandas.compat import long -from pandas import Categorical, Index, CategoricalIndex, Series +from pandas import Categorical, Index, CategoricalIndex, Series, Timestamp class TestCategoricalDtypes(object): @@ -167,3 +167,10 @@ def test_iter_python_types(self): # TODO(Py2): Remove long cat = Categorical([1, 2]) assert isinstance(list(cat)[0], (int, long)) + assert isinstance(cat.tolist()[0], (int, long)) + + def test_iter_python_types_datetime(self): + cat = Categorical([Timestamp('2017-01-01'), + Timestamp('2017-01-02')]) + assert isinstance(list(cat)[0], Timestamp) + assert isinstance(cat.tolist()[0], Timestamp) From 9a3fa550c9739292cf896f7cd56fb85a3beb23cf Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 2 Mar 2018 15:50:16 -0600 Subject: [PATCH 14/18] As a method --- pandas/core/base.py | 9 +++++++-- pandas/core/dtypes/cast.py | 8 -------- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/pandas/core/base.py b/pandas/core/base.py index fcac9bbf51f81..8c2fd874c5142 100644 --- a/pandas/core/base.py +++ b/pandas/core/base.py @@ -8,8 +8,8 @@ from pandas.core.dtypes.missing import isna from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass -from pandas.core.dtypes.cast import tolist from pandas.core.dtypes.common import ( + is_datetimelike, is_object_dtype, is_list_like, is_scalar, @@ -826,7 +826,12 @@ def tolist(self): -------- numpy.ndarray.tolist """ - return tolist(self._values) + if is_datetimelike(self._values): + return [com._maybe_box_datetimelike(x) for x in self._values] + elif is_extension_array_dtype(self._values): + return list(self._values) + else: + return self._values.tolist() def __iter__(self): """ diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 4b1c44acb4187..d587e1f37392d 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1244,11 +1244,3 @@ def tolist(values): Series.tolist numpy.ndarray.tolist """ - from pandas.core.common import _maybe_box_datetimelike - - if is_datetimelike(values): - return [_maybe_box_datetimelike(x) for x in values] - elif is_extension_array_dtype(values): - return list(values) - else: - return values.tolist() From f22fd7beab950651c65cced5b8ca2b2160a5dac5 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Fri, 2 Mar 2018 21:14:47 -0600 Subject: [PATCH 15/18] Removed tolist fully --- pandas/core/dtypes/cast.py | 21 --------------------- pandas/tests/dtypes/test_cast.py | 15 +-------------- 2 files changed, 1 insertion(+), 35 deletions(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index d587e1f37392d..611146ef9ff7c 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -1223,24 +1223,3 @@ def construct_1d_object_array_from_listlike(values): result = np.empty(len(values), dtype='object') result[:] = values return result - - -def tolist(values): - """Convert an array-like to a list of scalar types. - - Parameters - ---------- - values : ndarray or ExtensionArray - - Returns - ------- - list - Each element of the list is a Python scalar (str, int, float) - or a pandas scalar (Timestamp / Timedelta / Interval / Period) or - the scalar type for 3rd party Extension Arrays. - - See Also - -------- - Series.tolist - numpy.ndarray.tolist - """ diff --git a/pandas/tests/dtypes/test_cast.py b/pandas/tests/dtypes/test_cast.py index acc5657b1d952..96a9e3227b40b 100644 --- a/pandas/tests/dtypes/test_cast.py +++ b/pandas/tests/dtypes/test_cast.py @@ -23,8 +23,7 @@ maybe_convert_scalar, find_common_type, construct_1d_object_array_from_listlike, - construct_1d_arraylike_from_scalar, - tolist) + construct_1d_arraylike_from_scalar) from pandas.core.dtypes.dtypes import ( CategoricalDtype, DatetimeTZDtype, @@ -325,18 +324,6 @@ def test_maybe_convert_objects_copy(self): out = maybe_convert_objects(values, copy=True) assert values is not out - @pytest.mark.parametrize('values, expected', [ - (pd.date_range('2017', periods=1), [pd.Timestamp('2017')]), - (pd.period_range('2017', periods=1, freq='D'), - [pd.Period('2017', freq='D')]), - (pd.timedelta_range(0, periods=1), [pd.Timedelta('0')]), - (pd.interval_range(0, periods=1), [pd.Interval(0, 1)]), - (np.array([0, 1]), [0, 1]), - ]) - def test_tolist(self, values, expected): - result = tolist(values) - assert result == expected - class TestCommonTypes(object): From b342efe1fa6d183c4ad20c1890a78181025fcd4c Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 5 Mar 2018 15:51:11 -0800 Subject: [PATCH 16/18] Linting --- pandas/core/dtypes/cast.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py index 611146ef9ff7c..747dfe62e1fb7 100644 --- a/pandas/core/dtypes/cast.py +++ b/pandas/core/dtypes/cast.py @@ -11,7 +11,7 @@ from .common import (_ensure_object, is_bool, is_integer, is_float, is_complex, is_datetimetz, is_categorical_dtype, is_datetimelike, - is_extension_type, is_extension_array_dtype, + is_extension_type, is_object_dtype, is_datetime64tz_dtype, is_datetime64_dtype, is_datetime64_ns_dtype, From a35f93c6ef65d66b756570d6e15d5433634cf8b7 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Mar 2018 09:10:49 -0500 Subject: [PATCH 17/18] Just apply to objects --- pandas/core/arrays/base.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 6566e9a76841e..91601456e146e 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -279,19 +279,10 @@ def fillna(self, value=None, method=None, limit=None): if mask.any(): if method is not None: - # ffill / bfill - # The basic idea is to create an array of integer positions. - # Internally, we use iNaT and the datetime filling routines - # to avoid floating-point NaN. Once filled, we take on `self` - # to get the actual values. func = pad_1d if method == 'pad' else backfill_1d - idx = np.arange(len(self), dtype='int64') - idx[mask] = iNaT - idx = _ensure_platform_int(func(idx, mask=mask, - limit=limit, - dtype='datetime64[ns]')) - idx[idx == iNaT] = -1 # missing value marker for take. - new_values = self.take(idx) + new_values = func(self.astype(object), limit=limit, + mask=mask) + new_values = self._constructor_from_sequence(new_values) else: # fill with value new_values = self.copy() From 1160e1530bef37fc6ebcda36d9e6db6ebc22b6d3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 12 Mar 2018 20:45:18 -0500 Subject: [PATCH 18/18] Linting --- pandas/core/arrays/base.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 91601456e146e..6ed486cb9998b 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -264,8 +264,6 @@ def fillna(self, value=None, method=None, limit=None): from pandas.api.types import is_scalar from pandas.util._validators import validate_fillna_kwargs from pandas.core.missing import pad_1d, backfill_1d - from pandas.core.dtypes.common import _ensure_platform_int - from pandas._libs.tslib import iNaT value, method = validate_fillna_kwargs(value, method)