pandas-dev · jreback · Mar 16, 2018 · Feb 22, 2018 · Feb 27, 2018 · Feb 27, 2018
diff --git a/doc/source/whatsnew/v0.23.0.txt b/doc/source/whatsnew/v0.23.0.txt
@@ -843,6 +843,7 @@ Categorical
 - Bug in :meth:`Series.astype` and ``Categorical.astype()`` where an existing categorical data does not get updated (:issue:`10696`, :issue:`18593`)
 - Bug in :class:`Index` constructor with ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19032`)
 - Bug in :class:`Series` constructor with scalar and ``dtype=CategoricalDtype(...)`` where ``categories`` and ``ordered`` are not maintained (issue:`19565`)
+- Bug in ``Categorical.__iter__`` not converting to Python types (:issue:`19909`)
 - Bug in :func:`pandas.factorize` returning the unique codes for the ``uniques``. This now returns a ``Categorical`` with the same dtype as the input (:issue:`19721`)
 - Bug in :func:`pandas.factorize` including an item for missing values in the ``uniques`` return value (:issue:`19721`)
 

diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py
@@ -236,6 +236,59 @@ def isna(self):
         """
         raise AbstractMethodError(self)
 
+    def fillna(self, value=None, method=None, limit=None):
+        """ Fill NA/NaN values using the specified method.
+
+        Parameters
+        ----------
+        value : scalar, array-like
+            If a scalar value is passed it is used to fill all missing values.
+            Alternatively, an array-like 'value' can be given. It's expected
+            that the array-like have the same length as 'self'.
+        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
+            Method to use for filling holes in reindexed Series
+            pad / ffill: propagate last valid observation forward to next valid
+            backfill / bfill: use NEXT valid observation to fill gap
+        limit : int, default None
+            If method is specified, this is the maximum number of consecutive
+            NaN values to forward/backward fill. In other words, if there is
+            a gap with more than this number of consecutive NaNs, it will only
+            be partially filled. If method is not specified, this is the
+            maximum number of entries along the entire axis where NaNs will be
+            filled.
+
+        Returns
+        -------
+        filled : ExtensionArray with NA/NaN filled
+        """
+        from pandas.api.types import is_scalar
+        from pandas.util._validators import validate_fillna_kwargs
+        from pandas.core.missing import pad_1d, backfill_1d
+
+        value, method = validate_fillna_kwargs(value, method)
+
+        mask = self.isna()
+
+        if not is_scalar(value):
+            if len(value) != len(self):
+                raise ValueError("Length of 'value' does not match. Got ({}) "
+                                 " expected {}".format(len(value), len(self)))
+            value = value[mask]
+
+        if mask.any():
+            if method is not None:
+                func = pad_1d if method == 'pad' else backfill_1d
+                new_values = func(self.astype(object), limit=limit,
+                                  mask=mask)
+                new_values = self._constructor_from_sequence(new_values)
+            else:
+                # fill with value
+                new_values = self.copy()
+                new_values[mask] = value
+        else:
+            new_values = self.copy()
+        return new_values
+
     def unique(self):
         """Compute the ExtensionArray of unique values.
 
@@ -285,6 +338,7 @@ def take(self, indexer, allow_fill=True, fill_value=None):
         .. code-block:: python
 
            def take(self, indexer, allow_fill=True, fill_value=None):
+               indexer = np.asarray(indexer)
                mask = indexer == -1
                result = self.data.take(indexer)
                result[mask] = np.nan  # NA for this type

diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py
@@ -480,9 +480,7 @@ def tolist(self):
         (for str, int, float) or a pandas scalar
         (for Timestamp/Timedelta/Interval/Period)
         """
-        if is_datetimelike(self.categories):
-            return [com._maybe_box_datetimelike(x) for x in self]
-        return np.array(self).tolist()
+        return list(self)
 
     @property
     def base(self):
@@ -1592,16 +1590,16 @@ def fillna(self, value=None, method=None, limit=None):
 
         Parameters
         ----------
-        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
-            Method to use for filling holes in reindexed Series
-            pad / ffill: propagate last valid observation forward to next valid
-            backfill / bfill: use NEXT valid observation to fill gap
         value : scalar, dict, Series
             If a scalar value is passed it is used to fill all missing values.
             Alternatively, a Series or dict can be used to fill in different
             values for each index. The value should not be a list. The
             value(s) passed should either be in the categories or should be
             NaN.
+        method : {'backfill', 'bfill', 'pad', 'ffill', None}, default None
+            Method to use for filling holes in reindexed Series
+            pad / ffill: propagate last valid observation forward to next valid
+            backfill / bfill: use NEXT valid observation to fill gap
         limit : int, default None
             (Not implemented yet for Categorical!)
             If method is specified, this is the maximum number of consecutive
@@ -1717,7 +1715,7 @@ def __len__(self):
 
     def __iter__(self):
         """Returns an Iterator over the values of this Categorical."""
-        return iter(self.get_values())
+        return iter(self.get_values().tolist())
 
     def _tidy_repr(self, max_vals=10, footer=True):
         """ a short repr displaying only max_vals and an optional (but default

diff --git a/pandas/core/base.py b/pandas/core/base.py
@@ -9,10 +9,10 @@
 from pandas.core.dtypes.missing import isna
 from pandas.core.dtypes.generic import ABCDataFrame, ABCSeries, ABCIndexClass
 from pandas.core.dtypes.common import (
+    is_datetimelike,
     is_object_dtype,
     is_list_like,
     is_scalar,
-    is_datetimelike,
     is_extension_type,
     is_extension_array_dtype)
 
@@ -826,9 +826,10 @@ def tolist(self):
         --------
         numpy.ndarray.tolist
         """
-
-        if is_datetimelike(self):
+        if is_datetimelike(self._values):
             return [com._maybe_box_datetimelike(x) for x in self._values]
+        elif is_extension_array_dtype(self._values):
+            return list(self._values)
         else:
             return self._values.tolist()
 

diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -11,7 +11,8 @@
 from .common import (_ensure_object, is_bool, is_integer, is_float,
                      is_complex, is_datetimetz, is_categorical_dtype,
                      is_datetimelike,
-                     is_extension_type, is_object_dtype,
+                     is_extension_type,
+                     is_object_dtype,
                      is_datetime64tz_dtype, is_datetime64_dtype,
                      is_datetime64_ns_dtype,
                      is_timedelta64_dtype, is_timedelta64_ns_dtype,

diff --git a/pandas/core/internals.py b/pandas/core/internals.py
@@ -1963,6 +1963,23 @@ def concat_same_type(self, to_concat, placement=None):
         return self.make_block_same_class(values, ndim=self.ndim,
                                           placement=placement)
 
+    def fillna(self, value, limit=None, inplace=False, downcast=None,
+               mgr=None):
+        values = self.values if inplace else self.values.copy()
+        values = values.fillna(value=value, limit=limit)
+        return [self.make_block_same_class(values=values,
+                                           placement=self.mgr_locs,
+                                           ndim=self.ndim)]
+
+    def interpolate(self, method='pad', axis=0, inplace=False, limit=None,
+                    fill_value=None, **kwargs):
+
+        values = self.values if inplace else self.values.copy()
+        return self.make_block_same_class(
+            values=values.fillna(value=fill_value, method=method,
+                                 limit=limit),
+            placement=self.mgr_locs)
+
 
 class NumericBlock(Block):
     __slots__ = ()
@@ -2522,27 +2539,6 @@ def _try_coerce_result(self, result):
 
         return result
 
-    def fillna(self, value, limit=None, inplace=False, downcast=None,
-               mgr=None):
-        # we may need to upcast our fill to match our dtype
-        if limit is not None:
-            raise NotImplementedError("specifying a limit for 'fillna' has "
-                                      "not been implemented yet")
-
-        values = self.values if inplace else self.values.copy()
-        values = self._try_coerce_result(values.fillna(value=value,
-                                                       limit=limit))
-        return [self.make_block(values=values)]
-
-    def interpolate(self, method='pad', axis=0, inplace=False, limit=None,
-                    fill_value=None, **kwargs):
-
-        values = self.values if inplace else self.values.copy()
-        return self.make_block_same_class(
-            values=values.fillna(fill_value=fill_value, method=method,
-                                 limit=limit),
-            placement=self.mgr_locs)
-
     def shift(self, periods, axis=0, mgr=None):
         return self.make_block_same_class(values=self.values.shift(periods),
                                           placement=self.mgr_locs)

diff --git a/pandas/tests/categorical/test_dtypes.py b/pandas/tests/categorical/test_dtypes.py
@@ -1,12 +1,12 @@
 # -*- coding: utf-8 -*-
-
 import pytest
 
 import numpy as np
 
 import pandas.util.testing as tm
 from pandas.core.dtypes.dtypes import CategoricalDtype
-from pandas import Categorical, Index, CategoricalIndex, Series
+from pandas.compat import long
+from pandas import Categorical, Index, CategoricalIndex, Series, Timestamp
 
 
 class TestCategoricalDtypes(object):
@@ -161,3 +161,16 @@ def test_astype_category(self, dtype_ordered, cat_ordered):
             result = cat.astype('category')
             expected = cat
             tm.assert_categorical_equal(result, expected)
+
+    def test_iter_python_types(self):
+        # GH-19909
+        # TODO(Py2): Remove long
+        cat = Categorical([1, 2])
+        assert isinstance(list(cat)[0], (int, long))
+        assert isinstance(cat.tolist()[0], (int, long))
+
+    def test_iter_python_types_datetime(self):
+        cat = Categorical([Timestamp('2017-01-01'),
+                           Timestamp('2017-01-02')])
+        assert isinstance(list(cat)[0], Timestamp)
+        assert isinstance(cat.tolist()[0], Timestamp)
diff --git a/pandas/tests/extension/base/casting.py b/pandas/tests/extension/base/casting.py
@@ -11,3 +11,8 @@ def test_astype_object_series(self, all_data):
         ser = pd.Series({"A": all_data})
         result = ser.astype(object)
         assert isinstance(result._data.blocks[0], ObjectBlock)
+
+    def test_tolist(self, data):
+        result = pd.Series(data).tolist()
+        expected = list(data)
+        assert result == expected
diff --git a/pandas/tests/extension/base/missing.py b/pandas/tests/extension/base/missing.py
@@ -1,4 +1,5 @@
 import numpy as np
+import pytest
 
 import pandas as pd
 import pandas.util.testing as tm
@@ -45,3 +46,71 @@ def test_dropna_frame(self, data_missing):
         result = df.dropna()
         expected = df.iloc[:0]
         self.assert_frame_equal(result, expected)
+
+    def test_fillna_limit_pad(self, data_missing):
+        arr = data_missing.take([1, 0, 0, 0, 1])
+        result = pd.Series(arr).fillna(method='ffill', limit=2)
+        expected = pd.Series(data_missing.take([1, 1, 1, 0, 1]))
+        self.assert_series_equal(result, expected)
+
+    def test_fillna_limit_backfill(self, data_missing):
+        arr = data_missing.take([1, 0, 0, 0, 1])
+        result = pd.Series(arr).fillna(method='backfill', limit=2)
+        expected = pd.Series(data_missing.take([1, 0, 1, 1, 1]))
+        self.assert_series_equal(result, expected)
+
+    def test_fillna_series(self, data_missing):
+        fill_value = data_missing[1]
+        ser = pd.Series(data_missing)
+
+        result = ser.fillna(fill_value)
+        expected = pd.Series(type(data_missing)([fill_value, fill_value]))
+        self.assert_series_equal(result, expected)
+
+        # Fill with a series
+        result = ser.fillna(expected)
+        self.assert_series_equal(result, expected)
+
+        # Fill with a series not affecting the missing values
+        result = ser.fillna(ser)
+        self.assert_series_equal(result, ser)
+
+    @pytest.mark.parametrize('method', ['ffill', 'bfill'])
+    def test_fillna_series_method(self, data_missing, method):
+        fill_value = data_missing[1]
+
+        if method == 'ffill':
+            data_missing = type(data_missing)(data_missing[::-1])
+
+        result = pd.Series(data_missing).fillna(method=method)
+        expected = pd.Series(type(data_missing)([fill_value, fill_value]))
+
+        self.assert_series_equal(result, expected)
+
+    def test_fillna_frame(self, data_missing):
+        fill_value = data_missing[1]
+
+        result = pd.DataFrame({
+            "A": data_missing,
+            "B": [1, 2]
+        }).fillna(fill_value)
+
+        expected = pd.DataFrame({
+            "A": type(data_missing)([fill_value, fill_value]),
+            "B": [1, 2],
+        })
+
+        self.assert_frame_equal(result, expected)
+
+    def test_fillna_fill_other(self, data):
+        result = pd.DataFrame({
+            "A": data,
+            "B": [np.nan] * len(data)
+        }).fillna({"B": 0.0})
+
+        expected = pd.DataFrame({
+            "A": data,
+            "B": [0.0] * len(result),
+        })
+
+        self.assert_frame_equal(result, expected)
diff --git a/pandas/tests/extension/category/test_categorical.py b/pandas/tests/extension/category/test_categorical.py
@@ -69,7 +69,14 @@ def test_getitem_scalar(self):
 
 
 class TestMissing(base.BaseMissingTests):
-    pass
+
+    @pytest.mark.skip(reason="Not implemented")
+    def test_fillna_limit_pad(self):
+        pass
+
+    @pytest.mark.skip(reason="Not implemented")
+    def test_fillna_limit_backfill(self):
+        pass
 
 
 class TestMethods(base.BaseMethodsTests):

diff --git a/pandas/tests/extension/decimal/array.py b/pandas/tests/extension/decimal/array.py
@@ -71,6 +71,7 @@ def isna(self):
         return np.array([x.is_nan() for x in self.values])
 
     def take(self, indexer, allow_fill=True, fill_value=None):
+        indexer = np.asarray(indexer)
         mask = indexer == -1
 
         indexer = _ensure_platform_int(indexer)