REF/TST: collect astype tests (#37282)

pandas-dev · Oct 21, 2020
commit 6ac3765 · 1 parent 4a08c02

Showing 5 changed files with 339 additions and 318 deletions.
diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py
@@ -0,0 +1,28 @@
+import numpy as np
+import pytest
+
+import pandas as pd
+import pandas._testing as tm
+
+
+class TestConvertDtypes:
+    @pytest.mark.parametrize(
+        "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
+    )
+    def test_convert_dtypes(self, convert_integer, expected):
+        # Specific types are tested in tests/series/test_dtypes.py
+        # Just check that it works for DataFrame here
+        df = pd.DataFrame(
+            {
+                "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
+                "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
+            }
+        )
+        result = df.convert_dtypes(True, True, convert_integer, False)
+        expected = pd.DataFrame(
+            {
+                "a": pd.Series([1, 2, 3], dtype=expected),
+                "b": pd.Series(["x", "y", "z"], dtype="string"),
+            }
+        )
+        tm.assert_frame_equal(result, expected)
diff --git a/pandas/tests/frame/test_dtypes.py b/pandas/tests/frame/test_dtypes.py
@@ -245,27 +245,6 @@ def test_str_to_small_float_conversion_type(self):
         expected = DataFrame(col_data, columns=["A"], dtype=float)
         tm.assert_frame_equal(result, expected)
 
-    @pytest.mark.parametrize(
-        "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
-    )
-    def test_convert_dtypes(self, convert_integer, expected):
-        # Specific types are tested in tests/series/test_dtypes.py
-        # Just check that it works for DataFrame here
-        df = DataFrame(
-            {
-                "a": Series([1, 2, 3], dtype=np.dtype("int32")),
-                "b": Series(["x", "y", "z"], dtype=np.dtype("O")),
-            }
-        )
-        result = df.convert_dtypes(True, True, convert_integer, False)
-        expected = DataFrame(
-            {
-                "a": Series([1, 2, 3], dtype=expected),
-                "b": Series(["x", "y", "z"], dtype="string"),
-            }
-        )
-        tm.assert_frame_equal(result, expected)
-
 
 class TestDataFrameDatetimeWithTZ:
     def test_interleave(self, timezone_frame):

diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py
@@ -1,11 +1,97 @@
+from datetime import datetime, timedelta
+from importlib import reload
+import string
+import sys
+
 import numpy as np
 import pytest
 
-from pandas import NA, Interval, Series, Timestamp, date_range
+from pandas._libs.tslibs import iNaT
+
+from pandas import (
+    NA,
+    Categorical,
+    CategoricalDtype,
+    Index,
+    Interval,
+    Series,
+    Timedelta,
+    Timestamp,
+    date_range,
+)
 import pandas._testing as tm
 
 
+class TestAstypeAPI:
+    def test_arg_for_errors_in_astype(self):
+        # see GH#14878
+        ser = Series([1, 2, 3])
+
+        msg = (
+            r"Expected value of kwarg 'errors' to be one of \['raise', "
+            r"'ignore'\]\. Supplied value is 'False'"
+        )
+        with pytest.raises(ValueError, match=msg):
+            ser.astype(np.float64, errors=False)
+
+        ser.astype(np.int8, errors="raise")
+
+    @pytest.mark.parametrize("dtype_class", [dict, Series])
+    def test_astype_dict_like(self, dtype_class):
+        # see GH#7271
+        ser = Series(range(0, 10, 2), name="abc")
+
+        dt1 = dtype_class({"abc": str})
+        result = ser.astype(dt1)
+        expected = Series(["0", "2", "4", "6", "8"], name="abc")
+        tm.assert_series_equal(result, expected)
+
+        dt2 = dtype_class({"abc": "float64"})
+        result = ser.astype(dt2)
+        expected = Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc")
+        tm.assert_series_equal(result, expected)
+
+        dt3 = dtype_class({"abc": str, "def": str})
+        msg = (
+            "Only the Series name can be used for the key in Series dtype "
+            r"mappings\."
+        )
+        with pytest.raises(KeyError, match=msg):
+            ser.astype(dt3)
+
+        dt4 = dtype_class({0: str})
+        with pytest.raises(KeyError, match=msg):
+            ser.astype(dt4)
+
+        # GH#16717
+        # if dtypes provided is empty, it should error
+        if dtype_class is Series:
+            dt5 = dtype_class({}, dtype=object)
+        else:
+            dt5 = dtype_class({})
+
+        with pytest.raises(KeyError, match=msg):
+            ser.astype(dt5)
+
+
 class TestAstype:
+    @pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
+    def test_astype_generic_timestamp_no_frequency(self, dtype, request):
+        # see GH#15524, GH#15987
+        data = [1]
+        s = Series(data)
+
+        if np.dtype(dtype).name not in ["timedelta64", "datetime64"]:
+            mark = pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
+            request.node.add_marker(mark)
+
+        msg = (
+            fr"The '{dtype.__name__}' dtype has no unit\. "
+            fr"Please pass in '{dtype.__name__}\[ns\]' instead."
+        )
+        with pytest.raises(ValueError, match=msg):
+            s.astype(dtype)
+
     def test_astype_dt64_to_str(self):
         # GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
         dti = date_range("2012-01-01", periods=3)
@@ -27,6 +113,87 @@ def test_astype_dt64tz_to_str(self):
         )
         tm.assert_series_equal(result, expected)
 
+    def test_astype_datetime(self):
+        s = Series(iNaT, dtype="M8[ns]", index=range(5))
+
+        s = s.astype("O")
+        assert s.dtype == np.object_
+
+        s = Series([datetime(2001, 1, 2, 0, 0)])
+
+        s = s.astype("O")
+        assert s.dtype == np.object_
+
+        s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])
+
+        s[1] = np.nan
+        assert s.dtype == "M8[ns]"
+
+        s = s.astype("O")
+        assert s.dtype == np.object_
+
+    def test_astype_datetime64tz(self):
+        s = Series(date_range("20130101", periods=3, tz="US/Eastern"))
+
+        # astype
+        result = s.astype(object)
+        expected = Series(s.astype(object), dtype=object)
+        tm.assert_series_equal(result, expected)
+
+        result = Series(s.values).dt.tz_localize("UTC").dt.tz_convert(s.dt.tz)
+        tm.assert_series_equal(result, s)
+
+        # astype - object, preserves on construction
+        result = Series(s.astype(object))
+        expected = s.astype(object)
+        tm.assert_series_equal(result, expected)
+
+        # astype - datetime64[ns, tz]
+        result = Series(s.values).astype("datetime64[ns, US/Eastern]")
+        tm.assert_series_equal(result, s)
+
+        result = Series(s.values).astype(s.dtype)
+        tm.assert_series_equal(result, s)
+
+        result = s.astype("datetime64[ns, CET]")
+        expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
+        tm.assert_series_equal(result, expected)
+
+    def test_astype_str_cast_dt64(self):
+        # see GH#9757
+        ts = Series([Timestamp("2010-01-04 00:00:00")])
+        s = ts.astype(str)
+
+        expected = Series(["2010-01-04"])
+        tm.assert_series_equal(s, expected)
+
+        ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
+        s = ts.astype(str)
+
+        expected = Series(["2010-01-04 00:00:00-05:00"])
+        tm.assert_series_equal(s, expected)
+
+    def test_astype_str_cast_td64(self):
+        # see GH#9757
+
+        td = Series([Timedelta(1, unit="d")])
+        ser = td.astype(str)
+
+        expected = Series(["1 days"])
+        tm.assert_series_equal(ser, expected)
+
+    def test_dt64_series_astype_object(self):
+        dt64ser = Series(date_range("20130101", periods=3))
+        result = dt64ser.astype(object)
+        assert isinstance(result.iloc[0], datetime)
+        assert result.dtype == np.object_
+
+    def test_td64_series_astype_object(self):
+        tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
+        result = tdser.astype(object)
+        assert isinstance(result.iloc[0], timedelta)
+        assert result.dtype == np.object_
+
     @pytest.mark.parametrize(
         "values",
         [
@@ -70,3 +237,122 @@ def test_astype_to_str_preserves_na(self, value, string_value):
         result = s.astype(str)
         expected = Series(["a", "b", string_value], dtype=object)
         tm.assert_series_equal(result, expected)
+
+    @pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
+    def test_astype(self, dtype):
+        s = Series(np.random.randn(5), name="foo")
+        as_typed = s.astype(dtype)
+
+        assert as_typed.dtype == dtype
+        assert as_typed.name == s.name
+
+    @pytest.mark.parametrize("value", [np.nan, np.inf])
+    @pytest.mark.parametrize("dtype", [np.int32, np.int64])
+    def test_astype_cast_nan_inf_int(self, dtype, value):
+        # gh-14265: check NaN and inf raise error when converting to int
+        msg = "Cannot convert non-finite values \\(NA or inf\\) to integer"
+        s = Series([value])
+
+        with pytest.raises(ValueError, match=msg):
+            s.astype(dtype)
+
+    @pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
+    def test_astype_cast_object_int_fail(self, dtype):
+        arr = Series(["car", "house", "tree", "1"])
+        msg = r"invalid literal for int\(\) with base 10: 'car'"
+        with pytest.raises(ValueError, match=msg):
+            arr.astype(dtype)
+
+    def test_astype_cast_object_int(self):
+        arr = Series(["1", "2", "3", "4"], dtype=object)
+        result = arr.astype(int)
+
+        tm.assert_series_equal(result, Series(np.arange(1, 5)))
+
+    def test_astype_unicode(self):
+        # see GH#7758: A bit of magic is required to set
+        # default encoding to utf-8
+        digits = string.digits
+        test_series = [
+            Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
+            Series(["データーサイエンス、お前はもう死んでいる"]),
+        ]
+
+        former_encoding = None
+
+        if sys.getdefaultencoding() == "utf-8":
+            test_series.append(Series(["野菜食べないとやばい".encode()]))
+
+        for s in test_series:
+            res = s.astype("unicode")
+            expec = s.map(str)
+            tm.assert_series_equal(res, expec)
+
+        # Restore the former encoding
+        if former_encoding is not None and former_encoding != "utf-8":
+            reload(sys)
+            sys.setdefaultencoding(former_encoding)
+
+
+class TestAstypeCategorical:
+    def test_astype_categoricaldtype(self):
+        s = Series(["a", "b", "a"])
+        result = s.astype(CategoricalDtype(["a", "b"], ordered=True))
+        expected = Series(Categorical(["a", "b", "a"], ordered=True))
+        tm.assert_series_equal(result, expected)
+
+        result = s.astype(CategoricalDtype(["a", "b"], ordered=False))
+        expected = Series(Categorical(["a", "b", "a"], ordered=False))
+        tm.assert_series_equal(result, expected)
+
+        result = s.astype(CategoricalDtype(["a", "b", "c"], ordered=False))
+        expected = Series(
+            Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False)
+        )
+        tm.assert_series_equal(result, expected)
+        tm.assert_index_equal(result.cat.categories, Index(["a", "b", "c"]))
+
+    @pytest.mark.parametrize("name", [None, "foo"])
+    @pytest.mark.parametrize("dtype_ordered", [True, False])
+    @pytest.mark.parametrize("series_ordered", [True, False])
+    def test_astype_categorical_to_categorical(
+        self, name, dtype_ordered, series_ordered
+    ):
+        # GH#10696, GH#18593
+        s_data = list("abcaacbab")
+        s_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
+        s = Series(s_data, dtype=s_dtype, name=name)
+
+        # unspecified categories
+        dtype = CategoricalDtype(ordered=dtype_ordered)
+        result = s.astype(dtype)
+        exp_dtype = CategoricalDtype(s_dtype.categories, dtype_ordered)
+        expected = Series(s_data, name=name, dtype=exp_dtype)
+        tm.assert_series_equal(result, expected)
+
+        # different categories
+        dtype = CategoricalDtype(list("adc"), dtype_ordered)
+        result = s.astype(dtype)
+        expected = Series(s_data, name=name, dtype=dtype)
+        tm.assert_series_equal(result, expected)
+
+        if dtype_ordered is False:
+            # not specifying ordered, so only test once
+            expected = s
+            result = s.astype("category")
+            tm.assert_series_equal(result, expected)
+
+    def test_astype_bool_missing_to_categorical(self):
+        # GH-19182
+        s = Series([True, False, np.nan])
+        assert s.dtypes == np.object_
+
+        result = s.astype(CategoricalDtype(categories=[True, False]))
+        expected = Series(Categorical([True, False, np.nan], categories=[True, False]))
+        tm.assert_series_equal(result, expected)
+
+    def test_astype_categories_raises(self):
+        # deprecated GH#17636, removed in GH#27141
+        s = Series(["a", "b", "a"])
+        with pytest.raises(TypeError, match="got an unexpected"):
+            s.astype("category", categories=["a", "b"], ordered=True)
diff --git a/pandas/tests/series/methods/test_infer_objects.py b/pandas/tests/series/methods/test_infer_objects.py
@@ -0,0 +1,23 @@
+import numpy as np
+
+from pandas import Series
+import pandas._testing as tm
+
+
+class TestInferObjects:
+    def test_infer_objects_series(self):
+        # GH#11221
+        actual = Series(np.array([1, 2, 3], dtype="O")).infer_objects()
+        expected = Series([1, 2, 3])
+        tm.assert_series_equal(actual, expected)
+
+        actual = Series(np.array([1, 2, 3, None], dtype="O")).infer_objects()
+        expected = Series([1.0, 2.0, 3.0, np.nan])
+        tm.assert_series_equal(actual, expected)
+
+        # only soft conversions, unconvertable pass thru unchanged
+        actual = Series(np.array([1, 2, 3, None, "a"], dtype="O")).infer_objects()
+        expected = Series([1, 2, 3, None, "a"])
+
+        assert actual.dtype == "object"
+        tm.assert_series_equal(actual, expected)