ERR/TST: Add pytest idiom to dtypes/test_cast.py (#24847)

pandas-dev · Jan 20, 2019 · af82b2a · af82b2a
1 parent 518b237
commit af82b2a
Show file tree

Hide file tree

Showing 11 changed files with 452 additions and 396 deletions.
diff --git a/pandas/core/dtypes/cast.py b/pandas/core/dtypes/cast.py
@@ -354,7 +354,7 @@ def infer_dtype_from_scalar(val, pandas_dtype=False):
 
     # a 1-element ndarray
     if isinstance(val, np.ndarray):
-        msg = "invalid ndarray passed to _infer_dtype_from_scalar"
+        msg = "invalid ndarray passed to infer_dtype_from_scalar"
         if val.ndim != 0:
             raise ValueError(msg)
 

diff --git a/pandas/tests/dtypes/cast/__init__.py b/pandas/tests/dtypes/cast/__init__.py
diff --git a/pandas/tests/dtypes/cast/test_construct_from_scalar.py b/pandas/tests/dtypes/cast/test_construct_from_scalar.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+from pandas.core.dtypes.cast import construct_1d_arraylike_from_scalar
+from pandas.core.dtypes.dtypes import CategoricalDtype
+
+from pandas import Categorical
+from pandas.util import testing as tm
+
+
+def test_cast_1d_array_like_from_scalar_categorical():
+    """Scalar-built Categorical must keep the passed dtype's categories.
+
+    See gh-19565: the result did not maintain the categories and
+    ordering of the passed dtype.
+    """
+    categories = ["a", "b", "c"]
+    dtype = CategoricalDtype(categories=categories, ordered=False)
+    expected = Categorical(["a", "a"], categories=categories)
+
+    result = construct_1d_arraylike_from_scalar("a", len(expected), dtype)
+    tm.assert_categorical_equal(
+        result, expected, check_category_order=True, check_dtype=True)
diff --git a/pandas/tests/dtypes/cast/test_construct_ndarray.py b/pandas/tests/dtypes/cast/test_construct_ndarray.py
@@ -0,0 +1,20 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.cast import construct_1d_ndarray_preserving_na
+
+from pandas.util import testing as tm
+
+
+@pytest.mark.parametrize("values, dtype, expected", [
+    ([1, 2, 3], None, np.array([1, 2, 3])),
+    (np.array([1, 2, 3]), None, np.array([1, 2, 3])),
+    (["1", "2", None], None, np.array(["1", "2", None])),
+    (["1", "2", None], np.dtype("str"), np.array(["1", "2", None])),
+    ([1, 2, None], np.dtype("str"), np.array(["1", "2", None]))])
+def test_construct_1d_ndarray_preserving_na(values, dtype, expected):
+    # Expected arrays keep None as None, even when a str dtype is requested.
+    actual = construct_1d_ndarray_preserving_na(values, dtype=dtype)
+    tm.assert_numpy_array_equal(actual, expected)
diff --git a/pandas/tests/dtypes/cast/test_construct_object_arr.py b/pandas/tests/dtypes/cast/test_construct_object_arr.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+import pytest
+
+from pandas.core.dtypes.cast import construct_1d_object_array_from_listlike
+
+
+@pytest.mark.parametrize("datum1", [1, 2., "3", (4, 5), [6, 7], None])
+@pytest.mark.parametrize("datum2", [8, 9., "10", (11, 12), [13, 14], None])
+def test_cast_1d_array(datum1, datum2):
+    """Every pair of items round-trips through an object-dtype array."""
+    items = [datum1, datum2]
+    arr = construct_1d_object_array_from_listlike(items)
+    # Compare via list(): https://github.com/numpy/numpy/issues/10218
+    assert arr.dtype == "object"
+    assert list(arr) == items
+
+
+@pytest.mark.parametrize("val", [1, 2., None])
+def test_cast_1d_array_invalid_scalar(val):
+    with pytest.raises(TypeError, match=r"has no len\(\)"):  # literal "()"
+        construct_1d_object_array_from_listlike(val)
diff --git a/pandas/tests/dtypes/cast/test_convert_objects.py b/pandas/tests/dtypes/cast/test_convert_objects.py
@@ -0,0 +1,15 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.cast import maybe_convert_objects
+
+
+@pytest.mark.parametrize("data", [[1, 2], ["apply", "banana"]])
+@pytest.mark.parametrize("copy", [True, False])
+def test_maybe_convert_objects_copy(data, copy):
+    """The input array itself is returned exactly when copy is False."""
+    arr = np.array(data)
+    out = maybe_convert_objects(arr, copy=copy)
+    assert (out is not arr) is copy
diff --git a/pandas/tests/dtypes/cast/test_downcast.py b/pandas/tests/dtypes/cast/test_downcast.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.cast import maybe_downcast_to_dtype
+
+from pandas import DatetimeIndex, Series, Timestamp
+from pandas.util import testing as tm
+
+
+@pytest.mark.parametrize("arr,dtype,expected", [
+    # Genuinely fractional values: the same float values are expected back.
+    (np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995]), "infer",
+     np.array([8.5, 8.6, 8.7, 8.8, 8.9999999999995])),
+    # Values a hair away from whole numbers: an int64 array is expected.
+    (np.array([8., 8., 8., 8., 8.9999999999995]), "infer",
+     np.array([8, 8, 8, 8, 9], dtype=np.int64)),
+    (np.array([8., 8., 8., 8., 9.0000000000005]), "infer",
+     np.array([8, 8, 8, 8, 9], dtype=np.int64)),
+])
+def test_downcast(arr, expected, dtype):
+    downcasted = maybe_downcast_to_dtype(arr, dtype)
+    tm.assert_numpy_array_equal(downcasted, expected)
+
+
+def test_downcast_booleans():
+    """A boolean Series asked to downcast to float64 comes back equal."""
+    bools = Series([True, True, False])
+    target = np.dtype(np.float64)
+    # see gh-16875: coercing of booleans.
+    result = maybe_downcast_to_dtype(bools, target)
+    tm.assert_series_equal(result, bools)
+
+
+def test_downcast_conversion_no_nan(any_real_dtype):
+    """With no NaN present, "infer" downcasting leaves the values equal."""
+    arr = np.array([1.0, 2.0], dtype=any_real_dtype)
+    expected = np.array([1, 2])
+
+    result = maybe_downcast_to_dtype(arr, "infer")
+    tm.assert_almost_equal(result, expected, check_dtype=False)
+
+
+def test_downcast_conversion_nan(float_dtype):
+    """An array holding NaN comes back equal from "infer" downcasting."""
+    values = [1.0, 2.0, np.nan]
+
+    arr = np.array(values, dtype=float_dtype)
+    expected = np.array(values, dtype=float_dtype)
+
+    result = maybe_downcast_to_dtype(arr, "infer")
+    tm.assert_almost_equal(result, expected)
+
+
+def test_downcast_conversion_empty(any_real_dtype):
+    """An empty array downcast to "int64" gives an empty int64 array."""
+    empty = np.array([], dtype=any_real_dtype)
+    downcasted = maybe_downcast_to_dtype(empty, "int64")
+    tm.assert_numpy_array_equal(downcasted, np.array([], dtype=np.int64))
+
+
+@pytest.mark.parametrize("klass", [np.datetime64, np.timedelta64])
+def test_datetime_likes_nan(klass):
+    """NaN in a float array becomes NaT when cast to a [ns] datetime-like."""
+    ns_dtype = klass.__name__ + "[ns]"
+    floats = np.array([1, 2, np.nan])
+    expected = np.array([1, 2, klass("NaT")], ns_dtype)
+    result = maybe_downcast_to_dtype(floats, ns_dtype)
+    tm.assert_numpy_array_equal(result, expected)
+
+
+@pytest.mark.parametrize("as_asi", [True, False])
+def test_datetime_with_timezone(as_asi):
+    """Index or its asi8 values downcast to a tz-aware index (gh-15426)."""
+    stamp = Timestamp("2016-01-01 12:00:00", tz="US/Pacific")
+    expected = DatetimeIndex([stamp, stamp])
+
+    # Feed in either the raw asi8 values or the index itself.
+    source = expected.asi8 if as_asi else expected
+    result = maybe_downcast_to_dtype(source, expected.dtype)
+    tm.assert_index_equal(result, expected)
diff --git a/pandas/tests/dtypes/cast/test_find_common_type.py b/pandas/tests/dtypes/cast/test_find_common_type.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import pytest
+
+from pandas.core.dtypes.cast import find_common_type
+from pandas.core.dtypes.dtypes import (
+    CategoricalDtype, DatetimeTZDtype, PeriodDtype)
+
+
+@pytest.mark.parametrize("source_dtypes,expected_common_dtype", [
+    ((np.int64,), np.int64),
+    ((np.uint64,), np.uint64),
+    ((np.float32,), np.float32),
+    ((object,), object),
+
+    # Into ints.
+    ((np.int16, np.int64), np.int64),
+    ((np.int32, np.uint32), np.int64),
+    ((np.uint16, np.uint64), np.uint64),
+
+    # Into floats.
+    ((np.float16, np.float32), np.float32),
+    ((np.float16, np.int16), np.float32),
+    ((np.float32, np.int16), np.float32),
+    ((np.uint64, np.int64), np.float64),
+    ((np.int16, np.float64), np.float64),
+    ((np.float16, np.int64), np.float64),
+
+    # Into others.
+    ((np.complex128, np.int32), np.complex128),
+    ((object, np.float32), object),
+    ((object, np.int16), object),
+
+    # Bool with int.
+    ((np.dtype("bool"), np.int64), object),
+    ((np.dtype("bool"), np.int32), object),
+    ((np.dtype("bool"), np.int16), object),
+    ((np.dtype("bool"), np.int8), object),
+    ((np.dtype("bool"), np.uint64), object),
+    ((np.dtype("bool"), np.uint32), object),
+    ((np.dtype("bool"), np.uint16), object),
+    ((np.dtype("bool"), np.uint8), object),
+
+    # Bool with float.
+    ((np.dtype("bool"), np.float64), object),
+    ((np.dtype("bool"), np.float32), object),
+
+    ((np.dtype("datetime64[ns]"), np.dtype("datetime64[ns]")),
+     np.dtype("datetime64[ns]")),
+    ((np.dtype("timedelta64[ns]"), np.dtype("timedelta64[ns]")),
+     np.dtype("timedelta64[ns]")),
+
+    ((np.dtype("datetime64[ns]"), np.dtype("datetime64[ms]")),
+     np.dtype("datetime64[ns]")),
+    ((np.dtype("timedelta64[ms]"), np.dtype("timedelta64[ns]")),
+     np.dtype("timedelta64[ns]")),
+
+    ((np.dtype("datetime64[ns]"), np.dtype("timedelta64[ns]")), object),
+    ((np.dtype("datetime64[ns]"), np.int64), object)])
+def test_numpy_dtypes(source_dtypes, expected_common_dtype):
+    """find_common_type returns the expected dtype for NumPy-only inputs."""
+    assert find_common_type(source_dtypes) == expected_common_dtype
+
+
+def test_raises_empty_input():
+    with pytest.raises(ValueError, match="no types given"):
+        find_common_type([])  # an empty dtype list must be rejected
+
+
+@pytest.mark.parametrize("dtypes,exp_type", [
+    ([CategoricalDtype()], "category"),
+    ([object, CategoricalDtype()], object),
+    ([CategoricalDtype(), CategoricalDtype()], "category")])
+def test_categorical_dtype(dtypes, exp_type):
+    """Categoricals stay "category"; adding object yields object."""
+    assert find_common_type(dtypes) == exp_type
+
+
+def test_datetimetz_dtype_match():
+    dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")  # same dtype twice
+    assert find_common_type([dtype, dtype]) == "datetime64[ns, US/Eastern]"
+
+
+@pytest.mark.parametrize("dtype2", [
+    DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"),
+    np.dtype("datetime64[ns]"), object, np.int64])
+def test_datetimetz_dtype_mismatch(dtype2):
+    """tz-aware dtype plus a different dtype gives object, in either order."""
+    dtype = DatetimeTZDtype(unit="ns", tz="US/Eastern")
+    assert find_common_type([dtype, dtype2]) == object
+    assert find_common_type([dtype2, dtype]) == object
+
+
+def test_period_dtype_match():
+    dtype = PeriodDtype(freq="D")  # the same period dtype on both sides
+    assert find_common_type([dtype, dtype]) == "period[D]"
+
+
+@pytest.mark.parametrize("dtype2", [
+    DatetimeTZDtype(unit="ns", tz="Asia/Tokyo"),
+    PeriodDtype(freq="2D"), PeriodDtype(freq="H"),
+    np.dtype("datetime64[ns]"), object, np.int64])
+def test_period_dtype_mismatch(dtype2):
+    """period[D] plus a different dtype gives object, in either order."""
+    dtype = PeriodDtype(freq="D")
+    assert find_common_type([dtype, dtype2]) == object
+    assert find_common_type([dtype2, dtype]) == object
diff --git a/pandas/tests/dtypes/cast/test_infer_datetimelike.py b/pandas/tests/dtypes/cast/test_infer_datetimelike.py
@@ -0,0 +1,22 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import pytest
+
+from pandas import DataFrame, NaT, Series, Timestamp
+
+
+@pytest.mark.parametrize("data,exp_size", [
+    ([[NaT, "a", "b", 0], [NaT, "b", "c", 1]], 8),
+    ([[NaT, "a", 0], [NaT, "b", 1]], 6)
+])
+def test_maybe_infer_to_datetimelike_df_construct(data, exp_size):
+    """Frame built from mixed NaT/str/int rows has every cell (gh-16362)."""
+    frame = DataFrame(np.array(data))
+    assert frame.size == exp_size
+
+
+def test_maybe_infer_to_datetimelike_ser_construct():
+    """A str mixed with a Timestamp keeps object dtype (see gh-19671)."""
+    mixed = Series(["M1701", Timestamp("20130101")])
+    assert mixed.dtype.kind == "O"