Skip to content

Commit

Permalink
REF/TST: collect astype tests (#37282)
Browse files Browse the repository at this point in the history
  • Loading branch information
jbrockmendel committed Oct 21, 2020
1 parent 4a08c02 commit 6ac3765
Show file tree
Hide file tree
Showing 5 changed files with 339 additions and 318 deletions.
28 changes: 28 additions & 0 deletions pandas/tests/frame/methods/test_convert_dtypes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
import numpy as np
import pytest

import pandas as pd
import pandas._testing as tm


class TestConvertDtypes:
    """Smoke test for ``DataFrame.convert_dtypes``."""

    @pytest.mark.parametrize(
        "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
    )
    def test_convert_dtypes(self, convert_integer, expected):
        """
        Check the dtypes produced by ``convert_dtypes`` on a two-column frame.

        Parameters
        ----------
        convert_integer : bool
            Forwarded to ``DataFrame.convert_dtypes``.
        expected : numpy.dtype or str
            Dtype expected for the int32 column "a" after conversion.
        """
        # Specific types are tested in tests/series/test_dtypes.py
        # Just check that it works for DataFrame here
        df = pd.DataFrame(
            {
                "a": pd.Series([1, 2, 3], dtype=np.dtype("int32")),
                "b": pd.Series(["x", "y", "z"], dtype=np.dtype("O")),
            }
        )
        # Spell the flags out by keyword: four positional booleans are
        # unreadable and silently change meaning if the signature is reordered.
        result = df.convert_dtypes(
            infer_objects=True,
            convert_string=True,
            convert_integer=convert_integer,
            convert_boolean=False,
        )
        # Use a separate name so the parametrized ``expected`` dtype is not
        # clobbered by the expected frame.
        expected_frame = pd.DataFrame(
            {
                "a": pd.Series([1, 2, 3], dtype=expected),
                "b": pd.Series(["x", "y", "z"], dtype="string"),
            }
        )
        tm.assert_frame_equal(result, expected_frame)
21 changes: 0 additions & 21 deletions pandas/tests/frame/test_dtypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,27 +245,6 @@ def test_str_to_small_float_conversion_type(self):
expected = DataFrame(col_data, columns=["A"], dtype=float)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
    "convert_integer, expected", [(False, np.dtype("int32")), (True, "Int32")]
)
def test_convert_dtypes(self, convert_integer, expected):
    """
    Check the dtypes produced by ``convert_dtypes`` on a two-column frame.

    ``expected`` is the dtype expected for the int32 column "a"; the object
    column "b" is expected to come back with the "string" dtype.
    """
    # Specific types are tested in tests/series/test_dtypes.py
    # Just check that it works for DataFrame here
    df = DataFrame(
        {
            "a": Series([1, 2, 3], dtype=np.dtype("int32")),
            "b": Series(["x", "y", "z"], dtype=np.dtype("O")),
        }
    )
    # Keyword arguments instead of four positional booleans, so each flag
    # is self-describing and robust to signature reordering.
    result = df.convert_dtypes(
        infer_objects=True,
        convert_string=True,
        convert_integer=convert_integer,
        convert_boolean=False,
    )
    # Keep the parametrized ``expected`` dtype intact; build the frame
    # under its own name.
    expected_frame = DataFrame(
        {
            "a": Series([1, 2, 3], dtype=expected),
            "b": Series(["x", "y", "z"], dtype="string"),
        }
    )
    tm.assert_frame_equal(result, expected_frame)


class TestDataFrameDatetimeWithTZ:
def test_interleave(self, timezone_frame):
Expand Down
288 changes: 287 additions & 1 deletion pandas/tests/series/methods/test_astype.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,97 @@
from datetime import datetime, timedelta
from importlib import reload
import string
import sys

import numpy as np
import pytest

from pandas import NA, Interval, Series, Timestamp, date_range
from pandas._libs.tslibs import iNaT

from pandas import (
NA,
Categorical,
CategoricalDtype,
Index,
Interval,
Series,
Timedelta,
Timestamp,
date_range,
)
import pandas._testing as tm


class TestAstypeAPI:
    """Argument-handling checks for ``Series.astype``."""

    def test_arg_for_errors_in_astype(self):
        """An ``errors`` value outside 'raise'/'ignore' raises ValueError."""
        # see GH#14878
        obj = Series([1, 2, 3])

        expected_message = (
            r"Expected value of kwarg 'errors' to be one of \['raise', "
            r"'ignore'\]\. Supplied value is 'False'"
        )
        with pytest.raises(ValueError, match=expected_message):
            obj.astype(np.float64, errors=False)

        # a supported value must be accepted without raising
        obj.astype(np.int8, errors="raise")

    @pytest.mark.parametrize("dtype_class", [dict, Series])
    def test_astype_dict_like(self, dtype_class):
        """A dict-like dtype mapping must be keyed by the Series name only."""
        # see GH#7271
        obj = Series(range(0, 10, 2), name="abc")

        # valid mappings: the single key equals the Series name
        to_str = dtype_class({"abc": str})
        tm.assert_series_equal(
            obj.astype(to_str), Series(["0", "2", "4", "6", "8"], name="abc")
        )

        to_float = dtype_class({"abc": "float64"})
        tm.assert_series_equal(
            obj.astype(to_float),
            Series([0.0, 2.0, 4.0, 6.0, 8.0], dtype="float64", name="abc"),
        )

        key_error_message = (
            "Only the Series name can be used for the key in Series dtype "
            r"mappings\."
        )

        # an extra key besides the Series name
        extra_key = dtype_class({"abc": str, "def": str})
        with pytest.raises(KeyError, match=key_error_message):
            obj.astype(extra_key)

        # a key that is not the Series name
        wrong_key = dtype_class({0: str})
        with pytest.raises(KeyError, match=key_error_message):
            obj.astype(wrong_key)

        # GH#16717
        # if dtypes provided is empty, it should error
        empty_kwargs = {"dtype": object} if dtype_class is Series else {}
        no_keys = dtype_class({}, **empty_kwargs)

        with pytest.raises(KeyError, match=key_error_message):
            obj.astype(no_keys)


class TestAstype:
@pytest.mark.parametrize("dtype", [np.datetime64, np.timedelta64])
def test_astype_generic_timestamp_no_frequency(self, dtype, request):
    """Casting to a unit-less datetime64/timedelta64 dtype raises ValueError."""
    # see GH#15524, GH#15987
    ser = Series([1])

    numpy_name = np.dtype(dtype).name
    if numpy_name != "timedelta64" and numpy_name != "datetime64":
        # the dtype resolved to something carrying a unit: expect failure
        request.node.add_marker(
            pytest.mark.xfail(reason="GH#33890 Is assigned ns unit")
        )

    type_name = dtype.__name__
    expected_message = (
        fr"The '{type_name}' dtype has no unit\. "
        fr"Please pass in '{type_name}\[ns\]' instead."
    )
    with pytest.raises(ValueError, match=expected_message):
        ser.astype(dtype)

def test_astype_dt64_to_str(self):
# GH#10442 : testing astype(str) is correct for Series/DatetimeIndex
dti = date_range("2012-01-01", periods=3)
Expand All @@ -27,6 +113,87 @@ def test_astype_dt64tz_to_str(self):
)
tm.assert_series_equal(result, expected)

def test_astype_datetime(self):
s = Series(iNaT, dtype="M8[ns]", index=range(5))

s = s.astype("O")
assert s.dtype == np.object_

s = Series([datetime(2001, 1, 2, 0, 0)])

s = s.astype("O")
assert s.dtype == np.object_

s = Series([datetime(2001, 1, 2, 0, 0) for i in range(3)])

s[1] = np.nan
assert s.dtype == "M8[ns]"

s = s.astype("O")
assert s.dtype == np.object_

def test_astype_datetime64tz(self):
s = Series(date_range("20130101", periods=3, tz="US/Eastern"))

# astype
result = s.astype(object)
expected = Series(s.astype(object), dtype=object)
tm.assert_series_equal(result, expected)

result = Series(s.values).dt.tz_localize("UTC").dt.tz_convert(s.dt.tz)
tm.assert_series_equal(result, s)

# astype - object, preserves on construction
result = Series(s.astype(object))
expected = s.astype(object)
tm.assert_series_equal(result, expected)

# astype - datetime64[ns, tz]
result = Series(s.values).astype("datetime64[ns, US/Eastern]")
tm.assert_series_equal(result, s)

result = Series(s.values).astype(s.dtype)
tm.assert_series_equal(result, s)

result = s.astype("datetime64[ns, CET]")
expected = Series(date_range("20130101 06:00:00", periods=3, tz="CET"))
tm.assert_series_equal(result, expected)

def test_astype_str_cast_dt64(self):
# see GH#9757
ts = Series([Timestamp("2010-01-04 00:00:00")])
s = ts.astype(str)

expected = Series(["2010-01-04"])
tm.assert_series_equal(s, expected)

ts = Series([Timestamp("2010-01-04 00:00:00", tz="US/Eastern")])
s = ts.astype(str)

expected = Series(["2010-01-04 00:00:00-05:00"])
tm.assert_series_equal(s, expected)

def test_astype_str_cast_td64(self):
# see GH#9757

td = Series([Timedelta(1, unit="d")])
ser = td.astype(str)

expected = Series(["1 days"])
tm.assert_series_equal(ser, expected)

def test_dt64_series_astype_object(self):
dt64ser = Series(date_range("20130101", periods=3))
result = dt64ser.astype(object)
assert isinstance(result.iloc[0], datetime)
assert result.dtype == np.object_

def test_td64_series_astype_object(self):
tdser = Series(["59 Days", "59 Days", "NaT"], dtype="timedelta64[ns]")
result = tdser.astype(object)
assert isinstance(result.iloc[0], timedelta)
assert result.dtype == np.object_

@pytest.mark.parametrize(
"values",
[
Expand Down Expand Up @@ -70,3 +237,122 @@ def test_astype_to_str_preserves_na(self, value, string_value):
result = s.astype(str)
expected = Series(["a", "b", string_value], dtype=object)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize("dtype", ["float32", "float64", "int64", "int32"])
def test_astype(self, dtype):
    """Casting to a numeric dtype yields that dtype and keeps the name."""
    original = Series(np.random.randn(5), name="foo")
    converted = original.astype(dtype)

    assert converted.dtype == dtype
    assert converted.name == original.name

@pytest.mark.parametrize("value", [np.nan, np.inf])
@pytest.mark.parametrize("dtype", [np.int32, np.int64])
def test_astype_cast_nan_inf_int(self, dtype, value):
    """Casting a non-finite float to an integer dtype raises ValueError."""
    # gh-14265: check NaN and inf raise error when converting to int
    non_finite = Series([value])
    expected_message = "Cannot convert non-finite values \\(NA or inf\\) to integer"

    with pytest.raises(ValueError, match=expected_message):
        non_finite.astype(dtype)

@pytest.mark.parametrize("dtype", [int, np.int8, np.int64])
def test_astype_cast_object_int_fail(self, dtype):
    """Non-numeric strings cannot be cast to an integer dtype."""
    words = Series(["car", "house", "tree", "1"])

    # the first element is the one reported in the error message
    with pytest.raises(
        ValueError, match=r"invalid literal for int\(\) with base 10: 'car'"
    ):
        words.astype(dtype)

def test_astype_cast_object_int(self):
arr = Series(["1", "2", "3", "4"], dtype=object)
result = arr.astype(int)

tm.assert_series_equal(result, Series(np.arange(1, 5)))

def test_astype_unicode(self):
# see GH#7758: A bit of magic is required to set
# default encoding to utf-8
digits = string.digits
test_series = [
Series([digits * 10, tm.rands(63), tm.rands(64), tm.rands(1000)]),
Series(["データーサイエンス、お前はもう死んでいる"]),
]

former_encoding = None

if sys.getdefaultencoding() == "utf-8":
test_series.append(Series(["野菜食べないとやばい".encode()]))

for s in test_series:
res = s.astype("unicode")
expec = s.map(str)
tm.assert_series_equal(res, expec)

# Restore the former encoding
if former_encoding is not None and former_encoding != "utf-8":
reload(sys)
sys.setdefaultencoding(former_encoding)


class TestAstypeCategorical:
    """``Series.astype`` with categorical target dtypes."""

    def test_astype_categoricaldtype(self):
        """Casting strings to a ``CategoricalDtype`` honours categories/ordered."""
        ser = Series(["a", "b", "a"])

        as_ordered = ser.astype(CategoricalDtype(["a", "b"], ordered=True))
        tm.assert_series_equal(
            as_ordered, Series(Categorical(["a", "b", "a"], ordered=True))
        )

        as_unordered = ser.astype(CategoricalDtype(["a", "b"], ordered=False))
        tm.assert_series_equal(
            as_unordered, Series(Categorical(["a", "b", "a"], ordered=False))
        )

        # a category that does not occur in the data is still kept
        with_extra = ser.astype(CategoricalDtype(["a", "b", "c"], ordered=False))
        expected_extra = Series(
            Categorical(["a", "b", "a"], categories=["a", "b", "c"], ordered=False)
        )
        tm.assert_series_equal(with_extra, expected_extra)
        tm.assert_index_equal(with_extra.cat.categories, Index(["a", "b", "c"]))

    @pytest.mark.parametrize("name", [None, "foo"])
    @pytest.mark.parametrize("dtype_ordered", [True, False])
    @pytest.mark.parametrize("series_ordered", [True, False])
    def test_astype_categorical_to_categorical(
        self, name, dtype_ordered, series_ordered
    ):
        """Casting one categorical dtype to another, over name/ordered combos."""
        # GH#10696, GH#18593
        values = list("abcaacbab")
        source_dtype = CategoricalDtype(list("bac"), ordered=series_ordered)
        source = Series(values, dtype=source_dtype, name=name)

        # unspecified categories
        target_dtype = CategoricalDtype(ordered=dtype_ordered)
        converted = source.astype(target_dtype)
        inherited_dtype = CategoricalDtype(source_dtype.categories, dtype_ordered)
        tm.assert_series_equal(
            converted, Series(values, name=name, dtype=inherited_dtype)
        )

        # different categories
        target_dtype = CategoricalDtype(list("adc"), dtype_ordered)
        converted = source.astype(target_dtype)
        tm.assert_series_equal(
            converted, Series(values, name=name, dtype=target_dtype)
        )

        if dtype_ordered is False:
            # not specifying ordered, so only test once
            unchanged = source
            converted = source.astype("category")
            tm.assert_series_equal(converted, unchanged)

    def test_astype_bool_missing_to_categorical(self):
        """Object Series of bools plus NaN casts to categorical, NaN kept."""
        # GH-19182
        flags = Series([True, False, np.nan])
        assert flags.dtypes == np.object_

        converted = flags.astype(CategoricalDtype(categories=[True, False]))
        tm.assert_series_equal(
            converted,
            Series(Categorical([True, False, np.nan], categories=[True, False])),
        )

    def test_astype_categories_raises(self):
        """Passing ``categories``/``ordered`` keywords to ``astype`` raises."""
        # deprecated GH#17636, removed in GH#27141
        ser = Series(["a", "b", "a"])
        with pytest.raises(TypeError, match="got an unexpected"):
            ser.astype("category", categories=["a", "b"], ordered=True)
23 changes: 23 additions & 0 deletions pandas/tests/series/methods/test_infer_objects.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import numpy as np

from pandas import Series
import pandas._testing as tm


class TestInferObjects:
    """Checks for ``Series.infer_objects``."""

    def test_infer_objects_series(self):
        """Object arrays are soft-converted; mixed data stays object."""
        # GH#11221
        all_ints = Series(np.array([1, 2, 3], dtype="O"))
        tm.assert_series_equal(all_ints.infer_objects(), Series([1, 2, 3]))

        ints_and_none = Series(np.array([1, 2, 3, None], dtype="O"))
        tm.assert_series_equal(
            ints_and_none.infer_objects(), Series([1.0, 2.0, 3.0, np.nan])
        )

        # only soft conversions, unconvertible values pass through unchanged
        mixed = Series(np.array([1, 2, 3, None, "a"], dtype="O"))
        inferred = mixed.infer_objects()
        untouched = Series([1, 2, 3, None, "a"])

        assert inferred.dtype == "object"
        tm.assert_series_equal(inferred, untouched)

0 comments on commit 6ac3765

Please sign in to comment.