Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: datetimelike.astype(int_other_than_i8) return requested dtype #45574

Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ Other Deprecations
- Deprecated behavior of :meth:`DatetimeIndex.intersection` and :meth:`DatetimeIndex.symmetric_difference` (``union`` behavior was already deprecated in version 1.3.0) with mixed timezones; in a future version both will be cast to UTC instead of object dtype (:issue:`39328`, :issue:`45357`)
- Deprecated :meth:`DataFrame.iteritems`, :meth:`Series.iteritems`, :meth:`HDFStore.iteritems` in favor of :meth:`DataFrame.items`, :meth:`Series.items`, :meth:`HDFStore.items` (:issue:`45321`)
- Deprecated :meth:`Series.is_monotonic` and :meth:`Index.is_monotonic` in favor of :meth:`Series.is_monotonic_increasing` and :meth:`Index.is_monotonic_increasing` (:issue:`45422`, :issue:`21335`)
- Deprecated behavior of :meth:`DatetimeIndex.astype`, :meth:`TimedeltaIndex.astype`, :meth:`PeriodIndex.astype` when converting to an integer dtype other than ``int64``. In a future version, these will convert to exactly the specified dtype (instead of always ``int64``) and will raise if the conversion overflows (:issue:`45034`)
- Deprecated the ``__array_wrap__`` method of DataFrame and Series, rely on standard numpy ufuncs instead (:issue:`45451`)
- Deprecated the behavior of :meth:`Series.fillna` and :meth:`DataFrame.fillna` with ``timedelta64[ns]`` dtype and incompatible fill value; in a future version this will cast to a common dtype (usually object) instead of raising, matching the behavior of other dtypes (:issue:`45746`)
-
Expand Down
29 changes: 29 additions & 0 deletions pandas/core/arrays/datetimelike.py
Original file line number Diff line number Diff line change
Expand Up @@ -446,6 +446,35 @@ def astype(self, dtype, copy: bool = True):
if is_unsigned_integer_dtype(dtype):
# Again, we ignore int32 vs. int64
values = values.view("uint64")
if dtype != np.uint64:
# GH#45034
warnings.warn(
f"The behavior of .astype from {self.dtype} to {dtype} is "
"deprecated. In a future version, this astype will return "
"exactly the specified dtype instead of uint64, and will "
"raise if that conversion overflows.",
FutureWarning,
stacklevel=find_stack_level(),
)
elif (self.asi8 < 0).any():
# GH#45034
warnings.warn(
f"The behavior of .astype from {self.dtype} to {dtype} is "
"deprecated. In a future version, this astype will "
"raise if the conversion overflows.",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would make the "if the conversion overflows" wording a bit clearer: the overflow actually does happen with the current values, so it is not a hypothetical "if".

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How about "[...] raise if the conversion overflows, as it did in this case with negative int64 values."?

FutureWarning,
stacklevel=find_stack_level(),
)
elif dtype != np.int64:
# GH#45034
warnings.warn(
f"The behavior of .astype from {self.dtype} to {dtype} is "
"deprecated. In a future version, this astype will return "
"exactly the specified dtype instead of int64, and will "
"raise if that conversion overflows.",
FutureWarning,
stacklevel=find_stack_level(),
)

if copy:
values = values.copy()
Expand Down
19 changes: 15 additions & 4 deletions pandas/tests/arrays/period/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,29 @@


@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype(dtype):
def test_astype_int(dtype):
# We choose to ignore the sign and size of integers for
# Period/Datetime/Timedelta astype
arr = period_array(["2000", "2001", None], freq="D")
result = arr.astype(dtype)

if np.dtype(dtype).kind == "u":
expected_dtype = np.dtype("uint64")
warn1 = FutureWarning
else:
expected_dtype = np.dtype("int64")

expected = arr.astype(expected_dtype)
warn1 = None

msg_overflow = "will raise if the conversion overflows"
with tm.assert_produces_warning(warn1, match=msg_overflow):
expected = arr.astype(expected_dtype)

warn = None if dtype == expected_dtype else FutureWarning
msg = " will return exactly the specified dtype"
if warn is None and warn1 is not None:
warn = warn1
msg = msg_overflow
with tm.assert_produces_warning(warn, match=msg):
result = arr.astype(dtype)

assert result.dtype == expected_dtype
tm.assert_numpy_array_equal(result, expected)
Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/arrays/test_datetimes.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,14 +77,20 @@ def test_astype_copies(self, dtype, other):
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype_int(self, dtype):
arr = DatetimeArray._from_sequence([pd.Timestamp("2000"), pd.Timestamp("2001")])
result = arr.astype(dtype)

if np.dtype(dtype).kind == "u":
expected_dtype = np.dtype("uint64")
else:
expected_dtype = np.dtype("int64")
expected = arr.astype(expected_dtype)

warn = None
if dtype != expected_dtype:
warn = FutureWarning
msg = " will return exactly the specified dtype"
with tm.assert_produces_warning(warn, match=msg):
result = arr.astype(dtype)

assert result.dtype == expected_dtype
tm.assert_numpy_array_equal(result, expected)

Expand Down
8 changes: 7 additions & 1 deletion pandas/tests/arrays/test_timedeltas.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,14 +11,20 @@ class TestTimedeltaArray:
@pytest.mark.parametrize("dtype", [int, np.int32, np.int64, "uint32", "uint64"])
def test_astype_int(self, dtype):
arr = TimedeltaArray._from_sequence([Timedelta("1H"), Timedelta("2H")])
result = arr.astype(dtype)

if np.dtype(dtype).kind == "u":
expected_dtype = np.dtype("uint64")
else:
expected_dtype = np.dtype("int64")
expected = arr.astype(expected_dtype)

warn = None
if dtype != expected_dtype:
warn = FutureWarning
msg = " will return exactly the specified dtype"
with tm.assert_produces_warning(warn, match=msg):
result = arr.astype(dtype)

assert result.dtype == expected_dtype
tm.assert_numpy_array_equal(result, expected)

Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/indexes/datetimes/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,11 @@ def test_astype_uint(self):
name="idx",
)
tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)

msg = "will return exactly the specified dtype instead of uint64"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = arr.astype("uint32")
tm.assert_index_equal(res, expected)

def test_astype_with_tz(self):

Expand Down
16 changes: 12 additions & 4 deletions pandas/tests/indexes/interval/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -205,10 +205,18 @@ def index(self, request):
@pytest.mark.parametrize("subtype", ["int64", "uint64"])
def test_subtype_integer(self, index, subtype):
dtype = IntervalDtype(subtype, "right")
result = index.astype(dtype)
expected = IntervalIndex.from_arrays(
index.left.astype(subtype), index.right.astype(subtype), closed=index.closed
)

warn = None
if index.isna().any() and subtype == "uint64":
warn = FutureWarning
msg = "In a future version, this astype will raise if the conversion overflows"

with tm.assert_produces_warning(warn, match=msg):
result = index.astype(dtype)
new_left = index.left.astype(subtype)
new_right = index.right.astype(subtype)

expected = IntervalIndex.from_arrays(new_left, new_right, closed=index.closed)
tm.assert_index_equal(result, expected)

def test_subtype_float(self, index):
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/indexes/period/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,11 @@ def test_astype_uint(self):
arr = period_range("2000", periods=2, name="idx")
expected = UInt64Index(np.array([10957, 10958], dtype="uint64"), name="idx")
tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)

msg = "will return exactly the specified dtype instead of uint64"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = arr.astype("uint32")
tm.assert_index_equal(res, expected)

def test_astype_object(self):
idx = PeriodIndex([], freq="M")
Expand Down
6 changes: 5 additions & 1 deletion pandas/tests/indexes/timedeltas/methods/test_astype.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,11 @@ def test_astype_uint(self):
np.array([3600000000000, 90000000000000], dtype="uint64")
)
tm.assert_index_equal(arr.astype("uint64"), expected)
tm.assert_index_equal(arr.astype("uint32"), expected)

msg = "will return exactly the specified dtype instead of uint64"
with tm.assert_produces_warning(FutureWarning, match=msg):
res = arr.astype("uint32")
tm.assert_index_equal(res, expected)

def test_astype_timedelta64(self):
# GH 13149, GH 13209
Expand Down