Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

DEPR: enforce Series/DataFrame awareness-mismatch deprecations #48739

Merged
merged 9 commits into from
Oct 20, 2022
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,8 @@ Deprecations

Removal of prior version deprecations/changes
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- Enforced deprecation disallowing passing a timezone-aware :class:`Timestamp` and ``dtype="datetime64[ns]"`` to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`)
- Enforced deprecation disallowing passing a sequence of timezone-aware values and ``dtype="datetime64[ns]"`` to :class:`Series` or :class:`DataFrame` constructors (:issue:`41555`)
- Removed Date parser functions :func:`~pandas.io.date_converters.parse_date_time`,
:func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields`
and :func:`~pandas.io.date_converters.generic_parser` (:issue:`24518`)
Expand Down
53 changes: 8 additions & 45 deletions pandas/core/dtypes/cast.py
Original file line number Diff line number Diff line change
Expand Up @@ -1358,20 +1358,11 @@ def maybe_cast_to_datetime(
# didn't specify one

if dta.tz is not None:
warnings.warn(
"Data is timezone-aware. Converting "
"timezone-aware data to timezone-naive by "
"passing dtype='datetime64[ns]' to "
"DataFrame or Series is deprecated and will "
"raise in a future version. Use "
"`pd.Series(values).dt.tz_localize(None)` "
"instead.",
FutureWarning,
stacklevel=find_stack_level(),
raise ValueError(
"Cannot convert timezone-aware data to "
"timezone-naive dtype. Use "
"pd.Series(values).dt.tz_localize(None) instead."
)
# equiv: dta.view(dtype)
# Note: NOT equivalent to dta.astype(dtype)
dta = dta.tz_localize(None)

# TODO(2.0): Do this astype in sequence_to_datetimes to
# avoid potential extra copy?
Expand Down Expand Up @@ -1678,7 +1669,7 @@ def construct_2d_arraylike_from_scalar(
shape = (length, width)

if dtype.kind in ["m", "M"]:
value = _maybe_unbox_datetimelike_tz_deprecation(value, dtype)
value = _maybe_box_and_unbox_datetimelike(value, dtype)
elif dtype == _dtype_obj:
if isinstance(value, (np.timedelta64, np.datetime64)):
# calling np.array below would cast to pytimedelta/pydatetime
Expand Down Expand Up @@ -1742,7 +1733,7 @@ def construct_1d_arraylike_from_scalar(
if not isna(value):
value = ensure_str(value)
elif dtype.kind in ["M", "m"]:
value = _maybe_unbox_datetimelike_tz_deprecation(value, dtype)
value = _maybe_box_and_unbox_datetimelike(value, dtype)

subarr = np.empty(length, dtype=dtype)
if length:
Expand All @@ -1752,42 +1743,14 @@ def construct_1d_arraylike_from_scalar(
return subarr


def _maybe_unbox_datetimelike_tz_deprecation(value: Scalar, dtype: DtypeObj):
"""
Wrap _maybe_unbox_datetimelike with a check for a timezone-aware Timestamp
along with a timezone-naive datetime64 dtype, which is deprecated.
"""
def _maybe_box_and_unbox_datetimelike(value: Scalar, dtype: DtypeObj):
# Caller is responsible for checking dtype.kind in ["m", "M"]

if isinstance(value, datetime):
# we don't want to box dt64, in particular datetime64("NaT")
value = maybe_box_datetimelike(value, dtype)

try:
value = _maybe_unbox_datetimelike(value, dtype)
except TypeError:
if (
isinstance(value, Timestamp)
and value.tzinfo is not None
and isinstance(dtype, np.dtype)
and dtype.kind == "M"
):
warnings.warn(
"Data is timezone-aware. Converting "
"timezone-aware data to timezone-naive by "
"passing dtype='datetime64[ns]' to "
"DataFrame or Series is deprecated and will "
"raise in a future version. Use "
"`pd.Series(values).dt.tz_localize(None)` "
"instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
new_value = value.tz_localize(None)
return _maybe_unbox_datetimelike(new_value, dtype)
else:
raise
return value
return _maybe_unbox_datetimelike(value, dtype)


def construct_1d_object_array_from_listlike(values: Sized) -> np.ndarray:
Expand Down
68 changes: 36 additions & 32 deletions pandas/tests/frame/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -2848,37 +2848,32 @@ def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture, pydt):
ts = Timestamp("2019", tz=tz)
if pydt:
ts = ts.to_pydatetime()
ts_naive = Timestamp("2019")

with tm.assert_produces_warning(FutureWarning):
result = DataFrame({0: [ts]}, dtype="datetime64[ns]")
msg = (
"Cannot convert timezone-aware data to timezone-naive dtype. "
r"Use pd.Series\(values\).dt.tz_localize\(None\) instead."
)
with pytest.raises(ValueError, match=msg):
DataFrame({0: [ts]}, dtype="datetime64[ns]")

expected = DataFrame({0: [ts_naive]})
tm.assert_frame_equal(result, expected)
msg2 = "Cannot unbox tzaware Timestamp to tznaive dtype"
with pytest.raises(TypeError, match=msg2):
DataFrame({0: ts}, index=[0], dtype="datetime64[ns]")

with tm.assert_produces_warning(FutureWarning):
result = DataFrame({0: ts}, index=[0], dtype="datetime64[ns]")
tm.assert_frame_equal(result, expected)
with pytest.raises(ValueError, match=msg):
DataFrame([ts], dtype="datetime64[ns]")

with tm.assert_produces_warning(FutureWarning):
result = DataFrame([ts], dtype="datetime64[ns]")
tm.assert_frame_equal(result, expected)
with pytest.raises(ValueError, match=msg):
DataFrame(np.array([ts], dtype=object), dtype="datetime64[ns]")

with tm.assert_produces_warning(FutureWarning):
result = DataFrame(np.array([ts], dtype=object), dtype="datetime64[ns]")
tm.assert_frame_equal(result, expected)
with pytest.raises(TypeError, match=msg2):
DataFrame(ts, index=[0], columns=[0], dtype="datetime64[ns]")

with tm.assert_produces_warning(FutureWarning):
result = DataFrame(ts, index=[0], columns=[0], dtype="datetime64[ns]")
tm.assert_frame_equal(result, expected)

with tm.assert_produces_warning(FutureWarning):
df = DataFrame([Series([ts])], dtype="datetime64[ns]")
tm.assert_frame_equal(result, expected)
with pytest.raises(ValueError, match=msg):
DataFrame([Series([ts])], dtype="datetime64[ns]")

with tm.assert_produces_warning(FutureWarning):
df = DataFrame([[ts]], columns=[0], dtype="datetime64[ns]")
tm.assert_equal(df, expected)
with pytest.raises(ValueError, match=msg):
DataFrame([[ts]], columns=[0], dtype="datetime64[ns]")

def test_from_dict(self):

Expand Down Expand Up @@ -3051,8 +3046,11 @@ def get1(obj): # TODO: make a helper in tm?

class TestFromScalar:
@pytest.fixture(params=[list, dict, None])
def constructor(self, request, frame_or_series):
box = request.param
def box(self, request):
return request.param

@pytest.fixture
def constructor(self, frame_or_series, box):

extra = {"index": range(2)}
if frame_or_series is DataFrame:
Expand Down Expand Up @@ -3181,16 +3179,22 @@ def test_out_of_s_bounds_timedelta64(self, constructor):
dtype = result.dtype if isinstance(result, Series) else result.dtypes.iloc[0]
assert dtype == object

def test_tzaware_data_tznaive_dtype(self, constructor):
def test_tzaware_data_tznaive_dtype(self, constructor, box, frame_or_series):
tz = "US/Eastern"
ts = Timestamp("2019", tz=tz)
ts_naive = Timestamp("2019")

with tm.assert_produces_warning(FutureWarning, match="Data is timezone-aware"):
result = constructor(ts, dtype="M8[ns]")
if box is None or (frame_or_series is DataFrame and box is dict):
msg = "Cannot unbox tzaware Timestamp to tznaive dtype"
err = TypeError
else:
msg = (
"Cannot convert timezone-aware data to timezone-naive dtype. "
r"Use pd.Series\(values\).dt.tz_localize\(None\) instead."
)
err = ValueError

assert np.all(result.dtypes == "M8[ns]")
assert np.all(result == ts_naive)
with pytest.raises(err, match=msg):
constructor(ts, dtype="M8[ns]")


# TODO: better location for this test?
Expand Down
27 changes: 13 additions & 14 deletions pandas/tests/series/test_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -1736,24 +1736,23 @@ def test_constructor_data_aware_dtype_naive(self, tz_aware_fixture, pydt):
ts = Timestamp("2019", tz=tz)
if pydt:
ts = ts.to_pydatetime()
ts_naive = Timestamp("2019")

with tm.assert_produces_warning(FutureWarning):
result = Series([ts], dtype="datetime64[ns]")
expected = Series([ts_naive])
tm.assert_series_equal(result, expected)
msg = (
"Cannot convert timezone-aware data to timezone-naive dtype. "
r"Use pd.Series\(values\).dt.tz_localize\(None\) instead."
)
with pytest.raises(ValueError, match=msg):
Series([ts], dtype="datetime64[ns]")

with tm.assert_produces_warning(FutureWarning):
result = Series(np.array([ts], dtype=object), dtype="datetime64[ns]")
tm.assert_series_equal(result, expected)
with pytest.raises(ValueError, match=msg):
Series(np.array([ts], dtype=object), dtype="datetime64[ns]")

with tm.assert_produces_warning(FutureWarning):
result = Series({0: ts}, dtype="datetime64[ns]")
tm.assert_series_equal(result, expected)
with pytest.raises(ValueError, match=msg):
Series({0: ts}, dtype="datetime64[ns]")

with tm.assert_produces_warning(FutureWarning):
result = Series(ts, index=[0], dtype="datetime64[ns]")
tm.assert_series_equal(result, expected)
msg = "Cannot unbox tzaware Timestamp to tznaive dtype"
with pytest.raises(TypeError, match=msg):
Series(ts, index=[0], dtype="datetime64[ns]")

def test_constructor_datetime64(self):
rng = date_range("1/1/2000 00:00:00", "1/1/2000 1:59:50", freq="10s")
Expand Down