-
-
Notifications
You must be signed in to change notification settings - Fork 17.6k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Preserve EA dtype in DataFrame.stack #23285
Changes from 11 commits
381b073
428f230
0d39be0
fc37932
7bb5a5e
7e9224a
d6661cb
3d41f5b
9f91df0
144d117
98f75c9
88f7f3e
2b858b8
88f08c7
d305c86
f6aeafa
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,6 +36,7 @@ | |
is_list_like, | ||
is_re, | ||
is_re_compilable, | ||
is_sparse, | ||
pandas_dtype) | ||
from pandas.core.dtypes.cast import ( | ||
maybe_downcast_to_dtype, | ||
|
@@ -632,7 +633,10 @@ def _astype(self, dtype, copy=False, errors='raise', values=None, | |
return self | ||
|
||
if klass is None: | ||
if dtype == np.object_: | ||
if is_sparse(self.values): | ||
# Series[Sparse].astype(object) is sparse. | ||
klass = ExtensionBlock | ||
elif is_object_dtype(dtype): | ||
klass = ObjectBlock | ||
elif is_extension_array_dtype(dtype): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. so maybe should just move the There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I'll make that change and run the test suite. I was kinda worried about "false positives" here, but I suppose it's exactly what we want if an extension array claims it's object dtype. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. As posted in the unstack PR, we need to special-case Sparse here, since it's the only (internal) extension type that has special |
||
klass = ExtensionBlock | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -170,3 +170,25 @@ def test_merge(self, data, na_value): | |
[data[0], data[0], data[1], data[2], na_value], | ||
dtype=data.dtype)}) | ||
self.assert_frame_equal(res, exp[['ext', 'int1', 'key', 'int2']]) | ||
|
||
@pytest.mark.parametrize("columns", [ | ||
["A", "B"], | ||
pd.MultiIndex.from_tuples([('A', 'a'), ('A', 'b')], | ||
names=['outer', 'inner']), | ||
]) | ||
def test_stack(self, data, columns): | ||
df = pd.DataFrame({"A": data[:5], "B": data[:5]}) | ||
df.columns = columns | ||
result = df.stack() | ||
expected = df.astype(object).stack() | ||
# we need a second astype(object), in case the constructor inferred | ||
# object -> specialized, as is done for period. | ||
expected = expected.astype(object) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is kinda strange. For In [1]: import pandas as pd
In [2]: a = pd.core.arrays.period_array(['2000', '2001'], freq='D')
In [3]: pd.DataFrame({"A": a, "B": a}).astype(object).dtypes
Out[3]:
A object
B object
dtype: object
In [4]: pd.DataFrame({"A": a, "B": a}).astype(object).stack().dtype
Out[4]: period[D] (that's on master) |
||
|
||
if isinstance(expected, pd.Series): | ||
assert result.dtype == df.iloc[:, 0].dtype | ||
else: | ||
assert all(result.dtypes == df.iloc[:, 0].dtype) | ||
|
||
result = result.astype(object) | ||
self.assert_equal(result, expected) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was accidentally added in the PeriodArray PR. Will be implemented for good in #23284