Skip to content

Commit

Permalink
BUG: GH11847 Unstack with mixed dtypes coerces everything to object
Browse files Browse the repository at this point in the history
Changed the way in which the original data frame is copied (dropped use of .values, since it does not preserve dtypes).
  • Loading branch information
kordek committed Sep 25, 2016
1 parent 99b5876 commit 47938ad
Show file tree
Hide file tree
Showing 3 changed files with 46 additions and 3 deletions.
3 changes: 2 additions & 1 deletion doc/source/whatsnew/v0.19.0.txt
Expand Up @@ -1573,8 +1573,9 @@ Bug Fixes
- Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)
- Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)
- Bug in ``.to_string()`` where calling it with an integer ``line_width`` and ``index=False`` raised an ``UnboundLocalError`` because ``idx`` was referenced before assignment.

- Bug in ``eval()`` where the ``resolvers`` argument would not accept a list (:issue:`14095`)
- Bugs in ``stack``, ``get_dummies``, ``make_axis_dummies`` which don't preserve categorical dtypes in (multi)indexes (:issue:`13854`)
- ``PeriodIndex`` can now accept ``list`` and ``array`` which contain ``pd.NaT`` (:issue:`13430`)
- Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`)
- Bug in ``read_csv()``, where aliases for utf-xx (e.g. UTF-xx, UTF_xx, utf_xx) raised UnicodeDecodeError (:issue:`13549`)
- Bug in ``unstack()`` where, if called with a list of column(s) as the argument, all columns were coerced to ``object`` regardless of their dtypes (:issue:`11847`)
6 changes: 4 additions & 2 deletions pandas/core/reshape.py
Expand Up @@ -277,7 +277,8 @@ def _unstack_multiple(data, clocs):
verify_integrity=False)

if isinstance(data, Series):
dummy = Series(data.values, index=dummy_index)
dummy = data.copy()
dummy.index = dummy_index
unstacked = dummy.unstack('__placeholder__')
new_levels = clevels
new_names = cnames
Expand All @@ -292,7 +293,8 @@ def _unstack_multiple(data, clocs):

return result

dummy = DataFrame(data.values, index=dummy_index, columns=data.columns)
dummy = data.copy()
dummy.index = dummy_index

unstacked = dummy.unstack('__placeholder__')
if isinstance(unstacked, Series):
Expand Down
40 changes: 40 additions & 0 deletions pandas/tests/frame/test_reshape.py
Expand Up @@ -282,6 +282,46 @@ def test_unstack_fill_frame_categorical(self):
index=list('xyz'))
assert_frame_equal(result, expected)

def test_unstack_preserve_dtypes(self):
# Checks fix for #11847
df = pd.DataFrame(dict(state=['IL', 'MI', 'NC'],
index=['a', 'b', 'c'],
A=np.random.rand(3),
B=1,
C='foo',
D=pd.Timestamp('20010102'),
E=pd.Series([1.0, 50.0, 100.0]
).astype('float32'),
F=pd.Series([3.0, 4.0, 5.0]).astype('float64'),
G=False,
H=pd.Series([1, 200, 923442], dtype='int8')))

df1 = df.set_index(['state', 'index'])
unstacked1 = df1.unstack(['index'])
unstacked2 = df1.unstack('index')
assert_frame_equal(unstacked1, unstacked2)

df1 = df.set_index(['F', 'C'])
unstacked1 = df1.unstack(['F'])
unstacked2 = df1.unstack('F')
assert_frame_equal(unstacked1, unstacked2)

df1 = df.set_index(['G', 'B', 'state'])
unstacked1 = df1.unstack(['B'])
unstacked2 = df1.unstack('B')
assert_frame_equal(unstacked1, unstacked2)

df1 = df.set_index(['E', 'A'])
unstacked1 = df1.unstack(['E'])
unstacked2 = df1.unstack('E')
assert_frame_equal(unstacked1, unstacked2)

df1 = df.set_index(['state', 'index'])
s = df1['A']
unstacked1 = s.unstack(['index'])
unstacked2 = s.unstack('index')
assert_frame_equal(unstacked1, unstacked2)

def test_stack_ints(self):
columns = MultiIndex.from_tuples(list(itertools.product(range(3),
repeat=3)))
Expand Down

0 comments on commit 47938ad

Please sign in to comment.