BUG: GH11847 Unstack with mixed dtypes coerces everything to object

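Changed how the original Series/DataFrame is copied when building the intermediate object: it is now created with ``.copy()`` and given the new index, instead of being rebuilt from ``.values``, which does not preserve dtypes.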
pandas-dev · Sep 25, 2016 · 47938ad · 47938ad
1 parent 99b5876
commit 47938ad
Show file tree

Hide file tree

Showing 3 changed files with 46 additions and 3 deletions.
diff --git a/doc/source/whatsnew/v0.19.0.txt b/doc/source/whatsnew/v0.19.0.txt
@@ -1573,8 +1573,9 @@ Bug Fixes
 - Bug in ``Index`` raises ``KeyError`` displaying incorrect column when column is not in the df and columns contains duplicate values (:issue:`13822`)
 - Bug in ``Period`` and ``PeriodIndex`` creating wrong dates when frequency has combined offset aliases (:issue:`13874`)
 - Bug in ``.to_string()`` where calling it with an integer ``line_width`` and ``index=False`` raised an ``UnboundLocalError`` because ``idx`` was referenced before assignment.
-
 - Bug in ``eval()`` where the ``resolvers`` argument would not accept a list (:issue:`14095`)
 - Bugs in ``stack``, ``get_dummies``, ``make_axis_dummies`` which don't preserve categorical dtypes in (multi)indexes (:issue:`13854`)
 - ``PeriodIndex`` can now accept ``list`` and ``array`` which contains ``pd.NaT`` (:issue:`13430`)
 - Bug in ``df.groupby`` where ``.median()`` returns arbitrary values if grouped dataframe contains empty bins (:issue:`13629`)
+- Bug in ``read_csv()``, where aliases for utf-xx (e.g. UTF-xx, UTF_xx, utf_xx) raised UnicodeDecodeError (:issue:`13549`)
+- Bug in ``unstack()`` where, if called with a list of column(s) as argument, all columns were coerced to ``object`` regardless of their dtypes (:issue:`11847`)
diff --git a/pandas/core/reshape.py b/pandas/core/reshape.py
@@ -277,7 +277,8 @@ def _unstack_multiple(data, clocs):
                              verify_integrity=False)
 
     if isinstance(data, Series):
-        dummy = Series(data.values, index=dummy_index)
+        dummy = data.copy()
+        dummy.index = dummy_index
         unstacked = dummy.unstack('__placeholder__')
         new_levels = clevels
         new_names = cnames
@@ -292,7 +293,8 @@ def _unstack_multiple(data, clocs):
 
             return result
 
-        dummy = DataFrame(data.values, index=dummy_index, columns=data.columns)
+        dummy = data.copy()
+        dummy.index = dummy_index
 
         unstacked = dummy.unstack('__placeholder__')
         if isinstance(unstacked, Series):

diff --git a/pandas/tests/frame/test_reshape.py b/pandas/tests/frame/test_reshape.py
@@ -282,6 +282,46 @@ def test_unstack_fill_frame_categorical(self):
                              index=list('xyz'))
         assert_frame_equal(result, expected)
 
+    def test_unstack_preserve_dtypes(self):
+        # GH 11847: unstack(['level']) must give the same frame as unstack('level')
+        df = pd.DataFrame(dict(state=['IL', 'MI', 'NC'],
+                               index=['a', 'b', 'c'],
+                               A=np.random.rand(3),  # unseeded random floats
+                               B=1,
+                               C='foo',
+                               D=pd.Timestamp('20010102'),
+                               E=pd.Series([1.0, 50.0, 100.0]
+                                           ).astype('float32'),
+                               F=pd.Series([3.0, 4.0, 5.0]).astype('float64'),
+                               G=False,
+                               H=pd.Series([1, 200, 923442], dtype='int8')))  # NOTE(review): 200 and 923442 are outside the int8 range - confirm intended
+
+        df1 = df.set_index(['state', 'index'])  # string-valued index levels
+        unstacked1 = df1.unstack(['index'])
+        unstacked2 = df1.unstack('index')
+        assert_frame_equal(unstacked1, unstacked2)
+
+        df1 = df.set_index(['F', 'C'])  # float64 level paired with a constant string level
+        unstacked1 = df1.unstack(['F'])
+        unstacked2 = df1.unstack('F')
+        assert_frame_equal(unstacked1, unstacked2)
+
+        df1 = df.set_index(['G', 'B', 'state'])  # three-level index; the middle level is unstacked
+        unstacked1 = df1.unstack(['B'])
+        unstacked2 = df1.unstack('B')
+        assert_frame_equal(unstacked1, unstacked2)
+
+        df1 = df.set_index(['E', 'A'])  # float32 level paired with the random float level
+        unstacked1 = df1.unstack(['E'])
+        unstacked2 = df1.unstack('E')
+        assert_frame_equal(unstacked1, unstacked2)
+
+        df1 = df.set_index(['state', 'index'])
+        s = df1['A']  # same list-vs-scalar comparison, starting from a single column
+        unstacked1 = s.unstack(['index'])
+        unstacked2 = s.unstack('index')
+        assert_frame_equal(unstacked1, unstacked2)
+
     def test_stack_ints(self):
         columns = MultiIndex.from_tuples(list(itertools.product(range(3),
                                                                 repeat=3)))