pandas-dev · jreback · Mar 24, 2013 · Mar 23, 2013
diff --git a/RELEASE.rst b/RELEASE.rst
@@ -198,7 +198,7 @@ pandas 0.11.0
     an irrecoverable state (GH3010_)
   - Bug in DataFrame update, combine_first where non-specified values could cause
     dtype changes (GH3016_, GH3041_)
-  - Bug in groupby with first/last where dtypes could change (GH3041_)
+  - Bug in groupby with first/last where dtypes could change (GH3041_, GH2763_)
   - Formatting of an index that has ``nan`` was inconsistent or wrong (would fill from
     other values), (GH2850_)
   - Unstack of a frame with no nans would always cause dtype upcasting (GH2929_)
@@ -251,6 +251,7 @@ pandas 0.11.0
 .. _GH2746: https://github.com/pydata/pandas/issues/2746
 .. _GH2747: https://github.com/pydata/pandas/issues/2747
 .. _GH2751: https://github.com/pydata/pandas/issues/2751
+.. _GH2763: https://github.com/pydata/pandas/issues/2763
 .. _GH2776: https://github.com/pydata/pandas/issues/2776
 .. _GH2778: https://github.com/pydata/pandas/issues/2778
 .. _GH2787: https://github.com/pydata/pandas/issues/2787

diff --git a/pandas/core/groupby.py b/pandas/core/groupby.py
@@ -13,6 +13,7 @@
 from pandas.util.compat import OrderedDict
 import pandas.core.algorithms as algos
 import pandas.core.common as com
+from pandas.core.common import _possibly_downcast_to_dtype
 
 import pandas.lib as lib
 import pandas.algos as _algos
@@ -440,14 +441,7 @@ def _try_cast(self, result, obj):
 
                 # need to respect a non-number here (e.g. Decimal)
                 if len(result) and issubclass(type(result[0]),(np.number,float,int)):
-                    if issubclass(dtype.type, (np.integer, np.bool_)):
-
-                        # castable back to an int/bool as we don't have nans
-                        if com.notnull(result).all():
-                            result = result.astype(dtype)
-                    else:
-
-                        result = result.astype(dtype)
+                    result = _possibly_downcast_to_dtype(result, dtype)
 
             elif issubclass(dtype.type, np.datetime64):
                 if is_datetime64_dtype(obj.dtype):
@@ -468,7 +462,7 @@ def _cython_agg_general(self, how, numeric_only=True):
                 result, names = self.grouper.aggregate(obj.values, how)
             except AssertionError as e:
                 raise GroupByError(str(e))
-            output[name] = result
+            output[name] = self._try_cast(result, obj)
 
         if len(output) == 0:
             raise DataError('No numeric types to aggregate')

diff --git a/pandas/tests/test_groupby.py b/pandas/tests/test_groupby.py
@@ -91,48 +91,51 @@ def setUp(self):
                                       'F': np.random.randn(11)})
 
     def test_basic(self):
-        data = Series(np.arange(9) // 3, index=np.arange(9))
 
-        index = np.arange(9)
-        np.random.shuffle(index)
-        data = data.reindex(index)
+        def checkit(dtype):
+            data = Series(np.arange(9) // 3, index=np.arange(9), dtype=dtype)
 
-        grouped = data.groupby(lambda x: x // 3)
+            index = np.arange(9)
+            np.random.shuffle(index)
+            data = data.reindex(index)
 
-        for k, v in grouped:
-            self.assertEqual(len(v), 3)
+            grouped = data.groupby(lambda x: x // 3)
 
-        agged = grouped.aggregate(np.mean)
-        self.assertEqual(agged[1], 1)
+            for k, v in grouped:
+                self.assertEqual(len(v), 3)
 
-        assert_series_equal(agged, grouped.agg(np.mean))  # shorthand
-        assert_series_equal(agged, grouped.mean())
+            agged = grouped.aggregate(np.mean)
+            self.assertEqual(agged[1], 1)
 
-        # Cython only returning floating point for now...
-        assert_series_equal(grouped.agg(np.sum).astype(float),
-                            grouped.sum())
+            assert_series_equal(agged, grouped.agg(np.mean))  # shorthand
+            assert_series_equal(agged, grouped.mean())
+            assert_series_equal(grouped.agg(np.sum),grouped.sum())
 
-        transformed = grouped.transform(lambda x: x * x.sum())
-        self.assertEqual(transformed[7], 12)
+            transformed = grouped.transform(lambda x: x * x.sum())
+            self.assertEqual(transformed[7], 12)
 
-        value_grouped = data.groupby(data)
-        assert_series_equal(value_grouped.aggregate(np.mean), agged)
+            value_grouped = data.groupby(data)
+            assert_series_equal(value_grouped.aggregate(np.mean), agged)
 
-        # complex agg
-        agged = grouped.aggregate([np.mean, np.std])
-        agged = grouped.aggregate({'one': np.mean,
-                                   'two': np.std})
+            # complex agg
+            agged = grouped.aggregate([np.mean, np.std])
+            agged = grouped.aggregate({'one': np.mean,
+                                       'two': np.std})
+
+            group_constants = {
+                0: 10,
+                1: 20,
+                2: 30
+                }
+            agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
+            self.assertEqual(agged[1], 21)
 
-        group_constants = {
-            0: 10,
-            1: 20,
-            2: 30
-        }
-        agged = grouped.agg(lambda x: group_constants[x.name] + x.mean())
-        self.assertEqual(agged[1], 21)
+            # corner cases
+            self.assertRaises(Exception, grouped.aggregate, lambda x: x * 2)
 
-        # corner cases
-        self.assertRaises(Exception, grouped.aggregate, lambda x: x * 2)
+
+        for dtype in ['int64','int32','float64','float32']:
+            checkit(dtype)
 
     def test_first_last_nth(self):
         # tests for first / last / nth
@@ -185,6 +188,14 @@ def test_first_last_nth_dtypes(self):
         expected.index = ['bar', 'foo']
         assert_frame_equal(nth, expected, check_names=False)
 
+        # GH 2763, first/last shifting dtypes
+        idx = range(10)
+        idx.append(9)
+        s = Series(data=range(11), index=idx, name='IntCol')
+        self.assert_(s.dtype == 'int64')
+        f = s.groupby(level=0).first()
+        self.assert_(f.dtype == 'int64')
+
     def test_grouper_iter(self):
         self.assertEqual(sorted(self.df.groupby('A').grouper), ['bar', 'foo'])
 

diff --git a/pandas/tseries/tests/test_resample.py b/pandas/tseries/tests/test_resample.py
@@ -71,9 +71,9 @@ def test_custom_grouper(self):
         idx = idx.append(dti[-1:])
         expect = Series(arr, index=idx)
 
-        # cython returns float for now
+        # GH2763 - return input dtype if we can
         result = g.agg(np.sum)
-        assert_series_equal(result, expect.astype(float))
+        assert_series_equal(result, expect)
 
         data = np.random.rand(len(dti), 10)
         df = DataFrame(data, index=dti)