Skip to content

Commit

Permalink
ENH: group by multiple levels, GH #103
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Nov 13, 2011
1 parent eb3d1b1 commit 73ab1c1
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 18 deletions.
2 changes: 2 additions & 0 deletions RELEASE.rst
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ pandas 0.5.1
- Add `DataFrame.from_dict` with similar `orient` option
- Can now pass list of tuples or list of lists to `DataFrame.from_records`
for fast conversion to DataFrame (GH #357)
- Can now pass multiple levels (by position or by name) to `groupby`, e.g.
`df.groupby(level=[0, 1])` (GH #103)
**Improvements to existing features**

Expand Down
4 changes: 2 additions & 2 deletions pandas/core/generic.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,9 +86,9 @@ def groupby(self, by=None, axis=0, level=None, as_index=True):
If a dict or Series is passed, the Series or dict VALUES will be
used to determine the groups
axis : int, default 0
level : int, default None
level : int, level name, or sequence of such, default None
If the axis is a MultiIndex (hierarchical), group by a particular
level
level or levels
as_index : boolean, default True
For aggregated output, return object with group labels as the
index. Only relevant for DataFrame input. as_index=False is
Expand Down
36 changes: 20 additions & 16 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,25 +608,29 @@ def _get_groupings(obj, grouper=None, axis=0, level=None):
if level is not None and not isinstance(group_axis, MultiIndex):
raise ValueError('can only specify level with multi-level index')

if not isinstance(grouper, (tuple, list)):
groupers = [grouper]
else:
groupers = grouper

if isinstance(level, (tuple, list)):
if grouper is None:
groupers = [None] * len(level)
levels = level
else:
levels = [level] * len(groupers)

groupings = []
exclusions = []
if isinstance(grouper, (tuple, list)):
for i, arg in enumerate(grouper):
name = 'key_%d' % i
if isinstance(arg, basestring):
exclusions.append(arg)
name = arg
arg = obj[arg]

ping = Grouping(group_axis, arg, name=name, level=level)
groupings.append(ping)
else:
for i, (gpr, level) in enumerate(zip(groupers, levels)):
name = None
if isinstance(grouper, basestring):
exclusions.append(grouper)
name = grouper
grouper = obj[grouper]
ping = Grouping(group_axis, grouper, name=name, level=level)
if isinstance(gpr, basestring):
exclusions.append(gpr)
name = gpr
gpr = obj[gpr]
ping = Grouping(group_axis, gpr, name=name, level=level)
if ping.name is None:
ping.name = 'key_%d' % i
groupings.append(ping)

return groupings, exclusions
Expand Down
17 changes: 17 additions & 0 deletions pandas/tests/test_multilevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,6 +597,23 @@ def test_frame_group_ops(self):
skipna=skipna)
assert_frame_equal(leftside, rightside)

def test_groupby_multilevel(self):
    # Grouping by a list of index levels must agree with grouping by the
    # corresponding arrays of level values, and must work whether the levels
    # are given by position or by name.
    result = self.ymd.groupby(level=[0, 1]).mean()

    k1 = self.ymd.index.get_level_values(0)
    k2 = self.ymd.index.get_level_values(1)

    expected = self.ymd.groupby([k1, k2]).mean()

    assert_frame_equal(result, expected)
    # The result's index names must equal the names of the first two levels
    # of the original index. `assertEqual` is used because `assertEquals` is
    # a deprecated alias that newer unittest versions no longer provide.
    self.assertEqual(result.index.names, self.ymd.index.names[:2])

    # Same grouping, this time referring to the levels by name.
    result2 = self.ymd.groupby(level=self.ymd.index.names[:2]).mean()
    assert_frame_equal(result, result2)

def test_groupby_multilevel_with_transform(self):
pass

if __name__ == '__main__':

# unittest.main()
Expand Down

0 comments on commit 73ab1c1

Please sign in to comment.