Skip to content

Commit

Permalink
TST: completed groupby test (line) coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
wesm committed Aug 21, 2011
1 parent 907ef37 commit 00c8da0
Show file tree
Hide file tree
Showing 3 changed files with 97 additions and 35 deletions.
1 change: 1 addition & 0 deletions RELEASE.rst
Expand Up @@ -180,6 +180,7 @@ Release notes
with a `fill_value` keyword argument defaulting to NaN
* Removed `pandas.core.pytools` module. Code has been moved to
`pandas.core.common`
* The tacked-on `groupName` attribute for groups in GroupBy has been renamed to `name`

**Bug fixes**

Expand Down
37 changes: 10 additions & 27 deletions pandas/core/groupby.py
@@ -1,12 +1,11 @@
from itertools import izip
import sys
import types

import numpy as np

from pandas.core.frame import DataFrame
from pandas.core.generic import NDFrame, PandasObject
from pandas.core.index import Factor, Index, MultiIndex
from pandas.core.index import Index, MultiIndex
from pandas.core.internals import BlockManager
from pandas.core.series import Series
from pandas.core.panel import WidePanel
Expand Down Expand Up @@ -295,10 +294,9 @@ def _python_apply_general(self, arg):
not_indexed_same = False

for data in self:
if key_as_tuple:
key = data[:-1]
else:
key = data[0]
key = data[:-1]
if not key_as_tuple:
key = key[0]

group = data[-1]
group.name = key
Expand All @@ -323,7 +321,7 @@ def _wrap_frames(self, keys, values, not_indexed_same=False):
self.groupings,
axis=self.axis)
else:
result = _concat_frames(values)
result = _concat_frames(values, self.obj.index)

return result

Expand All @@ -349,9 +347,9 @@ def _is_indexed_like(obj, other):
elif isinstance(obj, DataFrame):
if isinstance(other, Series):
return obj.index.equals(other.index)
elif not isinstance(other, DataFrame):
return False

# deal with this when a case arises
assert(isinstance(other, DataFrame))
return obj._indexed_same(other)

return False
Expand Down Expand Up @@ -386,10 +384,6 @@ def __repr__(self):
def __iter__(self):
return iter(self.indices)

def get_group_labels(self, group):
inds = self.indices[group]
return self.index.take(inds)

_labels = None
_ids = None
_counts = None
Expand Down Expand Up @@ -647,7 +641,6 @@ def transform(self, func):
result = self.obj.copy()

for name, group in self:
# XXX
group.name = name
res = func(group)
indexer, _ = self.obj.index.get_indexer(group.index)
Expand Down Expand Up @@ -682,8 +675,6 @@ def _agg_stride_shape(self):
return n,

def __getitem__(self, key):
if key not in self.obj:
raise KeyError('column %s not found' % key)
return SeriesGroupBy(self.obj[key], groupings=self.groupings,
exclusions=self.exclusions, name=key)

Expand Down Expand Up @@ -754,10 +745,7 @@ def _aggregate_generic(self, agger, axis=0):
result[name] = data.apply(agger, axis=axis)
except Exception, e1:
if axis == 0:
try:
return self._aggregate_item_by_item(agger)
except Exception:
raise e1
return self._aggregate_item_by_item(agger)
else:
raise e1

Expand Down Expand Up @@ -875,7 +863,7 @@ def transform(self, func):
return _concat_frames(applied, obj.index, obj.columns,
axis=self.axis)

def _concat_frames(frames, index=None, columns=None, axis=0):
def _concat_frames(frames, index, columns=None, axis=0):
if axis == 0:
all_index = [np.asarray(x.index) for x in frames]
new_index = Index(np.concatenate(all_index))
Expand All @@ -887,10 +875,7 @@ def _concat_frames(frames, index=None, columns=None, axis=0):
else:
all_columns = [np.asarray(x.columns) for x in frames]
new_columns = Index(np.concatenate(all_columns))
if index is None:
new_index = frames[0].index
else:
new_index = index
new_index = index

new_values = np.concatenate([x.values for x in frames], axis=axis)
result = DataFrame(new_values, index=new_index, columns=new_columns)
Expand Down Expand Up @@ -951,8 +936,6 @@ def _make_concat_multiindex(indexes, keys, groupings):
return MultiIndex(levels=levels, labels=labels)

def _all_indexes_same(indexes):
if len(indexes) == 1:
return True
first = indexes[0]
for index in indexes[1:]:
if not first.equals(index):
Expand Down
94 changes: 86 additions & 8 deletions pandas/tests/test_groupby.py
@@ -1,6 +1,7 @@
import nose
import unittest

from datetime import datetime
from numpy import nan

from pandas.core.daterange import DateRange
Expand Down Expand Up @@ -110,10 +111,19 @@ def test_get_group(self):
expected = wp.reindex(major=[x for x in wp.major_axis if x.month == 1])
assert_panel_equal(gp, expected)

def test_series_agg_corner(self):
def test_agg_apply_corner(self):
# nothing to group, all NA
result = self.ts.groupby(self.ts * np.nan).sum()
assert_series_equal(result, Series([]))
grouped = self.ts.groupby(self.ts * np.nan)

assert_series_equal(grouped.sum(), Series([]))
assert_series_equal(grouped.agg(np.sum), Series([]))
assert_series_equal(grouped.apply(np.sum), Series([]))

# DataFrame
grouped = self.tsframe.groupby(self.tsframe['A'] * np.nan)
assert_frame_equal(grouped.sum(), DataFrame({}))
assert_frame_equal(grouped.agg(np.sum), DataFrame({}))
assert_frame_equal(grouped.apply(np.sum), DataFrame({}))

def test_len(self):
df = tm.makeTimeDataFrame()
Expand Down Expand Up @@ -191,11 +201,34 @@ def test_transform(self):
transformed = grouped.transform(lambda x: x * x.sum())
self.assertEqual(transformed[7], 12)

transformed = grouped.transform(np.mean)
for name, group in grouped:
mean = group.mean()
for idx in group.index:
self.assertEqual(transformed[idx], mean)
def test_transform_broadcast(self):
# Checks that transform(np.mean) returns a result labelled like the
# original object, in which every entry of a group equals that group's mean.
# Three cases are covered: a Series, a DataFrame grouped along the index,
# and a DataFrame grouped along the columns (axis=1).
# NOTE(review): self.ts / self.tsframe are fixtures defined outside this
# view (presumably in setUp) -- confirm their shape there.

# Case 1: Series, keyed by the `month` attribute of each label.
grouped = self.ts.groupby(lambda x: x.month)
result = grouped.transform(np.mean)

# The transformed result must keep the original index ...
self.assert_(result.index.equals(self.ts.index))
for _, gp in grouped:
# ... and, restricted to a group's labels, equal that group's mean.
self.assert_((result.reindex(gp.index) == gp.mean()).all())

# Case 2: DataFrame, same month key; checked column by column.
grouped = self.tsframe.groupby(lambda x: x.month)
result = grouped.transform(np.mean)
self.assert_(result.index.equals(self.tsframe.index))
for _, gp in grouped:
agged = gp.mean()
res = result.reindex(gp.index)
for col in self.tsframe:
self.assert_((res[col] == agged[col]).all())

# group columns
# Case 3: the dict maps columns A, B to group 0 and C, D to group 1.
grouped = self.tsframe.groupby({'A' : 0, 'B' : 0, 'C' : 1, 'D' : 1},
axis=1)
result = grouped.transform(np.mean)
# Both axes of the result must match the original frame.
self.assert_(result.index.equals(self.tsframe.index))
self.assert_(result.columns.equals(self.tsframe.columns))
for _, gp in grouped:
# gp.mean(1) is compared per index label against the matching
# cross-section (row) of the result restricted to the group's columns.
agged = gp.mean(1)
res = result.reindex(columns=gp.columns)
for idx in gp.index:
self.assert_((res.xs(idx) == agged[idx]).all())

def test_dispatch_transform(self):
df = self.tsframe[::5].reindex(self.tsframe.index)
Expand Down Expand Up @@ -248,6 +281,21 @@ def test_series_describe_multikey(self):
assert_series_equal(result['std'], grouped.std())
assert_series_equal(result['min'], grouped.min())

def test_series_describe_single(self):
# With a single grouping key, aggregating each group with describe()
# through agg() must produce the same frame as grouped.describe().
ts = tm.makeTimeSeries()
# Key function reads the `month` attribute of each label.
grouped = ts.groupby(lambda x: x.month)
result = grouped.agg(lambda x: x.describe())
expected = grouped.describe()
# Both sides are compared as DataFrames.
assert_frame_equal(result, expected)

def test_series_agg_multikey(self):
# With two grouping keys (year and month), agg(np.sum) must give the
# same Series as the sum() method of the grouped object.
ts = tm.makeTimeSeries()
grouped = ts.groupby([lambda x: x.year, lambda x: x.month])

result = grouped.agg(np.sum)
expected = grouped.sum()
assert_series_equal(result, expected)

def test_frame_describe_multikey(self):
grouped = self.tsframe.groupby([lambda x: x.year,
lambda x: x.month])
Expand Down Expand Up @@ -482,6 +530,17 @@ def test_omit_nuisance(self):
expected = self.df.ix[:, ['A', 'C', 'D']].groupby('A').mean()
assert_frame_equal(result, expected)

df = self.df.ix[:, ['A', 'C', 'D']]
df['E'] = datetime.now()
grouped = df.groupby('A')
result = grouped.agg(np.sum)
expected = grouped.sum()
assert_frame_equal(result, expected)

# won't work with axis = 1
grouped = df.groupby({'A' : 0, 'C' : 0, 'D' : 1, 'E' : 1}, axis=1)
result = self.assertRaises(TypeError, grouped.agg, np.sum)

def test_nonsense_func(self):
df = DataFrame([0])
self.assertRaises(Exception, df.groupby, lambda x: x + 'foo')
Expand Down Expand Up @@ -587,6 +646,25 @@ def f(piece):
self.assert_(isinstance(result, DataFrame))
self.assert_(result.index.equals(ts.index))

def test_apply_transform(self):
# apply() with a function that returns an object the same size as its
# input (here x * 2) must give the same Series as transform() with that
# function.
# NOTE(review): self.ts is a fixture defined outside this view.
grouped = self.ts.groupby(lambda x: x.month)
result = grouped.apply(lambda x: x * 2)
expected = grouped.transform(lambda x: x * 2)
assert_series_equal(result, expected)

def test_apply_multikey_corner(self):
# apply() with two grouping keys, where the applied function returns a
# subset of each group: selecting the result by a group's key must
# reproduce what the function returns for that group alone.
# NOTE(review): self.tsframe is a fixture defined outside this view.
grouped = self.tsframe.groupby([lambda x: x.year,
lambda x: x.month])

def f(group):
# Sort by column 'A' and keep the last five rows.
return group.sort('A')[-5:]

result = grouped.apply(f)
for x in grouped:
# Each item is unpacked as (key parts..., group): the last element is
# the group and everything before it forms the key tuple.
key = x[:-1]
group = x[-1]
assert_frame_equal(result.ix[key], f(group))

class TestPanelGroupBy(unittest.TestCase):

def setUp(self):
Expand Down

0 comments on commit 00c8da0

Please sign in to comment.