Skip to content

Commit

Permalink
ENH: implement DataFrameGroupBy.boxplot(), close pandas-dev#1507
Browse files Browse the repository at this point in the history
  • Loading branch information
lodagro committed Jun 26, 2012
1 parent 1475971 commit 34f4deb
Show file tree
Hide file tree
Showing 3 changed files with 99 additions and 0 deletions.
3 changes: 3 additions & 0 deletions pandas/core/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -1816,6 +1816,9 @@ def _wrap_agged_blocks(self, blocks):

return result

# Expose the plotting helper as the ``boxplot`` method of DataFrameGroupBy.
# NOTE(review): the import sits here instead of at the top of the module,
# presumably to avoid a circular import with pandas.tools.plotting -- confirm.
from pandas.tools.plotting import boxplot_frame_groupby
DataFrameGroupBy.boxplot = boxplot_frame_groupby

class PanelGroupBy(NDFrameGroupBy):

def _iterate_slices(self):
Expand Down
32 changes: 32 additions & 0 deletions pandas/tests/test_graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,38 @@ def test_legend_name(self):
def _check_plot_fails(self, f, *args, **kwargs):
    """Assert that calling ``f(*args, **kwargs)`` raises an ``Exception``."""
    self.assertRaises(Exception, f, *args, **kwargs)

class TestDataFrameGroupByPlots(unittest.TestCase):
    """Plotting checks for ``DataFrameGroupBy`` objects."""

    @classmethod
    def setUpClass(cls):
        """Skip the class when IPython is loaded or matplotlib is missing."""
        from sys import modules as loaded_modules
        if 'IPython' in loaded_modules:
            raise nose.SkipTest

        try:
            import matplotlib
            # Select the 'Agg' backend before any figure is created.
            matplotlib.use('Agg', warn=False)
        except ImportError:
            raise nose.SkipTest

    @slow
    def test_boxplot(self):
        """Run ``boxplot`` with and without subplots on three groupings."""
        # Every grouping is plotted twice: default arguments first, then
        # with ``subplots=False``.
        call_options = ({}, {'subplots': False})

        # 1) Group the rows by the values of a label column.
        labelled = DataFrame(np.random.rand(10, 2), columns=['Col1', 'Col2'])
        labelled['X'] = Series(['A'] * 5 + ['B'] * 5)
        by_column = labelled.groupby(by='X')
        for options in call_options:
            _check_plot_works(by_column.boxplot, **options)

        # 2) Group the rows by the second level of a MultiIndex.
        index_tuples = zip(string.ascii_letters[:10], range(10))
        indexed = DataFrame(np.random.rand(10, 3),
                            index=MultiIndex.from_tuples(index_tuples))
        by_level = indexed.groupby(level=1)
        for options in call_options:
            _check_plot_works(by_level.boxplot, **options)

        # 3) Group the columns (axis=1) of the unstacked frame.
        by_columns_axis = indexed.unstack(level=1).groupby(level=0, axis=1)
        for options in call_options:
            _check_plot_works(by_columns_axis.boxplot, **options)

# File name of a temporary PNG image.
# NOTE(review): presumably where _check_plot_works saves figures -- confirm.
PNG_PATH = 'tmp.png'

def _check_plot_works(f, *args, **kwargs):
Expand Down
64 changes: 64 additions & 0 deletions pandas/tools/plotting.py
Original file line number Diff line number Diff line change
Expand Up @@ -1251,6 +1251,70 @@ def hist_series(self, ax=None, grid=True, xlabelsize=None, xrot=None,

return ax

def boxplot_frame_groupby(grouped, subplots=True, column=None, fontsize=None,
                          rot=0, grid=True, figsize=None, **kwds):
    """
    Make box plots from DataFrameGroupBy data.

    Parameters
    ----------
    grouped : DataFrameGroupBy
        The grouped data to plot; iterating it yields ``(key, frame)`` pairs.
    subplots : bool, default True
        * ``False`` - no subplots will be used
        * ``True`` - create a subplot for each group
    column : column name or list of names, or vector
        Can be any valid input to groupby
    fontsize : int or string
    rot : label rotation angle
    grid : forwarded unchanged to ``DataFrame.boxplot``
    figsize : forwarded unchanged to ``DataFrame.boxplot``
    kwds : other plotting keyword arguments to be passed to matplotlib boxplot
           function

    Returns
    -------
    dict of key/value = group key/DataFrame.boxplot return value
    or DataFrame.boxplot return value in case subplots=False

    Examples
    --------
    >>> import pandas
    >>> import numpy as np
    >>> import itertools
    >>>
    >>> tuples = [t for t in itertools.product(range(1000), range(4))]
    >>> index = pandas.MultiIndex.from_tuples(tuples, names=['lvl0', 'lvl1'])
    >>> data = np.random.randn(len(index),4)
    >>> df = pandas.DataFrame(data, columns=list('ABCD'), index=index)
    >>>
    >>> grouped = df.groupby(level='lvl1')
    >>> boxplot_frame_groupby(grouped)
    >>>
    >>> grouped = df.unstack(level='lvl1').groupby(level=0, axis=1)
    >>> boxplot_frame_groupby(grouped, subplots=False)
    """
    # Truthiness instead of ``is True`` so that e.g. ``1`` or ``numpy.bool_``
    # also select the one-subplot-per-group layout.
    if subplots:
        nrows, ncols = _get_layout(len(grouped))
        _, axes = _subplots(nrows=nrows, ncols=ncols, squeeze=False,
                            sharex=False, sharey=True)
        # squeeze=False is requested above, so the axes come back as a grid
        # even for a single group (assumed to mirror matplotlib's subplots
        # -- confirm against _subplots). Flatten unconditionally so the loop
        # always receives individual axes rather than a row of the grid.
        axes = axes.reshape(-1)

        ret = {}
        # zip() stops at the shorter input, so surplus axes in the layout
        # are simply left unused.
        for (key, group), ax in zip(grouped, axes):
            d = group.boxplot(ax=ax, column=column, fontsize=fontsize,
                              rot=rot, grid=grid, figsize=figsize, **kwds)
            ax.set_title(_stringify(key))
            ret[key] = d
    else:
        from pandas.tools.merge import concat
        keys, frames = zip(*grouped)
        if grouped.axis == 0:
            # Row-wise groups: put the groups side by side, keyed by group.
            df = concat(frames, keys=keys, axis=1)
        else:
            # Column-wise groups: join the pieces back into one frame.
            if len(frames) > 1:
                df = frames[0].join(frames[1:])
            else:
                df = frames[0]
        ret = df.boxplot(column=column, fontsize=fontsize, rot=rot,
                         grid=grid, figsize=figsize, **kwds)
    return ret

def _grouped_plot(plotf, data, column=None, by=None, numeric_only=True,
figsize=None, sharex=True, sharey=True, layout=None,
Expand Down

0 comments on commit 34f4deb

Please sign in to comment.