Skip to content

Commit

Permalink
Merge pull request #7809 from sinhrks/histplot
Browse files Browse the repository at this point in the history
ENH/CLN: add HistPlot class inheriting MPLPlot
  • Loading branch information
jreback committed Aug 11, 2014
2 parents f400014 + 4a23a84 commit cd7f09e
Show file tree
Hide file tree
Showing 4 changed files with 374 additions and 68 deletions.
2 changes: 2 additions & 0 deletions doc/source/v0.15.0.txt
Original file line number Diff line number Diff line change
Expand Up @@ -170,6 +170,8 @@ previously results in ``Exception`` or ``TypeError`` (:issue:`7812`)
- ``merge``, ``DataFrame.merge``, and ``ordered_merge`` now return the same type
as the ``left`` argument. (:issue:`7737`)

- Histogram from ``DataFrame.plot`` with ``kind='hist'`` (:issue:`7809`), See :ref:`the docs<visualization.hist>`.

.. _whatsnew_0150.dt:

.dt accessor
Expand Down
41 changes: 41 additions & 0 deletions doc/source/visualization.rst
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,7 @@ a handful of values for plots other than the default Line plot.
These include:

* :ref:`'bar' <visualization.barplot>` or :ref:`'barh' <visualization.barplot>` for bar plots
* :ref:`'hist' <visualization.hist>` for histogram
* :ref:`'kde' <visualization.kde>` or ``'density'`` for density plots
* :ref:`'area' <visualization.area_plot>` for area plots
* :ref:`'scatter' <visualization.scatter_matrix>` for scatter plots
Expand Down Expand Up @@ -205,6 +206,46 @@ To get horizontal bar plots, pass ``kind='barh'``:

Histograms
~~~~~~~~~~

.. versionadded:: 0.15.0

Histogram can be drawn specifying ``kind='hist'``.

.. ipython:: python
df4 = DataFrame({'a': randn(1000) + 1, 'b': randn(1000),
'c': randn(1000) - 1}, columns=['a', 'b', 'c'])
plt.figure();
@savefig hist_new.png
df4.plot(kind='hist', alpha=0.5)
Histogram can be stacked by ``stacked=True``. Bin size can be changed by ``bins`` keyword.

.. ipython:: python
plt.figure();
@savefig hist_new_stacked.png
df4.plot(kind='hist', stacked=True, bins=20)
You can pass other keywords supported by matplotlib ``hist``. For example, horizontal and cumulative histgram can be drawn by ``orientation='horizontal'`` and ``cumulative='True'``.

.. ipython:: python
plt.figure();
@savefig hist_new_kwargs.png
df4['a'].plot(kind='hist', orientation='horizontal', cumulative=True)
See the :meth:`hist <matplotlib.axes.Axes.hist>` method and the
`matplotlib hist documenation <http://matplotlib.org/api/pyplot_api.html#matplotlib.pyplot.hist>`__ for more.


The previous interface ``DataFrame.hist`` to plot histogram still can be used.

.. ipython:: python
plt.figure();
Expand Down
221 changes: 212 additions & 9 deletions pandas/tests/test_graphics.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,7 +452,7 @@ def test_plot(self):
_check_plot_works(self.ts.plot, kind='area', stacked=False)
_check_plot_works(self.iseries.plot)

for kind in ['line', 'bar', 'barh', 'kde']:
for kind in ['line', 'bar', 'barh', 'kde', 'hist']:
if not _ok_for_gaussian_kde(kind):
continue
_check_plot_works(self.series[:5].plot, kind=kind)
Expand Down Expand Up @@ -616,7 +616,13 @@ def test_pie_series(self):
self._check_text_labels(ax.texts, series.index)

@slow
def test_hist(self):
def test_hist_df_kwargs(self):
df = DataFrame(np.random.randn(10, 2))
ax = df.plot(kind='hist', bins=5)
self.assertEqual(len(ax.patches), 10)

@slow
def test_hist_legacy(self):
_check_plot_works(self.ts.hist)
_check_plot_works(self.ts.hist, grid=False)
_check_plot_works(self.ts.hist, figsize=(8, 10))
Expand All @@ -637,7 +643,7 @@ def test_hist(self):
self.ts.hist(by=self.ts.index, figure=fig)

@slow
def test_hist_bins(self):
def test_hist_bins_legacy(self):
df = DataFrame(np.random.randn(10, 2))
ax = df.hist(bins=2)[0][0]
self.assertEqual(len(ax.patches), 2)
Expand Down Expand Up @@ -701,13 +707,25 @@ def test_plot_fails_when_ax_differs_from_figure(self):
self.ts.hist(ax=ax1, figure=fig2)

@slow
def test_kde(self):
def test_hist_kde(self):
ax = self.ts.plot(kind='hist', logy=True)
self._check_ax_scales(ax, yaxis='log')
xlabels = ax.get_xticklabels()
# ticks are values, thus ticklabels are blank
self._check_text_labels(xlabels, [''] * len(xlabels))
ylabels = ax.get_yticklabels()
self._check_text_labels(ylabels, [''] * len(ylabels))

tm._skip_if_no_scipy()
_skip_if_no_scipy_gaussian_kde()
_check_plot_works(self.ts.plot, kind='kde')
_check_plot_works(self.ts.plot, kind='density')
ax = self.ts.plot(kind='kde', logy=True)
self._check_ax_scales(ax, yaxis='log')
xlabels = ax.get_xticklabels()
self._check_text_labels(xlabels, [''] * len(xlabels))
ylabels = ax.get_yticklabels()
self._check_text_labels(ylabels, [''] * len(ylabels))

@slow
def test_kde_kwargs(self):
Expand All @@ -718,9 +736,29 @@ def test_kde_kwargs(self):
_check_plot_works(self.ts.plot, kind='density', bw_method=.5, ind=linspace(-100,100,20))
ax = self.ts.plot(kind='kde', logy=True, bw_method=.5, ind=linspace(-100,100,20))
self._check_ax_scales(ax, yaxis='log')
self._check_text_labels(ax.yaxis.get_label(), 'Density')

@slow
def test_kde_color(self):
def test_hist_kwargs(self):
ax = self.ts.plot(kind='hist', bins=5)
self.assertEqual(len(ax.patches), 5)
self._check_text_labels(ax.yaxis.get_label(), 'Degree')
tm.close()

ax = self.ts.plot(kind='hist', orientation='horizontal')
self._check_text_labels(ax.xaxis.get_label(), 'Degree')
tm.close()

ax = self.ts.plot(kind='hist', align='left', stacked=True)
tm.close()

@slow
def test_hist_kde_color(self):
ax = self.ts.plot(kind='hist', logy=True, bins=10, color='b')
self._check_ax_scales(ax, yaxis='log')
self.assertEqual(len(ax.patches), 10)
self._check_colors(ax.patches, facecolors=['b'] * 10)

tm._skip_if_no_scipy()
_skip_if_no_scipy_gaussian_kde()
ax = self.ts.plot(kind='kde', logy=True, color='r')
Expand Down Expand Up @@ -1611,7 +1649,7 @@ def test_boxplot_return_type(self):
self._check_box_return_type(result, 'both')

@slow
def test_kde(self):
def test_kde_df(self):
tm._skip_if_no_scipy()
_skip_if_no_scipy_gaussian_kde()
df = DataFrame(randn(100, 4))
Expand All @@ -1630,7 +1668,122 @@ def test_kde(self):
self._check_ax_scales(axes, yaxis='log')

@slow
def test_hist(self):
def test_hist_df(self):
df = DataFrame(randn(100, 4))
series = df[0]

ax = _check_plot_works(df.plot, kind='hist')
expected = [com.pprint_thing(c) for c in df.columns]
self._check_legend_labels(ax, labels=expected)

axes = _check_plot_works(df.plot, kind='hist', subplots=True, logy=True)
self._check_axes_shape(axes, axes_num=4, layout=(4, 1))
self._check_ax_scales(axes, yaxis='log')

axes = series.plot(kind='hist', rot=40)
self._check_ticks_props(axes, xrot=40, yrot=0)
tm.close()

ax = series.plot(kind='hist', normed=True, cumulative=True, bins=4)
# height of last bin (index 5) must be 1.0
self.assertAlmostEqual(ax.get_children()[5].get_height(), 1.0)
tm.close()

ax = series.plot(kind='hist', cumulative=True, bins=4)
self.assertAlmostEqual(ax.get_children()[5].get_height(), 100.0)
tm.close()

# if horizontal, yticklabels are rotated
axes = df.plot(kind='hist', rot=50, fontsize=8, orientation='horizontal')
self._check_ticks_props(axes, xrot=0, yrot=50, ylabelsize=8)

def _check_box_coord(self, patches, expected_y=None, expected_h=None,
expected_x=None, expected_w=None):
result_y = np.array([p.get_y() for p in patches])
result_height = np.array([p.get_height() for p in patches])
result_x = np.array([p.get_x() for p in patches])
result_width = np.array([p.get_width() for p in patches])

if expected_y is not None:
self.assert_numpy_array_equal(result_y, expected_y)
if expected_h is not None:
self.assert_numpy_array_equal(result_height, expected_h)
if expected_x is not None:
self.assert_numpy_array_equal(result_x, expected_x)
if expected_w is not None:
self.assert_numpy_array_equal(result_width, expected_w)

@slow
def test_hist_df_coord(self):
normal_df = DataFrame({'A': np.repeat(np.array([1, 2, 3, 4, 5]),
np.array([10, 9, 8, 7, 6])),
'B': np.repeat(np.array([1, 2, 3, 4, 5]),
np.array([8, 8, 8, 8, 8])),
'C': np.repeat(np.array([1, 2, 3, 4, 5]),
np.array([6, 7, 8, 9, 10]))},
columns=['A', 'B', 'C'])

nan_df = DataFrame({'A': np.repeat(np.array([np.nan, 1, 2, 3, 4, 5]),
np.array([3, 10, 9, 8, 7, 6])),
'B': np.repeat(np.array([1, np.nan, 2, 3, 4, 5]),
np.array([8, 3, 8, 8, 8, 8])),
'C': np.repeat(np.array([1, 2, 3, np.nan, 4, 5]),
np.array([6, 7, 8, 3, 9, 10]))},
columns=['A', 'B', 'C'])

for df in [normal_df, nan_df]:
ax = df.plot(kind='hist', bins=5)
self._check_box_coord(ax.patches[:5], expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([10, 9, 8, 7, 6]))
self._check_box_coord(ax.patches[5:10], expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([8, 8, 8, 8, 8]))
self._check_box_coord(ax.patches[10:], expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([6, 7, 8, 9, 10]))

ax = df.plot(kind='hist', bins=5, stacked=True)
self._check_box_coord(ax.patches[:5], expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([10, 9, 8, 7, 6]))
self._check_box_coord(ax.patches[5:10], expected_y=np.array([10, 9, 8, 7, 6]),
expected_h=np.array([8, 8, 8, 8, 8]))
self._check_box_coord(ax.patches[10:], expected_y=np.array([18, 17, 16, 15, 14]),
expected_h=np.array([6, 7, 8, 9, 10]))

axes = df.plot(kind='hist', bins=5, stacked=True, subplots=True)
self._check_box_coord(axes[0].patches, expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([10, 9, 8, 7, 6]))
self._check_box_coord(axes[1].patches, expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([8, 8, 8, 8, 8]))
self._check_box_coord(axes[2].patches, expected_y=np.array([0, 0, 0, 0, 0]),
expected_h=np.array([6, 7, 8, 9, 10]))

# horizontal
ax = df.plot(kind='hist', bins=5, orientation='horizontal')
self._check_box_coord(ax.patches[:5], expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([10, 9, 8, 7, 6]))
self._check_box_coord(ax.patches[5:10], expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([8, 8, 8, 8, 8]))
self._check_box_coord(ax.patches[10:], expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([6, 7, 8, 9, 10]))

ax = df.plot(kind='hist', bins=5, stacked=True, orientation='horizontal')
self._check_box_coord(ax.patches[:5], expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([10, 9, 8, 7, 6]))
self._check_box_coord(ax.patches[5:10], expected_x=np.array([10, 9, 8, 7, 6]),
expected_w=np.array([8, 8, 8, 8, 8]))
self._check_box_coord(ax.patches[10:], expected_x=np.array([18, 17, 16, 15, 14]),
expected_w=np.array([6, 7, 8, 9, 10]))

axes = df.plot(kind='hist', bins=5, stacked=True,
subplots=True, orientation='horizontal')
self._check_box_coord(axes[0].patches, expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([10, 9, 8, 7, 6]))
self._check_box_coord(axes[1].patches, expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([8, 8, 8, 8, 8]))
self._check_box_coord(axes[2].patches, expected_x=np.array([0, 0, 0, 0, 0]),
expected_w=np.array([6, 7, 8, 9, 10]))

@slow
def test_hist_df_legacy(self):
_check_plot_works(self.hist_df.hist)

# make sure layout is handled
Expand Down Expand Up @@ -1849,7 +2002,7 @@ def test_plot_int_columns(self):

@slow
def test_df_legend_labels(self):
kinds = 'line', 'bar', 'barh', 'kde', 'area'
kinds = ['line', 'bar', 'barh', 'kde', 'area', 'hist']
df = DataFrame(rand(3, 3), columns=['a', 'b', 'c'])
df2 = DataFrame(rand(3, 3), columns=['d', 'e', 'f'])
df3 = DataFrame(rand(3, 3), columns=['g', 'h', 'i'])
Expand Down Expand Up @@ -1927,7 +2080,7 @@ def test_legend_name(self):

@slow
def test_no_legend(self):
kinds = 'line', 'bar', 'barh', 'kde', 'area'
kinds = ['line', 'bar', 'barh', 'kde', 'area', 'hist']
df = DataFrame(rand(3, 3), columns=['a', 'b', 'c'])

for kind in kinds:
Expand Down Expand Up @@ -2019,6 +2172,56 @@ def test_area_colors(self):
poly = [o for o in ax.get_children() if isinstance(o, PolyCollection)]
self._check_colors(poly, facecolors=rgba_colors)

@slow
def test_hist_colors(self):
default_colors = self.plt.rcParams.get('axes.color_cycle')

df = DataFrame(randn(5, 5))
ax = df.plot(kind='hist')
self._check_colors(ax.patches[::10], facecolors=default_colors[:5])
tm.close()

custom_colors = 'rgcby'
ax = df.plot(kind='hist', color=custom_colors)
self._check_colors(ax.patches[::10], facecolors=custom_colors)
tm.close()

from matplotlib import cm
# Test str -> colormap functionality
ax = df.plot(kind='hist', colormap='jet')
rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5))
self._check_colors(ax.patches[::10], facecolors=rgba_colors)
tm.close()

# Test colormap functionality
ax = df.plot(kind='hist', colormap=cm.jet)
rgba_colors = lmap(cm.jet, np.linspace(0, 1, 5))
self._check_colors(ax.patches[::10], facecolors=rgba_colors)
tm.close()

ax = df.ix[:, [0]].plot(kind='hist', color='DodgerBlue')
self._check_colors([ax.patches[0]], facecolors=['DodgerBlue'])

@slow
def test_kde_colors(self):
from matplotlib import cm

custom_colors = 'rgcby'
df = DataFrame(rand(5, 5))

ax = df.plot(kind='kde', color=custom_colors)
self._check_colors(ax.get_lines(), linecolors=custom_colors)
tm.close()

ax = df.plot(kind='kde', colormap='jet')
rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df)))
self._check_colors(ax.get_lines(), linecolors=rgba_colors)
tm.close()

ax = df.plot(kind='kde', colormap=cm.jet)
rgba_colors = lmap(cm.jet, np.linspace(0, 1, len(df)))
self._check_colors(ax.get_lines(), linecolors=rgba_colors)

def test_default_color_cycle(self):
import matplotlib.pyplot as plt
plt.rcParams['axes.color_cycle'] = list('rgbk')
Expand Down
Loading

0 comments on commit cd7f09e

Please sign in to comment.