Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

Already on GitHub? Sign in to your account

value_counts() can now compute relative frequencies. #2710

Closed
wants to merge 1 commit into
from
Jump to file or symbol
Failed to load files and symbols.
+20 −3
Split
@@ -147,7 +147,7 @@ def factorize(values, sort=False, order=None, na_sentinel=-1):
return labels, uniques
-def value_counts(values, sort=True, ascending=False):
+def value_counts(values, sort=True, ascending=False, normalize=False):
"""
Compute a histogram of the counts of non-null values
@@ -158,6 +158,8 @@ def value_counts(values, sort=True, ascending=False):
Sort by values
ascending : boolean, default False
Sort in ascending order
+ normalize : boolean, default False
+ If True, compute a relative histogram (frequencies instead of counts)
Returns
-------
@@ -190,6 +192,9 @@ def value_counts(values, sort=True, ascending=False):
if not ascending:
result = result[::-1]
+ if normalize:
+ result = result / float(values.size)
+
return result
View
@@ -1379,18 +1379,25 @@ def count(self, level=None):
return notnull(self.values).sum()
- def value_counts(self):
+ def value_counts(self, normalize=False):
"""
Returns Series containing counts of unique values. The resulting Series
will be in descending order so that the first element is the most
frequently-occurring element. Excludes NA values
+ Parameters
+ ----------
+ normalize : boolean, default False
+ If True, the returned Series will contain the relative
+ frequencies of the unique values instead of their counts.
+
Returns
-------
counts : Series
"""
from pandas.core.algorithms import value_counts
- return value_counts(self.values, sort=True, ascending=False)
+ return value_counts(self.values, sort=True, ascending=False,
+ normalize=normalize)
def unique(self):
"""
@@ -2383,6 +2383,11 @@ def test_value_counts_nunique(self):
expected = Series([4, 3, 2, 1], index=['b', 'a', 'd', 'c'])
assert_series_equal(hist, expected)
+ # relative histogram.
+ hist = s.value_counts(normalize=True)
+ expected = Series([.4, .3, .2, .1], index=['b', 'a', 'd', 'c'])
+ assert_series_equal(hist, expected)
+
self.assertEquals(s.nunique(), 4)
# handle NA's properly