Skip to content

Commit

Permalink
add nlargest to Series
Browse files Browse the repository at this point in the history
  • Loading branch information
mrocklin committed Jul 8, 2015
1 parent 4373fbd commit 274d5d9
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 0 deletions.
5 changes: 5 additions & 0 deletions dask/dataframe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,6 +485,11 @@ def value_counts(self):
agg = lambda s: s.groupby(level=0).sum()
return aca(self, chunk=chunk, aggregate=agg, columns=self.columns)

@wraps(pd.Series.nlargest)
def nlargest(self, n=5):
    # The overall n largest values are always found among the n largest of
    # each piece, so one reduction is handed to ``aca`` as both the ``chunk``
    # and the ``aggregate`` step.
    # NOTE(review): assumes ``aca`` applies ``chunk`` per partition and then
    # ``aggregate`` on the combined results -- confirm against its definition.
    def top_n(part):
        return part.nlargest(n)

    return aca(self, chunk=top_n, aggregate=top_n, columns=self.columns)

@wraps(pd.Series.isin)
def isin(self, other):
    # Delegate to ``elemwise`` with the pandas method, this series and
    # ``other`` as its arguments.
    membership = elemwise(pd.Series.isin, self, other)
    return membership
Expand Down
7 changes: 7 additions & 0 deletions dask/dataframe/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -569,3 +569,10 @@ def test_loc_on_pandas_datetimes():
def test_coerce_loc_index():
    # For each timestamp flavour, a string key must be coerced to the same
    # type as the division values it is looked up against.
    for ts_type in (pd.Timestamp, np.datetime64):
        coerced = _coerce_loc_index([ts_type('2014')], '2014')
        assert isinstance(coerced, ts_type)


def test_nlargest_series():
    # Build the same data as a pandas Series and as a two-partition dask
    # Series, then check that both report the same two largest values.
    pandas_series = pd.Series([1, 3, 5, 2, 4, 6])
    dask_series = dd.from_pandas(pandas_series, npartitions=2)

    expected = pandas_series.nlargest(2)
    actual = dask_series.nlargest(2)
    assert eq(actual, expected)

0 comments on commit 274d5d9

Please sign in to comment.