add nlargest to Series

mrocklin · Jul 8, 2015 · 274d5d9
1 parent 4373fbd
commit 274d5d9
Show file tree

Hide file tree

Showing 2 changed files with 12 additions and 0 deletions.
diff --git a/dask/dataframe/core.py b/dask/dataframe/core.py
@@ -485,6 +485,11 @@ def value_counts(self):
         agg = lambda s: s.groupby(level=0).sum()
         return aca(self, chunk=chunk, aggregate=agg, columns=self.columns)
 
+    @wraps(pd.Series.nlargest)
+    def nlargest(self, n=5):
+        f = lambda s: s.nlargest(n)  # top-n reducer; handed to aca twice below (presumably as chunk and aggregate, cf. value_counts -- confirm)
+        return aca(self, f, f, columns=self.columns)
+
     @wraps(pd.Series.isin)
     def isin(self, other):
         return elemwise(pd.Series.isin, self, other)

diff --git a/dask/dataframe/tests/test_dataframe.py b/dask/dataframe/tests/test_dataframe.py
@@ -569,3 +569,10 @@ def test_loc_on_pandas_datetimes():
 def test_coerce_loc_index():
     for t in [pd.Timestamp, np.datetime64]:
         assert isinstance(_coerce_loc_index([t('2014')], '2014'), t)
+
+
+def test_nlargest_series():
+    s = pd.Series([1, 3, 5, 2, 4, 6])
+    ss = dd.from_pandas(s, npartitions=2)  # two partitions, so nlargest is exercised on a partitioned series
+
+    assert eq(ss.nlargest(2), s.nlargest(2))