diff --git a/dask/dataframe/tests/test_wide.py b/dask/dataframe/tests/test_wide.py
index e5e07a6b6fed..48d58c3a3a60 100644
--- a/dask/dataframe/tests/test_wide.py
+++ b/dask/dataframe/tests/test_wide.py
@@ -20,3 +20,5 @@ def test_WideDataFrame():
 
 
     tm.assert_series_equal((ddf.x + ddf.y).compute(), df.x + df.y)
+
+    tm.assert_frame_equal(ddf.cov().compute(), df.cov())
diff --git a/dask/dataframe/wide.py b/dask/dataframe/wide.py
index f1aaaade3797..97f02f172b4d 100644
--- a/dask/dataframe/wide.py
+++ b/dask/dataframe/wide.py
@@ -1,6 +1,9 @@
-import pandas as pd
 from collections import Iterable
 from operator import getitem
+import operator
+
+import pandas as pd
+from toolz import merge
 
 from ..base import Base, tokenize
 from .. import threaded
@@ -63,6 +66,45 @@ def __dir__(self):
         return sorted(set(dir(type(self)) + list(self.__dict__) +
                           list(self.columns)))
 
+    def cov(self):
+        """Lazily compute the pairwise covariance of the columns.
+
+        Covariance is symmetric, so one ``pd.Series.cov`` task is built per
+        unordered pair of columns and reused for both ``(a, b)`` and
+        ``(b, a)``.  Each output column ``a`` is a ``pd.Series`` task that
+        gathers the covariances of ``a`` against every column, uses the
+        column labels as its index and ``a`` as its name.
+
+        Returns
+        -------
+        WideDataFrame
+            Built on a graph merging the new tasks with ``self.dask``;
+            ``self.columns`` is passed as both trailing constructor
+            arguments (presumably its columns and index; confirm against
+            ``WideDataFrame.__init__``).
+        """
+        name = 'cov-' + self._name
+        columns = list(self.columns)
+        position = dict((col, i) for i, col in enumerate(columns))
+
+        def pair_key(a, b):
+            # Canonical key: the column that comes first goes first.
+            if position[a] > position[b]:
+                a, b = b, a
+            return (name, a, b)
+
+        # Upper triangle only (diagonal included).
+        dsk = dict(((name, a, b),
+                    (pd.Series.cov, (self._name, a), (self._name, b)))
+                   for i, a in enumerate(columns)
+                   for b in columns[i:])
+        dsk2 = dict(((name, a),
+                     (pd.Series, (list, [pair_key(a, b) for b in columns]),
+                      self.columns, None, a))
+                    for a in columns)
+        return WideDataFrame(merge(dsk, dsk2, self.dask), name,
+                             self.columns, self.columns)
+
 
 class WideSeries(Base):
     _optimize = staticmethod(lambda dsk, keys: dsk)