Skip to content

Commit

Permalink
add WideDataFrame.cov
Browse files Browse the repository at this point in the history
Decided it was good to have something non-trivial
  • Loading branch information
mrocklin committed Sep 4, 2015
1 parent db7d566 commit 62b6a9b
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 1 deletion.
2 changes: 2 additions & 0 deletions dask/dataframe/tests/test_wide.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,5 @@ def test_WideDataFrame():

tm.assert_series_equal((ddf.x + ddf.y).compute(),
df.x + df.y)

tm.assert_frame_equal(ddf.cov().compute(), df.cov())
18 changes: 17 additions & 1 deletion dask/dataframe/wide.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
import pandas as pd
from collections import Iterable
from operator import getitem
import operator

import pandas as pd
from toolz import merge

from ..base import Base, tokenize
from .. import threaded
Expand Down Expand Up @@ -63,6 +66,19 @@ def __dir__(self):
return sorted(set(dir(type(self)) + list(self.__dict__) +
list(self.columns)))

def cov(self):
    """Build the task graph for the pairwise covariance of all columns.

    For every ordered pair of columns ``(a, b)`` a task is added that
    calls ``pd.Series.cov`` on the two column keys of this frame.  For
    every column ``a`` a second task gathers that column's covariances
    into a ``pd.Series`` indexed by ``self.columns`` and named ``a``.

    Returns
    -------
    WideDataFrame
        A new frame named ``'cov-' + self._name`` whose graph is the
        new tasks merged with ``self.dask``.  ``self.columns`` is passed
        as both trailing constructor arguments (presumably the columns
        and the index of the result -- confirm against
        ``WideDataFrame.__init__``).
    """
    columns = self.columns
    source = self._name
    name = 'cov-' + source

    # Scalar tasks: one covariance per ordered (a, b) pair, so both
    # (a, b) and (b, a) get their own entry.
    pairwise = {}
    for a in columns:
        for b in columns:
            pairwise[(name, a, b)] = (pd.Series.cov,
                                      (source, a),
                                      (source, b))

    # Series tasks: collect the scalars of column ``a`` in column order.
    # Positional pd.Series arguments are (data, index, dtype, name).
    assembled = {}
    for a in columns:
        scalars = [(name, a, b) for b in columns]
        assembled[(name, a)] = (pd.Series, (list, scalars),
                                columns, None, a)

    graph = merge(pairwise, assembled, self.dask)
    return WideDataFrame(graph, name, columns, columns)


class WideSeries(Base):
_optimize = staticmethod(lambda dsk, keys: dsk)
Expand Down

0 comments on commit 62b6a9b

Please sign in to comment.