Skip to content

Commit

Permalink
Add DataFrame.eval
Browse files: browse the repository at this point in the history.
Fixes dask#1153
  • Loading branch information
mrocklin committed May 12, 2016
1 parent 130a6a9 commit b9d55b9
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 0 deletions.
12 changes: 12 additions & 0 deletions dask/dataframe/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -1522,6 +1522,14 @@ def query(self, expr, **kwargs):
return self._constructor(merge(dsk, self.dask), name,
dummy, self.divisions)

@derived_from(pd.DataFrame)
def eval(self, expr, inplace=None, **kwargs):
    # Evaluate the string expression ``expr`` against every partition.
    #
    # expr:     expression string handed to the pandas ``eval`` call.
    # inplace:  forwarded to pandas; an assignment expression combined with
    #           ``inplace`` of True or None is rejected here.
    # kwargs:   forwarded unchanged to the pandas ``eval`` call.
    #
    # Raises NotImplementedError when ``expr`` contains an assignment and
    # ``inplace`` is True or None.
    #
    # A bare ``'=' in expr`` test also matches the comparison operators
    # ``==``, ``!=``, ``<=`` and ``>=``, which would wrongly reject plain
    # comparisons such as ``'x == y'`` under the default ``inplace=None``.
    # Remove those operators first so only a genuine assignment ``=`` remains.
    remainder = expr
    for comparison in ('==', '!=', '<=', '>='):
        remainder = remainder.replace(comparison, '')
    if '=' in remainder and inplace in (True, None):
        raise NotImplementedError("Inplace eval not supported."
                                  " Please use inplace=False")
    # Evaluate once on ``self._pd`` to obtain the metadata passed to
    # ``map_partitions`` alongside the per-partition ``_eval`` call.
    meta = self._pd.eval(expr, inplace=inplace, **kwargs)
    return self.map_partitions(_eval, meta, expr, inplace=inplace, **kwargs)

@derived_from(pd.DataFrame)
def dropna(self, how='any', subset=None):
# for cloudpickle
Expand Down Expand Up @@ -2702,3 +2710,7 @@ def set_sorted_index(df, index, drop=True, **kwargs):

def _set_sorted_index(df, idx, drop):
    """Return ``df.set_index(idx, drop=drop)``.

    Kept as a module-level function so it can be passed around by name.
    """
    reindexed = df.set_index(idx, drop=drop)
    return reindexed


def _eval(df, expr, **kwargs):
    """Call ``df.eval(expr, **kwargs)`` and return its result."""
    evaluate = df.eval
    return evaluate(expr, **kwargs)
18 changes: 18 additions & 0 deletions dask/dataframe/tests/test_dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -1344,6 +1344,24 @@ def test_query():
assert eq(q, df.query('x**2 > y'))


# Checks that eval on a dask dataframe matches eval on the equivalent
# pandas dataframe, and that in-place assignment is rejected.
# Skipped when the installed pandas version compares <= '0.18.0'.
@pytest.mark.skipif(LooseVersion(pd.__version__) <= '0.18.0',
reason="eval inplace not supported")
def test_eval():
# Small two-column frame, split into two dask partitions.
p = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]})
d = dd.from_pandas(p, npartitions=2)
# ImportError raised inside this context is ignored.
# NOTE(review): presumably covers a missing optional eval backend -- confirm.
with ignoring(ImportError):
# Plain expression: dask result must equal the pandas result.
assert eq(p.eval('x + y'), d.eval('x + y'))
# Assignment with inplace=False: both sides return a new frame to compare.
assert eq(p.eval('z = x + y', inplace=False),
d.eval('z = x + y', inplace=False))
# Explicit in-place assignment must be refused by the dask dataframe.
with pytest.raises(NotImplementedError):
d.eval('z = x + y', inplace=True)

# If pandas returns None for inplace=None (i.e. it modified ``p`` in place),
# the dask dataframe must refuse the same call.
if p.eval('z = x + y', inplace=None) is None:
with pytest.raises(NotImplementedError):
d.eval('z = x + y', inplace=None)



def test_deterministic_arithmetic_names():
df = pd.DataFrame({'x': [1, 2, 3, 4], 'y': [5, 6, 7, 8]})
a = dd.from_pandas(df, npartitions=2)
Expand Down

0 comments on commit b9d55b9

Please sign in to comment.