Skip to content

Commit

Permalink
Fix fusion optimize bug with castra
Browse files Browse the repository at this point in the history
Previously the optimizer would fuse selections (e.g. `b[b.y > 3]`) with the
castra load step because selections used getitem, just like column access.
  • Loading branch information
mrocklin committed Aug 26, 2015
1 parent 3ac078a commit 978da9f
Show file tree
Hide file tree
Showing 2 changed files with 19 additions and 2 deletions.
5 changes: 3 additions & 2 deletions dask/dataframe/core.py
Expand Up @@ -849,8 +849,9 @@ def __getitem__(self, key):
key, self.divisions)
if isinstance(key, Series) and self.divisions == key.divisions:
name = 'series-slice-%s[%s]' % (self._name, key._name)
dsk = dict(((name, i), (operator.getitem, (self._name, i),
(key._name, i)))
dsk = dict(((name, i), (self._partition_type._getitem_array,
(self._name, i),
(key._name, i)))
for i in range(self.npartitions))
return self._constructor(merge(self.dask, key.dask, dsk), name,
self.columns, self.divisions)
Expand Down
16 changes: 16 additions & 0 deletions dask/dataframe/tests/test_io.py
Expand Up @@ -470,6 +470,22 @@ def test_from_castra():
del with_fn, c


def test_from_castra_with_selection():
    """Boolean selection on a castra-backed dataframe matches pandas.

    Optimizations fuse getitems with load_partitions.  We used to use
    getitem for both column access and selections, so this test guards
    the selection path after a castra round trip.
    """
    pytest.importorskip('castra')

    # Small frame with a named float index, split into two partitions.
    index = pd.Index([1., 2., 3., 4.], name='ind')
    df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'],
                       'y': [2, 3, 4, 5]},
                      index=index)
    source = dd.from_pandas(df, 2)

    # Round-trip through castra so the graph starts from the castra loader.
    loaded = dd.from_castra(source.to_castra())

    # Select rows by a boolean series, then pull out a single column.
    selected = loaded[loaded.y > 3].x
    expected = df[df.y > 3].x
    assert eq(selected, expected)


def test_to_hdf():
pytest.importorskip('tables')
df = pd.DataFrame({'x': ['a', 'b', 'c', 'd'],
Expand Down

0 comments on commit 978da9f

Please sign in to comment.