Skip to content

Commit

Permalink
FEAT-#3875: Optimize append time for omnisci engine (#3876)
Browse files Browse the repository at this point in the history
Signed-off-by: Artem Alekseev <artem.alekseev@intel.com>
  • Loading branch information
fexolm committed Dec 24, 2021
1 parent 72ca672 commit 67013f9
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 15 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -1689,9 +1689,7 @@ def _can_execute_arrow(self):
)
elif isinstance(self._op, TransformNode):
return (
not self._uses_rowid
and self._op.is_simple_select()
and self._op.input[0]._can_execute_arrow()
self._op.is_simple_select() and self._op.input[0]._can_execute_arrow()
)
elif isinstance(self._op, UnionNode):
return all(frame._can_execute_arrow() for frame in self._op.input)
Expand Down Expand Up @@ -1737,13 +1735,16 @@ def _arrow_select(self, exprs):
The resulting table.
"""
table = self._execute_arrow()
schema = table.schema

new_fields = []
new_columns = []

for col, expr in exprs.items():
field = schema.field(f"F_{expr.column}")
if expr.column == "__rowid__" and "F___rowid__" not in table.schema.names:
arr = pyarrow.array(np.arange(0, table.num_rows))
table = table.append_column("F___rowid__", arr)

field = table.schema.field(f"F_{expr.column}")
if col != expr.column:
field = field.with_name(f"F_{col}")
new_fields.append(field)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2026,16 +2026,9 @@ def test_shape_hint_detection_from_arrow(self):


class TestArrowExecution:
data1 = {
"a": [1, 2, 3],
"b": [3, 4, 5],
"c": [6, 7, 8],
}
data2 = {
"a": [1, 2, 3],
"d": [3, 4, 5],
"e": [6, 7, 8],
}
data1 = {"a": [1, 2, 3], "b": [3, 4, 5], "c": [6, 7, 8]}
data2 = {"a": [1, 2, 3], "d": [3, 4, 5], "e": [6, 7, 8]}
data3 = {"a": [4, 5, 6], "b": [6, 7, 8], "c": [9, 10, 11]}

def test_drop_rename_concat(self):
def drop_rename_concat(df1, df2, lib, **kwargs):
Expand All @@ -2058,6 +2051,15 @@ def apply(df, **kwargs):

run_and_compare(apply, data={}, force_arrow_execute=True)

def test_append(self):
    """Exercise ``DataFrame.append`` with Arrow execution forced.

    ``data1`` and ``data3`` share the same column names (``a``, ``b``,
    ``c``), and both are handed to ``run_and_compare`` together with
    ``force_arrow_execute=True``.
    """

    def apply(df1, df2, **kwargs):
        # Extra keyword arguments supplied by the harness are accepted
        # and ignored.
        return df1.append(df2)

    run_and_compare(
        apply,
        data=self.data1,
        data2=self.data3,
        force_arrow_execute=True,
    )


# When this file is executed directly (rather than imported), hand it to
# pytest and run its tests in verbose mode.
if __name__ == "__main__":
pytest.main(["-v", __file__])

0 comments on commit 67013f9

Please sign in to comment.