Skip to content

Commit

Permalink
✨ support UNION statements
Browse files Browse the repository at this point in the history
Fixes #21 (partial1)
  • Loading branch information
joocer committed Jan 5, 2024
1 parent 50e6dd5 commit a7adc84
Show file tree
Hide file tree
Showing 6 changed files with 103 additions and 14 deletions.
2 changes: 1 addition & 1 deletion opteryx/__version__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,4 @@ class VersionStatus(Enum):

__version__ = f"{_major}.{_minor}.{_revision}" + (
f"-{_status.value}.{__build__}" if _status != VersionStatus.RELEASE else ""
)
)
26 changes: 15 additions & 11 deletions opteryx/components/logical_planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,11 @@ def _table_name(branch):


def inner_query_planner(ast_branch):
if "Query" in ast_branch:
# Sometimes we get a full query plan here (e.g. when queries in set
# functions are in parenthesis)
return plan_query(ast_branch)

inner_plan = LogicalPlan()
step_id = None

Expand Down Expand Up @@ -722,33 +727,32 @@ def plan_query(statement):
if set_operation["op"] == "Union":
set_op_node = LogicalPlanNode(node_type=LogicalPlanStepType.Union)
else:
raise NotImplementedError(f"Unsupported SET operator {set_operation['op']}")
raise UnsupportedSyntaxError(f"Unsupported SET operator '{set_operation['op']}'")

set_op_node.modifier = (
None if set_operation["set_quantifier"] == "None" else set_operation["set_quantifier"]
)
step_id = random_string()
plan = LogicalPlan()
plan.add_node(step_id, set_op_node)

if set_op_node.modifier != "All":
# UNION returns distinct records if used without ALL
distinct = LogicalPlanNode(node_type=LogicalPlanStepType.Distinct)
distinct_id = random_string()
plan.add_node(distinct_id, distinct)
plan.add_edge(step_id, distinct_id)

left_plan = inner_query_planner(set_operation["left"])
plan += left_plan
subquery_entry_id = left_plan.get_exit_points()[0]
plan.add_edge(subquery_entry_id, step_id)

right_plan = inner_query_planner(set_operation["left"])
right_plan = inner_query_planner(set_operation["right"])
plan += right_plan
subquery_entry_id = right_plan.get_exit_points()[0]
plan.add_edge(subquery_entry_id, step_id)

root_node["Select"] = {}
parent_plan = inner_query_planner(root_node)
if len(parent_plan) > 0:
plan += parent_plan
parent_plan_exit_id = parent_plan.get_entry_points()[0]
plan.add_edge(step_id, parent_plan_exit_id)

raise UnsupportedSyntaxError("Set operators are not supported")

return plan

# we do some minor AST rewriting
Expand Down
2 changes: 1 addition & 1 deletion opteryx/components/temporary_physical_planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def create_physical_plan(logical_plan, query_properties):
elif node_type == LogicalPlanStepType.Subquery:
node = operators.NoOpNode(query_properties, **node_config)
elif node_type == LogicalPlanStepType.Union:
node = operators.NoOpNode(query_properties, **node_config)
node = operators.UnionNode(query_properties, **node_config)

else:
raise Exception(f"something unexpected happed - {node_type.name}")
Expand Down
1 change: 1 addition & 0 deletions opteryx/operators/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@
# from .show_functions_node import ShowFunctionsNode # supported functions
from .show_value_node import ShowValueNode # display node for SHOW
from .sort_node import SortNode # order by selected columns
from .union_node import UnionNode


def is_aggregator(name):
Expand Down
50 changes: 50 additions & 0 deletions opteryx/operators/union_node.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Union Node
This is a SQL Query Execution Plan Node.
"""
from typing import Generator

from opteryx.models import QueryProperties
from opteryx.operators import BasePlanNode


class UnionNode(BasePlanNode):
def __init__(self, properties: QueryProperties, **config):
super().__init__(properties=properties)

@property
def name(self): # pragma: no cover
return "Union"

@property
def config(self): # pragma: no cover
return ""

def execute(self) -> Generator:
"""
Union needs to ensure the column names are the same and that
coercible types are coerced.
"""
schema = None
if self._producers:
for morsels in self._producers:
for morsel in morsels.execute():
if schema is None:
schema = morsel.schema
else:
morsel = morsel.rename_columns(schema.names)
morsel = morsel.cast(schema)
yield morsel
36 changes: 35 additions & 1 deletion tests/sql_battery/test_shapes_and_errors_battery.py
Original file line number Diff line number Diff line change
Expand Up @@ -1202,7 +1202,7 @@
("SELECT $planets.id FROM $satellites", None, None, UnexpectedDatasetReferenceError),

# V2 New Syntax Checks
("SELECT * FROM $planets AS P1 UNION SELECT * FROM $planets AS P2;", None, None, UnsupportedSyntaxError),
("SELECT * FROM $planets AS P1 UNION SELECT * FROM $planets AS P2;", 9, 20, UnsupportedSyntaxError),
("SELECT * FROM $planets AS P LEFT ANTI JOIN $satellites AS S ON S.id = P.id;", 0, 20, None),
("SELECT * FROM $planets AS P RIGHT ANTI JOIN $satellites AS S ON S.id = P.id;", 168, 8, None),
("SELECT * FROM $planets AS P LEFT SEMI JOIN $satellites AS S ON S.id = P.id;", 9, 20, None),
Expand All @@ -1211,6 +1211,40 @@
("SELECT DISTINCT ON (planetId) planetId, name FROM $satellites ", 7, 2, None),
("SELECT 8 DIV 4", 1, 1, None),

# Test cases for UNION
("SELECT name FROM $planets AS p1 UNION SELECT name FROM $planets AS p2", 9, 1, None),
("SELECT name FROM $planets p1 UNION SELECT name FROM $planets p2 WHERE p2.name != 'Earth'", 9, 1, None),
("SELECT name FROM $planets UNION SELECT name FROM $planets", 18, 1, AmbiguousDatasetError),
("SELECT name FROM $planets AS p1 UNION ALL SELECT name FROM $planets AS p2", 18, 1, None),
("SELECT name FROM $planets p1 UNION ALL SELECT name FROM $planets p2 WHERE p2.name = 'Mars'", 10, 1, None),
("SELECT name AS planet_name FROM $planets p1 UNION SELECT name FROM $planets p2", 9, 1, None),
("SELECT name FROM $planets p1 UNION ALL SELECT name AS planet_name FROM $planets p2", 18, 1, None),
("SELECT name FROM $planets AS p1 WHERE name LIKE 'M%' UNION SELECT name FROM $planets AS p2 WHERE name LIKE 'V%'", 3, 1, None),
("SELECT name FROM $planets P1 WHERE name LIKE 'E%' UNION ALL SELECT name FROM $planets P2 WHERE name LIKE 'M%'", 3, 1, None),
("SELECT name FROM $planets P1 INTERSECT SELECT name FROM $planets P2", 0, 0, UnsupportedSyntaxError),
("SELECT name FROM $planets p1 EXCEPT SELECT name FROM $planets p2", 0, 0, UnsupportedSyntaxError),
("(SELECT name FROM $planets AS p1) UNION (SELECT name FROM $planets) LIMIT 5", 5, 1, None),
("(SELECT name FROM $planets AS p1) UNION (SELECT name FROM $planets) LIMIT 5 OFFSET 4", 5, 1, None),
("(SELECT name FROM $planets AS p1) UNION (SELECT name FROM $planets) LIMIT 3 OFFSET 6", 3, 1, None),
("(SELECT name FROM $planets AS p1 LIMIT 3) UNION ALL (SELECT name FROM $planets LIMIT 2)", 5, 1, None),
("(SELECT name FROM $planets AS p1 OFFSET 2) UNION ALL (SELECT name FROM $planets OFFSET 3)", 4, 1, None),
("(SELECT name FROM $planets AS p1 LIMIT 4 OFFSET 1) UNION ALL (SELECT name FROM $planets LIMIT 3 OFFSET 2)", 6, 1, None),
("(SELECT name FROM $planets AS p1) UNION ALL (SELECT name FROM $planets) LIMIT 10", 10, 1, None),
("(SELECT name FROM $planets AS p1) UNION ALL (SELECT name FROM $planets) OFFSET 8", 10, 1, None),
("(SELECT name FROM $planets AS p1 LIMIT 5) UNION ALL (SELECT name FROM $planets OFFSET 3)", 11, 1, None),
("(SELECT name FROM $planets AS p1 LIMIT 4 OFFSET 2) UNION ALL (SELECT name FROM $planets LIMIT 3 OFFSET 1) LIMIT 5 OFFSET 3", 5, 1, None),
("SELECT mass FROM $planets AS p1 UNION SELECT diameter FROM $planets", 9, 1, None),
("SELECT mass AS m FROM $planets AS p1 UNION SELECT mass FROM $planets", 9, 1, None),
("SELECT name FROM $planets AS p1 WHERE mass IS NULL UNION SELECT name FROM $planets WHERE diameter IS NULL", 0, 1, None),
("SELECT name FROM $planets AS p1 UNION ALL SELECT name FROM $planets", 18, 1, None), # Assuming no large data set available
("SELECT name FROM (SELECT name FROM $planets P1 UNION SELECT name FROM $planets) AS subquery", 9, 1, None),
("SELECT a.name FROM $planets a JOIN (SELECT name FROM $planets AS P1 UNION SELECT name FROM $planets) b ON a.name = b.name", 9, 1, None),
("(SELECT name FROM $planets AS P1 ORDER BY mass DESC) UNION (SELECT name FROM $planets ORDER BY diameter ASC)", 9, 1, None),
("SELECT gravity FROM $planets AS P1 UNION SELECT gravity FROM $planets", 8, 1, None), # Assuming two planets have the same gravity
("(SELECT name FROM $planets AS p1 LIMIT 3) UNION ALL (SELECT name FROM $planets OFFSET 2)", 10, 1, None),
("SELECT AVG(mass) FROM $planets AS p1 UNION SELECT SUM(diameter) FROM $planets", 2, 1, None),
("SELECT name FROM $planets AS p1 WHERE mass > 1.5 OR diameter < 10000 UNION SELECT name FROM $planets WHERE gravity = 3.7", 9, 1, None),

# New and improved JOIN UNNESTs
("SELECT * FROM $planets CROSS JOIN UNNEST(('Earth', 'Moon')) AS n", 18, 21, None),
("SELECT * FROM $planets INNER JOIN UNNEST(('Earth', 'Moon')) AS n ON name = n", 1, 21, None),
Expand Down

0 comments on commit a7adc84

Please sign in to comment.