Skip to content

Commit

Permalink
FEATURE/#175
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer committed Jun 20, 2022
1 parent 36aa959 commit d4102bf
Show file tree
Hide file tree
Showing 27 changed files with 199 additions and 93 deletions.
1 change: 1 addition & 0 deletions docs/Release Notes/Change Log.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [[#177](https://github.com/mabel-dev/opteryx/issues/177)] Support `SHOW FULL COLUMNS` to read entire datasets rather than just the first blob. ([@joocer](https://github.com/joocer))
- [[#194](https://github.com/mabel-dev/opteryx/issues/194)] Functions that are abbreviations, should have the full name as an alias. ([@joocer](https://github.com/joocer))
- [[#201](https://github.com/mabel-dev/opteryx/issues/201)] `generate_series` supports CIDR expansion. ([@joocer](https://github.com/joocer))
- [[#175](https://github.com/mabel-dev/opteryx/issues/175)] Support `WITH (NOCACHE)` hint to disable using cache. ([@joocer](https://github.com/joocer))

**Changed**

Expand Down
13 changes: 11 additions & 2 deletions docs/SQL Reference/02 Statements.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ Retrieve rows from zero or more relations.

~~~sql
SELECT [ DISTINCT ] select_list
FROM relation
FROM relation [WITH (NOCACHE)]
[ INNER ] JOIN relation
USING (column)
CROSS JOIN relation
Expand Down Expand Up @@ -49,7 +49,7 @@ The `DISTINCT` modifier is specified, only unique rows are included in the resul
### FROM / JOIN clauses

~~~
FROM relation [, ...]
FROM relation [, ...] [WITH (NOCACHE)]
~~~
~~~
FROM relation [ INNER ] JOIN relation < USING (column) | ON condition >
Expand All @@ -67,6 +67,15 @@ The `FROM` clause specifies the source of the data on which the remainder of the

See [Joins](https://mabel-dev.github.io/opteryx/SQL%20Reference/08%20Joins/) for more information on `JOIN` syntax and functionality.

Hints can be provided as part of the statement to direct the query planner and executor to make decisions. Relation hints are declared as `WITH` statements following a relation in the `FROM` and `JOIN` clauses, for example `FROM $astronauts WITH (NOCACHE)`. Reconised hints are:

Hint | Effect
------- | -------------------------------
NOCACHE | Ignores any cache configuration

!!! note
Hints are not guaranteed to be followed, the query planner and executor may ignore hints in specific circumstances.

### FOR clause

~~~
Expand Down
4 changes: 2 additions & 2 deletions opteryx/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,8 @@
import time
from typing import Dict, List, Optional, Tuple, Union

from opteryx.engine import QueryPlanner
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine.planner import QueryPlanner
from opteryx.engine import QueryStatistics
from opteryx.storage import BaseBufferCache, BasePartitionScheme, BaseStorageAdapter
from opteryx.storage.adapters import DiskStorage
from opteryx.storage.schemes import DefaultPartitionScheme, MabelPartitionScheme
Expand Down
3 changes: 2 additions & 1 deletion opteryx/engine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,4 +10,5 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from .planner.planner import QueryPlanner
from .query_directives import QueryDirectives
from .query_statistics import QueryStatistics
2 changes: 2 additions & 0 deletions opteryx/engine/planner/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from .planner import QueryPlanner
6 changes: 4 additions & 2 deletions opteryx/engine/planner/operations/aggregate_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
import pyarrow.json

from opteryx.engine.attribute_types import TOKEN_TYPES
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics
from opteryx.engine.planner.operations import BasePlanNode
from opteryx.exceptions import SqlError
from opteryx.utils.columns import Columns
Expand Down Expand Up @@ -94,7 +94,9 @@ def _map(table, collect_columns):


class AggregateNode(BasePlanNode):
def __init__(self, statistics: QueryStatistics, **config):
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):

from opteryx.engine.attribute_types import TOKEN_TYPES

Expand Down
10 changes: 6 additions & 4 deletions opteryx/engine/planner/operations/base_plan_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,24 +12,26 @@


import abc
from typing import Iterable

from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics


class BasePlanNode(abc.ABC):

_producers = None

def __init__(self, statistics: QueryStatistics, **config):
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
"""
This is the base class for nodes in the execution plan.
The initializer accepts a QueryStatistics node which is populated by
different nodes differently to record what happened during the query
execution.
"""
pass
self._directives = directives
self._statistics = statistics

def __call__(self):
return self.execute()
Expand Down
8 changes: 5 additions & 3 deletions opteryx/engine/planner/operations/cross_join_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@
from opteryx import config
from opteryx.engine.attribute_types import TOKEN_TYPES
from opteryx.engine.planner.operations.base_plan_node import BasePlanNode
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics
from opteryx.exceptions import SqlError
from opteryx.utils.columns import Columns

Expand Down Expand Up @@ -176,7 +176,9 @@ class CrossJoinNode(BasePlanNode):
Implements a SQL CROSS JOIN and CROSS JOIN UNNEST
"""

def __init__(self, statistics: QueryStatistics, **config):
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
self._right_table = config.get("right_table")
self._join_type = config.get("join_type", "CrossJoin")

Expand Down Expand Up @@ -208,7 +210,7 @@ def execute(self) -> Iterable:

elif self._join_type == "CrossJoinUnnest":

alias, dataset, source = right_node
alias, dataset, source, hints = right_node
function = dataset["function"]
args = dataset["args"]

Expand Down
26 changes: 17 additions & 9 deletions opteryx/engine/planner/operations/dataset_reader_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,10 +35,9 @@

import time


from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics
from opteryx.engine.planner.operations import BasePlanNode
from opteryx.exceptions import DatabaseError, DatasetNotFoundError
from opteryx.exceptions import DatasetNotFoundError
from opteryx.storage import file_decoders
from opteryx.storage.adapters import DiskStorage
from opteryx.storage.schemes import MabelPartitionScheme
Expand Down Expand Up @@ -85,18 +84,19 @@ def _normalize_to_schema(table, schema, statistics):


class DatasetReaderNode(BasePlanNode):
def __init__(self, statistics: QueryStatistics, **config):
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
"""
The Dataset Reader Node is responsible for reading the relevant blobs
and returning a Table/Relation.
"""
super().__init__(statistics=statistics, **config)
super().__init__(directives=directives, statistics=statistics, **config)

from opteryx.engine.planner.planner import QueryPlanner

today = datetime.datetime.utcnow().date()

self._statistics = statistics
self._dataset = config.get("dataset", None)
self._alias = config.get("alias", None)

Expand All @@ -105,7 +105,12 @@ def __init__(self, statistics: QueryStatistics, **config):

self._dataset = self._dataset.replace(".", "/") + "/"
self._reader = config.get("reader", DiskStorage())
self._cache = config.get("cache")

self._no_cache = "NOCACHE" in config.get("hints", [])
if self._no_cache:
self._cache = None
else:
self._cache = config.get("cache")
self._partition_scheme = config.get("partition_scheme", MabelPartitionScheme())

self._start_date = config.get("start_date", today)
Expand All @@ -118,10 +123,13 @@ def __init__(self, statistics: QueryStatistics, **config):

@property
def config(self): # pragma: no cover
use_cache = ""
if self._no_cache:
use_cache = " (NOCACHE)"
if self._alias:
return f"{self._dataset} => {self._alias}"
return f"{self._dataset} => {self._alias}{use_cache}"
if isinstance(self._dataset, str):
return self._dataset
return f"{self._dataset}{use_cache}"
return "<complex dataset>"

@property
Expand Down
7 changes: 4 additions & 3 deletions opteryx/engine/planner/operations/distinct_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,15 @@
from pyarrow import Table, concat_tables

from opteryx.engine.planner.operations import BasePlanNode
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics
from opteryx.exceptions import SqlError
from opteryx.third_party.pyarrow_ops import drop_duplicates


class DistinctNode(BasePlanNode):
def __init__(self, statistics: QueryStatistics, **config):
super().__init__(statistics=statistics, **config)
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
self._distinct = config.get("distinct", True)

@property
Expand Down
6 changes: 4 additions & 2 deletions opteryx/engine/planner/operations/evaluation_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,15 @@
from opteryx.engine.attribute_types import TOKEN_TYPES
from opteryx.engine.functions import FUNCTIONS
from opteryx.engine.planner.operations import BasePlanNode
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics
from opteryx.exceptions import SqlError
from opteryx.utils.columns import Columns


class EvaluationNode(BasePlanNode):
def __init__(self, statistics: QueryStatistics, **config):
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
projection = config.get("projection", [])
self.functions = [c for c in projection if "function" in c]
self.aliases: list = []
Expand Down
6 changes: 4 additions & 2 deletions opteryx/engine/planner/operations/explain_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,13 @@
from typing import Iterable

from opteryx.engine.planner.operations.base_plan_node import BasePlanNode
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics


class ExplainNode(BasePlanNode):
def __init__(self, statistics: QueryStatistics, **config):
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
self._query_plan = config.get("query_plan")

@property
Expand Down
7 changes: 4 additions & 3 deletions opteryx/engine/planner/operations/function_dataset_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
from typing import Iterable

from opteryx.engine.attribute_types import TOKEN_TYPES
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics
from opteryx.engine.planner.operations import BasePlanNode
from opteryx.exceptions import SqlError
from opteryx.utils.columns import Columns
Expand Down Expand Up @@ -98,12 +98,13 @@ def _inner(rows, columns):


class FunctionDatasetNode(BasePlanNode):
def __init__(self, statistics: QueryStatistics, **config):
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
"""
The Blob Reader Node is responsible for reading the relevant blobs
and returning a Table/Relation.
"""
super().__init__(statistics=statistics, **config)
self._statistics = statistics
self._alias = config["alias"]
self._function = config["dataset"]["function"]
Expand Down
6 changes: 4 additions & 2 deletions opteryx/engine/planner/operations/inner_join_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,15 +23,17 @@

from opteryx import config
from opteryx.engine.planner.operations import BasePlanNode
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics
from opteryx.exceptions import SqlError
from opteryx.third_party import pyarrow_ops
from opteryx.utils import arrow
from opteryx.utils.columns import Columns


class InnerJoinNode(BasePlanNode):
def __init__(self, statistics: QueryStatistics, **config):
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
self._right_table = config.get("right_table")
self._join_type = config.get("join_type", "CrossJoin")
self._on = config.get("join_on")
Expand Down
8 changes: 5 additions & 3 deletions opteryx/engine/planner/operations/internal_dataset_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from typing import Iterable, Optional

from opteryx import samples
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics
from opteryx.engine.planner.operations import BasePlanNode
from opteryx.exceptions import DatabaseError
from opteryx.utils.columns import Columns
Expand Down Expand Up @@ -49,12 +49,14 @@ def _get_sample_dataset(dataset, alias):


class InternalDatasetNode(BasePlanNode):
def __init__(self, statistics: QueryStatistics, **config):
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
"""
The Blob Reader Node is responsible for reading the relevant blobs
and returning a Table/Relation.
"""
super().__init__(statistics=statistics, **config)
super().__init__(directives=directives, statistics=statistics, **config)

self._statistics = statistics
self._alias = config["alias"]
Expand Down
6 changes: 4 additions & 2 deletions opteryx/engine/planner/operations/limit_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,14 @@
from pyarrow import Table, concat_tables

from opteryx.engine.planner.operations.base_plan_node import BasePlanNode
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics
from opteryx.exceptions import SqlError


class LimitNode(BasePlanNode):
def __init__(self, statistics: QueryStatistics, **config):
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
self._limit = config.get("limit")

@property
Expand Down
7 changes: 4 additions & 3 deletions opteryx/engine/planner/operations/offset_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,13 +22,14 @@
import pyarrow

from opteryx.engine.planner.operations.base_plan_node import BasePlanNode
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics
from opteryx.exceptions import SqlError


class OffsetNode(BasePlanNode):
def __init__(self, statistics: QueryStatistics, **config):
super().__init__(statistics=statistics, **config)
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
self._offset = config.get("offset")

@property
Expand Down
6 changes: 4 additions & 2 deletions opteryx/engine/planner/operations/outer_join_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
import pyarrow

from opteryx.engine.planner.operations.base_plan_node import BasePlanNode
from opteryx.engine.query_statistics import QueryStatistics
from opteryx.engine import QueryDirectives, QueryStatistics
from opteryx.exceptions import SqlError
from opteryx.utils import arrow
from opteryx.utils.columns import Columns
Expand All @@ -35,7 +35,9 @@


class OuterJoinNode(BasePlanNode):
def __init__(self, statistics: QueryStatistics, **config):
def __init__(
self, directives: QueryDirectives, statistics: QueryStatistics, **config
):
self._join_type = OUTER_JOINS[config.get("join_type")]
self._on = config.get("join_on")
self._using = config.get("join_using")
Expand Down
Loading

0 comments on commit d4102bf

Please sign in to comment.