Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer committed May 14, 2023
1 parent 401e877 commit d0fe829
Show file tree
Hide file tree
Showing 9 changed files with 104 additions and 78 deletions.
90 changes: 69 additions & 21 deletions opteryx/components/v2/binder.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,79 +16,127 @@

import copy

from orso.logging import get_logger

from opteryx.exceptions import DatabaseError

logger = get_logger()


class Schema:
relation: str


class BinderVisitor:
def visit_node(self, node, context=None):
    """
    Dispatch `node` to the `visit_<type>` handler for its node type.

    The handler name is built from the second dot-separated segment of
    `node.node_type`, lower-cased (for example "Plan.Project" resolves to
    `visit_project`). When no such handler exists on the visitor,
    `visit_unsupported` is used instead.

    Parameters:
        node: the node to visit; `node.node_type` must be a dotted string.
        context: the value passed through to the handler.

    Returns:
        The dict returned by the handler.

    Raises:
        DatabaseError: if the handler returns anything other than a dict.
    """
    node_type = node.node_type
    visit_method_name = f"visit_{node_type.split('.')[1].lower()}"
    visit_method = getattr(self, visit_method_name, self.visit_unsupported)
    result = visit_method(node, context)
    # Validate first, then hand the result back: returning the handler's value
    # directly would skip this check, and checking without returning would
    # make every successful visit yield None.
    if not isinstance(result, dict):
        raise DatabaseError(f"function {visit_method_name} didn't return a dict")
    return result

def visit_unsupported(self, node, context):
raise NotImplementedError(f"No visit method implemented for node type {node.node_type}")

def visit_project(self, node, context):
raise NotImplementedError("visit project")
logger.warning("visit_project not implemented")
return context

def visit_filter(self, node, context):
raise NotImplementedError("visit filter")
logger.warning("visit_filter not implemented")
return context

def visit_union(self, node, context):
raise NotImplementedError("visit union")
logger.warning("visit_union not implemented")
return context

def visit_explain(self, node, context):
raise NotImplementedError("visit explain")
logger.warning("visit_explain not implemented")
return context

def visit_difference(self, node, context):
raise NotImplementedError("visit difference")
logger.warning("visit_difference not implemented")
return context

def visit_join(self, node, context):
raise NotImplementedError("visit join")
logger.warning("visit_join not implemented")
return context

def visit_group(self, node, context):
raise NotImplementedError("visit group")
logger.warning("visit_group not implemented")
return context

def visit_aggregate(self, node, context):
raise NotImplementedError("visit aggregate")
logger.warning("visit_aggregate not implemented")
return context

def visit_scan(self, node, context):
if node.relation[0] == "$":
from opteryx import samples

node.connector = "Internal"
_schema = samples.planets.schema

raise NotImplementedError("visit scan")
"""
- determine the source of the relation:
- sample
- in-memory
- on-disk
- storage
- collection
- sql
- if we can get the schema, do that and add it to the context
"""

def visit_show(self, node, context):
raise NotImplementedError("visit show")
logger.warning("visit_show not implemented")
return context

def visit_show_columns(self, node, context):
raise NotImplementedError("visit show columns")
logger.warning("visit_show_columns not implemented")
return context

def visit_set(self, node, context):
raise NotImplementedError("visit set")
logger.warning("visit_set not implemented")
return context

def visit_limit(self, node, context):
raise NotImplementedError("visit limit")
logger.warning("visit_limit not implemented")
return context

def visit_order(self, node, context):
raise NotImplementedError("visit order")
logger.warning("visit_order not implemented")
return context

def visit_distinct(self, node, context):
raise NotImplementedError("visit distinct")
logger.warning("visit_distinct not implemented")
return context

def visit_cte(self, node, context):
raise NotImplementedError("visit cte")
logger.warning("visit_cte not implemented")
return context

def visit_subquery(self, node, context):
raise NotImplementedError("visit subquery")
logger.warning("visit_subquery not implemented")
return context

def visit_values(self, node, context):
raise NotImplementedError("visit values")
logger.warning("visit_values not implemented")
return context

def visit_unnest(self, node, context):
raise NotImplementedError("visit unnest")
logger.warning("visit_unnest not implemented")
return context

def visit_generate_series(self, node, context):
raise NotImplementedError("visit generate series")
logger.warning("visit_generate_series not implemented")
return context

def visit_fake(self, node, context):
raise NotImplementedError("visit fake")
logger.warning("visit_fake not implemented")
return context

def traverse(self, graph, node, context=None):
"""
Expand Down
2 changes: 1 addition & 1 deletion opteryx/operators/function_dataset_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def _unnest(alias, values):
# single item lists are reported as nested
from opteryx.samples import no_table

list_items = evaluate(values, no_table(), True)
list_items = evaluate(values, no_table.read(), True)
return [{alias: row} for row in list_items]


Expand Down
8 changes: 4 additions & 4 deletions opteryx/operators/internal_dataset_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,10 @@ def _get_sample_dataset(dataset, alias, end_date):
# we do this like this so the datasets are not loaded into memory unless
# they are going to be used
sample_datasets = {
"$satellites": samples.satellites,
"$planets": samples.planets,
"$astronauts": samples.astronauts,
"$no_table": samples.no_table,
"$satellites": samples.satellites.read,
"$planets": samples.planets.read,
"$astronauts": samples.astronauts.read,
"$no_table": samples.no_table.read,
}
dataset = dataset.lower()
if dataset in sample_datasets:
Expand Down
45 changes: 4 additions & 41 deletions opteryx/samples/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,44 +12,7 @@

import datetime


def satellites(*args):
"""load the satellite sample data"""
from .satellite_data import load

return load()


def planets(end_date=datetime.datetime.utcnow().date()):
"""load the planets sample data"""
from .planet_data import load

full_set = load()

# make planet data act like it support temporality
mask = [True, True, True, True, True, True, True, True, True]
if end_date < datetime.date(1930, 3, 13):
# March 13, 1930 - Pluto discovered by Clyde William Tombaugh
mask = [True, True, True, True, True, True, True, True, False]
if end_date < datetime.date(1846, 11, 13):
# November 13, 1846 - Neptune
mask = [True, True, True, True, True, True, True, False, False]
if end_date < datetime.date(1781, 4, 26):
# April 26, 1781 - Uranus discovered by Sir William Herschel
mask = [True, True, True, True, True, True, False, False, False]

return full_set.filter(mask)


def astronauts(*args):
"""load the astronaut sample data"""
from .astronaut_data import load

return load()


def no_table(*args):
"""load the null data table"""
from .no_table_data import load

return load()
import opteryx.samples.astronaut_data as astronauts
import opteryx.samples.no_table_data as no_table
import opteryx.samples.planet_data as planets
import opteryx.samples.satellite_data as satellites
2 changes: 1 addition & 1 deletion opteryx/samples/astronaut_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import pyarrow.parquet as pq


def load():
def read(*args):
"""The table is saved parquet table, base85 encoded."""
return pq.read_table(
io.BytesIO(
Expand Down
2 changes: 1 addition & 1 deletion opteryx/samples/no_table_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
"""


def load():
def read(*args):
import pyarrow

# Create a PyArrow schema with one column called 'column' of integer type
Expand Down
19 changes: 17 additions & 2 deletions opteryx/samples/planet_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,9 +30,10 @@
This has a companion dataset, $satellites, to help test joins.
"""
import datetime


def load():
def read(end_date=datetime.datetime.utcnow().date()):
import pyarrow

# fmt:off
Expand Down Expand Up @@ -62,7 +63,21 @@ def load():
column_names = ["id", "name", "mass", "diameter", "density", "gravity", "escapeVelocity", "rotationPeriod", "lengthOfDay", "distanceFromSun", "perihelion", "aphelion", "orbitalPeriod", "orbitalVelocity", "orbitalInclination", "orbitalEccentricity", "obliquityToOrbit", "meanTemperature", "surfacePressure", "numberOfMoons"]

# fmt: on
return pyarrow.Table.from_arrays(data, column_names)
full_set = pyarrow.Table.from_arrays(data, column_names)

# make planet data act like it support temporality
mask = [True, True, True, True, True, True, True, True, True]
if end_date < datetime.date(1930, 3, 13):
# March 13, 1930 - Pluto discovered by Clyde William Tombaugh
mask = [True, True, True, True, True, True, True, True, False]
if end_date < datetime.date(1846, 11, 13):
# November 13, 1846 - Neptune
mask = [True, True, True, True, True, True, True, False, False]
if end_date < datetime.date(1781, 4, 26):
# April 26, 1781 - Uranus discovered by Sir William Herschel
mask = [True, True, True, True, True, True, False, False, False]

return full_set.filter(mask)


schema = {
Expand Down
2 changes: 1 addition & 1 deletion opteryx/samples/satellite_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
import pyarrow.parquet as pq


def load():
def read(*args):
"""The table is saved parquet table, base85 encoded."""

return pq.read_table(
Expand Down
12 changes: 6 additions & 6 deletions tests/misc/test_expressions.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@

@pytest.mark.parametrize("node_type, value", LITERALS)
def test_literals(node_type, value):
planets = opteryx.samples.planets()
planets = opteryx.samples.planets.read()

node = ExpressionTreeNode(node_type, value=value)
values = evaluate(node, table=planets)
Expand All @@ -71,7 +71,7 @@ def test_logical_expressions():
illogical from a user perspective but technically correct.
"""

planets = opteryx.samples.planets()
planets = opteryx.samples.planets.read()

true = ExpressionTreeNode(NodeType.LITERAL_BOOLEAN, value=True)
false = ExpressionTreeNode(NodeType.LITERAL_BOOLEAN, value=False)
Expand Down Expand Up @@ -128,7 +128,7 @@ def test_logical_expressions():


def test_reading_identifiers():
planets = opteryx.samples.planets()
planets = opteryx.samples.planets.read()

names_node = ExpressionTreeNode(NodeType.IDENTIFIER, value="name")
names = evaluate(names_node, planets)
Expand All @@ -151,7 +151,7 @@ def test_reading_identifiers():


def test_function_operations():
planets = opteryx.samples.planets()
planets = opteryx.samples.planets.read()

name = ExpressionTreeNode(NodeType.IDENTIFIER, value="name")
concat = ExpressionTreeNode(
Expand Down Expand Up @@ -201,7 +201,7 @@ def test_function_operations():


def test_compound_expressions():
planets = opteryx.samples.planets()
planets = opteryx.samples.planets.read()

# this builds and tests the following `3.7 * gravity > mass`

Expand Down Expand Up @@ -232,7 +232,7 @@ def test_compound_expressions():


def test_functions():
planets = opteryx.samples.planets()
planets = opteryx.samples.planets.read()

gravity = ExpressionTreeNode(NodeType.IDENTIFIER, value="gravity")
_round = ExpressionTreeNode(NodeType.FUNCTION, value="ROUND", parameters=[gravity])
Expand Down

0 comments on commit d0fe829

Please sign in to comment.