Skip to content

Commit

Permalink
Merge pull request #233 from mabel-dev/FIX/#230
Browse files Browse the repository at this point in the history
Feature/#230
  • Loading branch information
joocer committed Jun 25, 2022
2 parents 8e4082f + 392ad91 commit cbcd0ee
Show file tree
Hide file tree
Showing 7 changed files with 42 additions and 0 deletions.
1 change: 1 addition & 0 deletions docs/Release Notes/Change Log.md
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
- [[#203](https://github.com/mabel-dev/opteryx/issues/203)] When reporting that a column doesn't exist, it should suggest likely correct columns. ([@joocer](https://github.com/joocer))
- Not Regular Expression match operator, `!~` added to supported set of operators. ([@joocer](https://github.com/joocer))
- [[#226](https://github.com/mabel-dev/opteryx/issues/226)] Implement `DATE_TRUNC` function. ([@joocer](https://github.com/joocer))
- [[#230](https://github.com/mabel-dev/opteryx/issues/230)] Allow addressing fields as numbers. ([@joocer](https://github.com/joocer))

**Changed**

Expand Down
4 changes: 4 additions & 0 deletions docs/SQL Reference/02 Statements.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,8 @@ HAVING group_filter

The `GROUP BY` clause specifies which grouping columns should be used to perform any aggregations in the `SELECT` clause. If the `GROUP BY` clause is specified, the query is always an aggregate query, even if no aggregations are present in the `SELECT` clause. The `HAVING` clause specifies filters to apply to aggregated data; a `HAVING` clause requires a `GROUP BY` clause.

`GROUP BY` expressions may use column numbers; however, this is not recommended for statements intended for reuse.

### ORDER BY / LIMIT / OFFSET clauses

~~~
Expand All @@ -125,6 +127,8 @@ LIMIT count

`ORDER BY`, `LIMIT` and `OFFSET` are output modifiers. Logically they are applied at the very end of the query. The `OFFSET` clause discards initial rows from the returned set, the `LIMIT` clause restricts the number of rows fetched, and the `ORDER BY` clause sorts the rows on the sorting criteria in either ascending or descending order.

`ORDER BY` expressions may use column numbers; however, this is not recommended for statements intended for reuse.

## SHOW COLUMNS

List the columns in a relation along with their data type and an indication if nulls have been found in the first page of records.
Expand Down
13 changes: 13 additions & 0 deletions opteryx/engine/planner/operations/aggregate_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
But, on high cardinality data (nearly unique columns), the performance is much faster,
on a 10m record set, timings are 1:400 (50s:1220s where 20s is the read time).
"""
from asyncio import create_subprocess_shell
from typing import Iterable, List

import numpy as np
Expand Down Expand Up @@ -100,6 +101,7 @@ def __init__(

from opteryx.engine.attribute_types import TOKEN_TYPES

self._positions = []
self._aggregates = []
self._groups = config.get("groups", [])
self._project = self._groups.copy()
Expand All @@ -115,10 +117,16 @@ def __init__(
self._project.append(column)
else:
raise SqlError("Can only aggregate on fields in the dataset.")
self._positions.append(column)
elif "column_name" in attribute:
self._project.append(attribute["column_name"])
if attribute["alias"]:
self._positions.append(attribute["alias"][0])
else:
self._positions.append(attribute["column_name"])
else:
self._project.append(attribute["identifier"])
self._positions.append(attribute["identifier"])

self._project = [p for p in self._project if p is not None]

Expand Down Expand Up @@ -194,13 +202,18 @@ def execute(self) -> Iterable:

for key in self._project:
if key != "*":
if isinstance(key, int):
key = self._positions[key - 1]
column = columns.get_column_from_alias(key, only_one=True)
if column not in self._mapped_project:
self._mapped_project.append(column)
else:
self._mapped_project.append("*")

for group in self._groups:
# if we have a number, use it as a column offset
if isinstance(group, int):
group = self._positions[group - 1]
self._mapped_groups.append(
columns.get_column_from_alias(group, only_one=True)
)
Expand Down
11 changes: 11 additions & 0 deletions opteryx/engine/planner/operations/sort_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,17 @@ def execute(self) -> Iterable:
)
)

# we have an index rather than a column name, it's a natural number but the
# list of column names is zero-based, so we subtract one
elif isinstance(column, int):
column_name = table.column_names[column - 1]
self._mapped_order.append(
(
column_name,
direction,
)
)

else:
self._mapped_order.append(
(
Expand Down
4 changes: 4 additions & 0 deletions opteryx/engine/planner/planner.py
Original file line number Diff line number Diff line change
Expand Up @@ -535,6 +535,8 @@ def _extract_order(self, ast):
]
alias = f"{func.upper()}({','.join([str(a[0]) for a in args])})"
column = {"function": func, "args": args, "alias": alias}
if "Value" in column:
column = int(column["Value"]["Number"][0])
orders.append(
(
column,
Expand Down Expand Up @@ -570,6 +572,8 @@ def _inner(element):
if "Number" in key_dict:
key = key_dict["Number"][0]
return f"{identifier}[{key}]"
if "Value" in element:
return int(element["Value"]["Number"][0])

groups = ast[0]["Query"]["body"]["Select"]["group_by"]
return [_inner(g) for g in groups]
Expand Down
1 change: 1 addition & 0 deletions opteryx/utils/columns.py
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ def fuzzy_search(self, column_name):
for k, v in self._column_metadata.items():
for alias in v.get("aliases"):
my_dist = compare(column_name, alias)
print(alias)
if my_dist > 0 and my_dist < best_match_score:
best_match_score = my_dist
best_match_column = alias
Expand Down
8 changes: 8 additions & 0 deletions tests/sql_battery/test_battery_shape.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,11 @@
("SELECT * FROM $satellites LIMIT 50 OFFSET 150", 27, 8),
("SELECT * FROM $satellites LIMIT 50 OFFSET 170", 7, 8),
("SELECT * FROM $satellites ORDER BY name", 177, 8),
("SELECT * FROM $satellites ORDER BY 1", 177, 8),
("SELECT * FROM $satellites ORDER BY 1 DESC", 177, 8),
("SELECT * FROM $satellites ORDER BY 2", 177, 8),
("SELECT * FROM $satellites ORDER BY 1, 2", 177, 8),
("SELECT * FROM $satellites ORDER BY 1 ASC", 177, 8),
("SELECT * FROM $satellites ORDER BY RANDOM()", 177, 8),

("SELECT MAX(planetId) FROM $satellites", 1, 1),
Expand All @@ -158,6 +163,9 @@
("SELECT GET(name, 1) FROM $satellites GROUP BY planetId, GET(name, 1)", 56, 1),
("SELECT COUNT(*), ROUND(magnitude) FROM $satellites group by ROUND(magnitude)", 27, 2),
("SELECT ROUND(magnitude) FROM $satellites group by ROUND(magnitude)", 27, 1),
("SELECT VARCHAR(planetId), COUNT(*) FROM $satellites GROUP BY 1", 7, 2),
("SELECT LEFT(name, 1), COUNT(*) FROM $satellites GROUP BY 1 ORDER BY 2 DESC", 21, 2),
("SELECT LEFT(name, 1) as le, COUNT(*) FROM $satellites GROUP BY 1 ORDER BY 2 DESC", 21, 2),
("SELECT round(magnitude) FROM $satellites group by round(magnitude)", 27, 1),
("SELECT upper(name) as NAME, id as Identifier FROM $satellites", 177, 2),
("SELECT upper(name), lower(name), id as Identifier FROM $satellites", 177, 3),
Expand Down

0 comments on commit cbcd0ee

Please sign in to comment.