Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

#1491 #1511

Merged
merged 4 commits into from
Mar 7, 2024
Merged

#1491 #1511

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion opteryx/__version__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__build__ = 344
__build__ = 346

# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand Down
1 change: 1 addition & 0 deletions opteryx/connectors/virtual_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
WELL_KNOWN_DATASETS = {
"$astronauts": (virtual_datasets.astronauts, True),
"$planets": (virtual_datasets.planets, True),
"$missions": (virtual_datasets.missions, True),
"$satellites": (virtual_datasets.satellites, True),
"$variables": (virtual_datasets.variables, True),
"$derived": (virtual_datasets.derived, False),
Expand Down
1 change: 1 addition & 0 deletions opteryx/virtual_datasets/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@

import opteryx.virtual_datasets.astronaut_data as astronauts
import opteryx.virtual_datasets.derived_data as derived
import opteryx.virtual_datasets.missions as missions
import opteryx.virtual_datasets.no_table_data as no_table
import opteryx.virtual_datasets.planet_data as planets
import opteryx.virtual_datasets.satellite_data as satellites
Expand Down
2 changes: 2 additions & 0 deletions opteryx/virtual_datasets/astronaut_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@

from orso.schema import FlatColumn
from orso.schema import RelationSchema
from orso.tools import single_item_cache
from orso.types import OrsoTypes

__all__ = ("read", "schema")


@single_item_cache
def read(*args):
import base64
import io
Expand Down
66 changes: 66 additions & 0 deletions opteryx/virtual_datasets/missions.py

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions opteryx/virtual_datasets/planet_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,11 +35,13 @@

from orso.schema import FlatColumn
from orso.schema import RelationSchema
from orso.tools import single_item_cache
from orso.types import OrsoTypes

__all__ = ("read", "schema")


@single_item_cache
def read(end_date=None, *args):
import pyarrow

Expand Down
2 changes: 2 additions & 0 deletions opteryx/virtual_datasets/satellite_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,13 @@

from orso.schema import FlatColumn
from orso.schema import RelationSchema
from orso.tools import single_item_cache
from orso.types import OrsoTypes

__all__ = ("read", "schema")


@single_item_cache
def read(*args):
import base64
import io
Expand Down
Binary file added testdata/sqlite/100000-tweets.db
Binary file not shown.
Binary file modified testdata/sqlite/database.db
Binary file not shown.
62 changes: 62 additions & 0 deletions testdata/sqlite/loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
import sqlite3

DB_NAME = "sqlite3_opt.db"


def create_table_statement(name: str, schema) -> str:
yield f"CREATE TABLE IF NOT EXISTS {name} ("
total_columns = len(schema.columns)
for i, column in enumerate(schema.columns, start=1):
# Check if it's the last column
if i == total_columns:
yield f"\t{column.name:<20}\t{column.type.name}"
else:
yield f"\t{column.name:<20}\t{column.type.name},"
yield ");"


def format_value(value):
try:
if value != value: # Check for NaN values which are not equal to themselves
return "NULL"
elif isinstance(value, str):
return "'" + value.replace("'", "''") + "'" # Properly quote strings
except:
pass
return str(value) # Use the string representation for other data types


def creator(name, con: sqlite3.Connection, dataset):

create_sql = "\n".join(create_table_statement(name, dataset.schema))
print(create_sql)
con.execute(create_sql)

con.execute("BEGIN")

for row in dataset:
con.execute(f'INSERT INTO {name} VALUES ({", ".join(format_value(r) for r in row)});')
con.commit()


def main():
conn = sqlite3.connect(DB_NAME, isolation_level=None)
conn.execute("PRAGMA journal_mode = OFF;")
conn.execute("PRAGMA synchronous = 0;")
conn.execute("PRAGMA cache_size = 1000000;") # give it a GB
conn.execute("PRAGMA locking_mode = EXCLUSIVE;")
conn.execute("PRAGMA temp_store = MEMORY;")

import opteryx

# dataset = opteryx.query("SELECT tweet_id, text, timestamp, user_id, user_verified, user_name, followers, following, tweets_by_user FROM testdata.flat.formats.parquet;")
# creator("tweets", conn, dataset)
# dataset = opteryx.query("SELECT * FROM $planets;")
# creator("planets", conn, dataset)

print(conn.execute("SELECT * FROM tweets").fetchall())
# print(conn.execute("SELECT * FROM planets").fetchall())


if __name__ == "__main__":
main()
4 changes: 4 additions & 0 deletions tests/fuzzing/test_sql_fuzzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,10 @@ def generate_random_sql_select(columns, table):
"name": virtual_datasets.astronauts.schema().name,
"fields": virtual_datasets.astronauts.schema().columns,
},
{
"name": virtual_datasets.missions.schema().name,
"fields": virtual_datasets.missions.schema().columns,
},
]

TEST_CYCLES: int = 250
Expand Down
3 changes: 3 additions & 0 deletions tests/sql_battery/test_shapes_and_errors_battery.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
("SELECT * FROM $no_table", 1, 1, None),
("SELECT * FROM sqlite.planets", 9, 20, None),
("SELECT * FROM $variables", 41, 4, None),
("SELECT * FROM $missions", 4630, 8, None),
(b"SELECT * FROM $satellites", 177, 8, None),

# Does the error tester work
Expand Down Expand Up @@ -1180,6 +1181,8 @@
# 10-way join
("SELECT p1.name AS planet1_name, p2.name AS planet2_name, p3.name AS planet3_name, p4.name AS planet4_name, p5.name AS planet5_name, p6.name AS planet6_name, p7.name AS planet7_name, p8.name AS planet8_name, p9.name AS planet9_name, p10.name AS planet10_name, p1.diameter AS planet1_diameter, p2.gravity AS planet2_gravity, p3.orbitalPeriod AS planet3_orbitalPeriod, p4.numberOfMoons AS planet4_numberOfMoons, p5.meanTemperature AS planet5_meanTemperature FROM $planets p1 JOIN $planets p2 ON p1.id = p2.id JOIN $planets p3 ON p1.id = p3.id JOIN $planets p4 ON p1.id = p4.id JOIN $planets p5 ON p1.id = p5.id JOIN $planets p6 ON p1.id = p6.id JOIN $planets p7 ON p1.id = p7.id JOIN $planets p8 ON p1.id = p8.id JOIN $planets p9 ON p1.id = p9.id JOIN $planets p10 ON p1.id = p10.id WHERE p1.diameter > 10000 ORDER BY p1.name, p2.name, p3.name, p4.name, p5.name;", 6, 15, None),

("SELECT mission, LIST(name) FROM $missions INNER JOIN (SELECT * FROM $astronauts CROSS JOIN UNNEST(missions) AS mission) AS astronauts ON Mission = mission GROUP BY mission", 16, 2, None),

# virtual dataset doesn't exist
("SELECT * FROM $RomanGods", None, None, DatasetNotFoundError),
# disk dataset doesn't exist
Expand Down
14 changes: 14 additions & 0 deletions tests/storage/test_sql_postgres.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import os
import sys
import time

sys.path.insert(1, os.path.join(sys.path[0], "../.."))

Expand All @@ -25,37 +26,50 @@ def test_postgres_storage():
connection=f"postgresql+psycopg2://{POSTGRES_USER}:{POSTGRES_PASSWORD}@trumpet.db.elephantsql.com/{POSTGRES_USER}",
)

t = time.monotonic_ns()
results = opteryx.query("SELECT * FROM pg.planets")
assert results.rowcount == 9, results.rowcount
assert results.columncount == 20
print((time.monotonic_ns() - t) / 1e9)
t = time.monotonic_ns()

# PROCESS THE DATA IN SOME WAY
results = opteryx.query("SELECT COUNT(*) FROM pg.planets;")
assert results.rowcount == 1, results.rowcount
assert results.columncount == 1
print((time.monotonic_ns() - t) / 1e9)
t = time.monotonic_ns()

# PUSH A PROJECTION
results = opteryx.query("SELECT name FROM pg.planets;")
assert results.rowcount == 9, results.rowcount
assert results.columncount == 1
print((time.monotonic_ns() - t) / 1e9)
t = time.monotonic_ns()

# JOIN ON A NON SQL TABLE
results = opteryx.query(
"SELECT * FROM pg.planets INNER JOIN $satellites ON pg.planets.id = $satellites.planetId;"
)
assert results.rowcount == 177, results.rowcount
assert results.columncount == 28, results.columncount
print((time.monotonic_ns() - t) / 1e9)
t = time.monotonic_ns()

# PUSH - CHECK STATS THE PUSHES WORKED
results = opteryx.query("SELECT name FROM pg.planets WHERE name LIKE 'Earth';")
assert results.rowcount == 1, results.rowcount
assert results.columncount == 1
assert results.stats["rows_read"] == 1
assert results.stats["columns_read"] == 1
print((time.monotonic_ns() - t) / 1e9)
t = time.monotonic_ns()

results = opteryx.query("SELECT * FROM pg.planets WHERE id > gravity")
assert results.rowcount == 2, results.rowcount
assert results.stats.get("rows_read", 0) == 9, results.stats
print((time.monotonic_ns() - t) / 1e9)
t = time.monotonic_ns()


if __name__ == "__main__": # pragma: no cover
Expand Down
Loading