Skip to content

Commit

Permalink
Merge pull request #1053 from mabel-dev/#1038
Browse files Browse the repository at this point in the history
  • Loading branch information
joocer committed May 27, 2023
2 parents 548fb96 + 82cc0cd commit 348583f
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 8 deletions.
16 changes: 12 additions & 4 deletions opteryx/connectors/sql_connector.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
from opteryx import config
from opteryx.connectors.capabilities import PredicatePushable
from opteryx.exceptions import MissingDependencyError
from opteryx.exceptions import UnmetRequirementError


class BaseSQLStorageAdapter: # this is used by the SHOW STORES statement
Expand Down Expand Up @@ -56,17 +57,23 @@ class SqlConnector(BaseSQLStorageAdapter, PredicatePushable):
__mode__ = "SQL"

def __init__(
self, prefix: str = "", remove_prefix: bool = False, connection: str = None
self, prefix: str = "", remove_prefix: bool = False, connection: str = None, engine=None
) -> None:
super(BaseSQLStorageAdapter, self).__init__()
super(PredicatePushable, self).__init__()
# we're just testing we can import here
try:
from sqlalchemy import create_engine
import sqlalchemy
except ImportError as err: # pragma: nocover
raise MissingDependencyError(err.name) from err

self._connection = connection
self._engine = engine

if self._engine is None and self._connection is None:
raise UnmetRequirementError(
"SQL Connections require either a SQL Alchemy connection string in the 'connection' parameter, or a SQL Alchemy Engine in the 'engine' parameter."
)

self._remove_prefix = remove_prefix
self._prefix = prefix
Expand Down Expand Up @@ -104,8 +111,9 @@ def read_records(
for predicate in self._predicates:
query_builder.WHERE(_write_predicate(predicate))

engine = _get_engine(self._connection)
with engine.connect() as conn:
if self._engine is None:
self._engine = _get_engine(self._connection)
with self._engine.connect() as conn:
result = conn.execute(text(str(query_builder)))

batch = result.fetchmany(chunk_size)
Expand Down
6 changes: 3 additions & 3 deletions opteryx/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def fuzzy_search(name, candidates):
best_match_score = 100

name = "".join(char for char in name if char.isalnum())
for raw_candidate, candidate in (
for raw, candidate in (
(
ca,
"".join(ch for ch in ca if ch.isalnum()),
Expand All @@ -71,10 +71,10 @@ def fuzzy_search(name, candidates):
):
my_dist = compare(candidate, name)
if my_dist == 0: # if we find an exact match, return that
return raw_candidate
return raw
if 0 <= my_dist < best_match_score:
best_match_score = my_dist
best_match_column = raw_candidate
best_match_column = raw

return best_match_column

Expand Down
2 changes: 1 addition & 1 deletion opteryx/version.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,4 +17,4 @@
"""

# __version__ = "0.4.0-alpha.6"
__version__ = "0.10.0"
__version__ = "0.11.0-alpha.1"
52 changes: 52 additions & 0 deletions tests/misc/test_fuzzy_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,58 @@
("apple", ["appl", "aple", "aplee", "aplle", "aplle"], "appl"), # first best match
("apple", ["aple", "appl", "aplee", "aplle", "aplee"], "aple"), # first best match
("apple", ["aplee", "applle"], "applle"),
("banana", ["apple", "pear", "grape"], None),
("snack", ["cracker", "crack", "snack"], "snack"),
("cucumberrr", ["cabbage", "celery", "cucumber", "carrot", "cantaloupe"], "cucumber"),
("pop", ["snapple", "crackle", "pop"], "pop"),
("grape", ["apple", "pear", "grape"], "grape"),
("lettuce", ["cabbage", "celery", "cucumber", "carrot", "cantaloupe"], None),
("world", ["hello", "world"], "world"),
("mango", ["apple", "pear", "grape"], None),
("blueberry", ["strawberry", "raspberry", "blackberry"], None),
("elephant", ["lion", "tiger", "bear"], None),
("cafe", ["coffee", "café", "caffeine"], "café"),
("gra_pe", ["apple", "grape", "grapefruit"], "grape"),
("lemonade", ["limeade", "lemonade", "orangeade"], "lemonade"),
("coconut", ["coconut", "cocoa", "coffee"], "coconut"),
("eleven", ["seven", "eleven", "twelve"], "eleven"),
("kiwi", ["apple", "banana", "kiwi"], "kiwi"),
("beet", ["beetroot", "carrot", "potato"], None),
("pineapple", ["pineapple", "apple", "banana"], "pineapple"),
("watermelon", ["melon", "watermelon", "grapefruit"], "watermelon"),
("chocolate", ["vanilla", "strawberry", "chocolate"], "chocolate"),
("peach", ["apple", "pear", "peach"], "peach"),
("brocolli", ["spinach", "kale", "broccoli"], "broccoli"),
("apple", ["Apple", "ApPle", "aPple", "aPPle"], "Apple"),
("app.le", ["apple", "apples", "ap.le", "appl.e"], "apple"),
("!orange", ["apple", "banana", "!orange", "orange!"], "!orange"),
("Lemonade", ["lemonade", "LEMOnade", "LEMONADE"], "lemonade"),
("Kiwi!", ["Kiwi", "kiwi!"], "Kiwi"),
("strawberry", ["Strawberries", "Strawberry"], "Strawberry"),
("mango", ["MANGO", "MangO", "MaNgo", "manGO"], "MANGO"),
("!coconut!", ["coconut", "CocoNut", "coconut!", "!Coconut"], "coconut"),
("watermelon", ["watermelon", "WaTerMeLon", "watermelons", "wateRmElon"], "watermelon"),
("grape!", ["GraPe", "GRAPE"], "GraPe"),
("_melon", ["watermelon", "_melon", "me_lon", "MELON_"], "_melon"),
("apple?", ["apple", "APPLE?", "applE", "APPLE"], "apple"),
("BaNaNa", ["banana", "BANANA", "banAna", "BaNAna"], "banana"),
("pEar!", ["pear", "PEAr", "Pear!", "pear"], "pear"),
("!chocolate!", ["Chocolate", "!ChOcOlate!", "chocolate", "CHOCOLATE"], "Chocolate"),
("apri_cot", ["apricot", "APR!COT", "ApriCOT", "Apricot"], "apricot"),
("a.b.c.d", ["abcd", "a.b.cd", "a.b.c.d.e"], "abcd"),
("a+b-c*d", ["abcd", "a+b-c*d", "a-b+c-d*e"], "abcd"),
("aBcDe", ["AbCdE", "aBCde", "abcde"], "AbCdE"),
("b-a+n+a+n+a", ["banana", "apple", "orange"], "banana"),
("12345", ["54321", "12345", "543210"], "12345"),
("123.45", ["543.21", "123.45", "543.210"], "123.45"),
("!@#$%", ["!@#$%", "!@#$%^", "!@#$%^&"], "!@#$%"),
("hello.world", ["helloworld", "hello.world", "hello-world"], "helloworld"),
("a!", ["a!", "a!!", "a!!!"], "a!"),
("grapefruit", ["grapefruit", "grape", "fruit"], "grapefruit"),
("apple", ["APPLE", "apple"], "APPLE"),
("apple", ["APple", "ApPle", "apPle", "APPle"], "APple"),
("banana", ["banana", "baNanA", "BANANA"], "banana"),
("orange", ["OrAnGe", "or_ange", "orange"], "OrAnGe"),
]
# fmt:on

Expand Down
32 changes: 32 additions & 0 deletions tests/storage/test_sql_sqlite copy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
Test we can read from SQLite - this is a basic exercise of the SQL Connector
"""
import os
import sys

sys.path.insert(1, os.path.join(sys.path[0], "../.."))

import opteryx

from opteryx.connectors import SqlConnector


def test_sqlite_storage():
    """Register SqlConnector for "sqlite" via a connection string and query it.

    Checks that a plain SELECT over the planets table yields 9 rows and that
    a COUNT(*) aggregation over the same table yields a single row.
    """
    store_options = {
        "remove_prefix": True,
        "connection": "sqlite:///testdata/sqlite/database.db",
    }
    opteryx.register_store("sqlite", SqlConnector, **store_options)

    # read the whole table through the connector
    all_planets = opteryx.query("SELECT * FROM sqlite.planets")
    assert all_planets.rowcount == 9, all_planets.rowcount

    # PROCESS THE DATA IN SOME WAY
    planet_count = opteryx.query("SELECT COUNT(*) FROM sqlite.planets;")
    assert planet_count.rowcount == 1, planet_count.rowcount


# allow the test to be run directly as a script, outside of a test runner
if __name__ == "__main__":  # pragma: no cover
    test_sqlite_storage()
    print("✅ okay")
32 changes: 32 additions & 0 deletions tests/storage/test_sqlalchemy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""
Test we can read from SQLite using a pre-built SQLAlchemy engine - this exercises the SQL Connector's 'engine' parameter
"""
import os
import sys

sys.path.insert(1, os.path.join(sys.path[0], "../.."))

import opteryx

from opteryx.connectors import SqlConnector


def test_sqlalchemy():
    """Register SqlConnector for "sqlite" with a ready-made engine and query it.

    The engine is created here with SQLAlchemy's create_engine and handed to
    the connector through its 'engine' argument instead of a connection
    string. Checks that a plain SELECT over the planets table yields 9 rows
    and that a COUNT(*) aggregation over the same table yields a single row.
    """
    # imported inside the test body, so sqlalchemy is only needed when it runs
    from sqlalchemy import create_engine

    sqlite_engine = create_engine("sqlite:///testdata/sqlite/database.db")

    opteryx.register_store(
        "sqlite",
        SqlConnector,
        remove_prefix=True,
        engine=sqlite_engine,
    )

    # read the whole table through the connector
    all_planets = opteryx.query("SELECT * FROM sqlite.planets")
    assert all_planets.rowcount == 9, all_planets.rowcount

    # PROCESS THE DATA IN SOME WAY
    planet_count = opteryx.query("SELECT COUNT(*) FROM sqlite.planets;")
    assert planet_count.rowcount == 1, planet_count.rowcount


# allow the test to be run directly as a script, outside of a test runner
if __name__ == "__main__":  # pragma: no cover
    test_sqlalchemy()
    print("✅ okay")

0 comments on commit 348583f

Please sign in to comment.