Skip to content

Commit

Permalink
Add tests for SQLite to PostgreSQL data type conversion (#797)
Browse files Browse the repository at this point in the history
* Add unit tests for SQLite to PostgreSQL data type conversion logic

* Fix blob and decimal support for sqlite uploads
  • Loading branch information
neumark committed Mar 30, 2023
1 parent e89d063 commit c4b6b08
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 6 deletions.
46 changes: 40 additions & 6 deletions splitgraph/ingestion/sqlite/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
import contextlib
import itertools
import os
import re
import sqlite3
import tempfile
from contextlib import contextmanager
Expand Down Expand Up @@ -35,6 +36,17 @@
ORDER BY 1,2;
"""

RE_SINGLE_PARAM_TYPE = re.compile(r"^([A-Z ]+)\(\s*([0-9]+)\s*\)$")
RE_DOUBLE_PARAM_TYPE = re.compile(r"^([A-Z ]+)\(\s*([0-9]+)\s*,\s*([0-9]+)\s*\)$")
# from: https://www.sqlite.org/datatype3.html#affinity_name_examples
VARCHAR_ALIASES = {
"CHARACTER",
"VARCHAR",
"VARYING CHARACTER",
"NCHAR",
"NATIVE CHARACTER",
"NVARCHAR",
}

# based on https://stackoverflow.com/a/16696317
def download_file(url: str, local_fh: tempfile._TemporaryFileWrapper) -> int:
Expand Down Expand Up @@ -70,7 +82,19 @@ def db_from_minio(url: str) -> Generator[sqlite3.Connection, None, None]:


# partly based on https://stackoverflow.com/questions/1942586/comparison-of-database-column-types-in-mysql-postgresql-and-sqlite-cross-map
def sqlite_to_postgres_type(sqlite_type: str) -> str:
def sqlite_to_postgres_type(raw_sqlite_type: str) -> str:
sqlite_type = raw_sqlite_type.upper()
match = re.search(RE_SINGLE_PARAM_TYPE, sqlite_type)
if match:
(type_name, param) = match.groups()
if type_name in VARCHAR_ALIASES:
type_name = "VARCHAR"
return "%s(%s)" % (type_name, param)
match = re.search(RE_DOUBLE_PARAM_TYPE, sqlite_type)
if match:
# Only NUMERIC and DECIMAL have double parameters, which both exist
# in PostgreSQL as well.
return "%s(%s,%s)" % match.groups()
if sqlite_type == "DATETIME":
return "TIMESTAMP WITHOUT TIME ZONE"
# from: https://www.sqlite.org/datatype3.html#determination_of_column_affinity
Expand All @@ -82,11 +106,11 @@ def sqlite_to_postgres_type(sqlite_type: str) -> str:
return "TEXT"
# If the declared type for a column contains the string "BLOB" or if no type is specified then the column has affinity BLOB.
if "BLOB" in sqlite_type:
return "BLOB"
return "BYTEA"
# If the declared type for a column contains any of the strings "REAL", "FLOA", or "DOUB" then the column has REAL affinity.
if "REAL" in sqlite_type or "FLOA" in sqlite_type or "DOUB" in sqlite_type:
return "REAL"
# Otherwise, the affinity is NUMERIC. TODO: Precision and scale
# Otherwise, the affinity is NUMERIC.
return "NUMERIC"


Expand All @@ -110,13 +134,23 @@ def sqlite_connection_to_introspection_result(con: sqlite3.Connection) -> Intros
return schema


BINARY_DATA_MESSAGE = "[binary data]"


def sanitize_preview_row(row: sqlite3.Row) -> Dict[str, Any]:
return {k: row[k] if type(row[k]) != bytes else BINARY_DATA_MESSAGE for k in row.keys()}


def get_preview_rows(
con: sqlite3.Connection, table_name: str, limit: Optional[int] = 10
) -> Union[MountError, List[Dict[str, Any]]]:
# TODO: catch errors and return them as MountErrors
return query_connection(
con, "SELECT * FROM {} LIMIT {}".format(_quote_ident(table_name), limit) # nosec
)
return [
sanitize_preview_row(row)
for row in query_connection(
con, "SELECT * FROM {} LIMIT {}".format(_quote_ident(table_name), limit) # nosec
)
]


class SQLiteDataSource(LoadableDataSource, PreviewableDataSource):
Expand Down
17 changes: 17 additions & 0 deletions test/splitgraph/ingestion/test_sqlite.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from splitgraph.ingestion.sqlite import sqlite_to_postgres_type


def test_type_mapping():
assert sqlite_to_postgres_type("INT") == "INTEGER"
assert sqlite_to_postgres_type("TEXT") == "TEXT"
assert sqlite_to_postgres_type("text") == "TEXT"
assert sqlite_to_postgres_type("varchar(255)") == "VARCHAR(255)"
assert sqlite_to_postgres_type("VARCHAR(255)") == "VARCHAR(255)"
assert sqlite_to_postgres_type("REAL") == "REAL"
assert sqlite_to_postgres_type("real") == "REAL"
assert sqlite_to_postgres_type("decimal(2,10)") == "DECIMAL(2,10)"
assert sqlite_to_postgres_type("decimal(20,100)") == "DECIMAL(20,100)"
assert sqlite_to_postgres_type("BLOB") == "BYTEA"
assert sqlite_to_postgres_type("decimal(2, 20)") == "DECIMAL(2,20)"
assert sqlite_to_postgres_type("NATIVE CHARACTER(70)") == "VARCHAR(70)"
assert sqlite_to_postgres_type("NVARCHAR(160)") == "VARCHAR(160)"

0 comments on commit c4b6b08

Please sign in to comment.