Skip to content

Commit

Permalink
Merge 1591833 into 7d727ad
Browse files Browse the repository at this point in the history
  • Loading branch information
jerch committed Apr 28, 2022
2 parents 7d727ad + 1591833 commit df59ccb
Show file tree
Hide file tree
Showing 2 changed files with 132 additions and 8 deletions.
1 change: 1 addition & 0 deletions example/example/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,7 @@
}
elif DBENGINE == 'mysql8':
# docker run --name some-mysql -e MYSQL_ROOT_PASSWORD=root -e MYSQL_DATABASE=database -p 6603:3306 -d mysql
# mysql:5.7.38 for mysql_old
DATABASES = {
'default': {
'ENGINE': 'django.db.backends.mysql',
Expand Down
139 changes: 131 additions & 8 deletions fast_update/fast.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,12 @@ def get_vendor(conn: BaseDatabaseWrapper) -> str:

if conn.vendor == 'sqlite':
_conn = cast(Any, conn)
major, minor, _ = _conn.Database.sqlite_version_info
if (major == 3 and minor > 32) or major > 3:
if _conn.Database.sqlite_version_info >= (3, 33):
SEEN_CONNECTIONS[conn] = 'sqlite'
return 'sqlite'
elif _conn.Database.sqlite_version_info >= (3, 15):
SEEN_CONNECTIONS[conn] = 'sqlite_cte'
return 'sqlite_cte'
else: # pragma: no cover
logger.warning('unsupported sqlite backend, fast_update will fall back to bulk_update')
SEEN_CONNECTIONS[conn] = ''
Expand All @@ -63,10 +65,9 @@ def get_vendor(conn: BaseDatabaseWrapper) -> str:
c.execute("SELECT column_1 FROM (VALUES ROW(1, 'zzz'), ROW(2, 'yyy')) as foo")
SEEN_CONNECTIONS[conn] = 'mysql8'
return 'mysql8'
except ProgrammingError: # pragma: no cover
logger.warning('unsupported mysql backend, fast_update will fall back to bulk_update')
SEEN_CONNECTIONS[conn] = ''
return ''
except ProgrammingError:
SEEN_CONNECTIONS[conn] = 'mysql_old'
return 'mysql_old'

logger.warning('unsupported db backend, fast_update will fall back to bulk_update')
SEEN_CONNECTIONS[conn] = ''
Expand Down Expand Up @@ -127,6 +128,51 @@ def as_sqlite(
return f'UPDATE "{tname}" SET {cols} FROM (VALUES {values}) AS "{dname}" WHERE {where}'


def as_sqlite_cte(
tname: str,
pkname: str,
fields: Sequence[Field],
rows: List[str],
count: int,
compiler: SQLCompiler,
connection: BaseDatabaseWrapper
) -> str:
"""
sqlite >=3.15 <3.33 does not support yet the FROM VALUES pattern, but has
CTE support (>=3.8) with row assignment in UPDATE (>=3.15).
So we can use CTE to construct the values table upfront with UNION ALL,
and do row level update in the SET clause.
To limit the updated rows we normally would do a correlated existance test
in the update WHERE clause:
WHERE EXISTS (SELECT 1 FROM target WHERE target.pk = cte.pk)
but this shows very bad performance due to repeated rescanning of the
values table. We can achieve the same filter condition by providing
the pks with an IN test:
WHERE target.pk in (pk1, pk2, ...)
This sacrifices some bandwidth, but is only ~40% slower than the
FROM VALUES table join.
"""
# TODO: needs proper field names escaping
# FIXME: CTE pattern does not set rowcount correctly, needs patch in update_from_values
# FIXME: pk values in where need placeholder calc
# FIXME: dont use pk as fixed col name, proper escape of col names
dname = 'd' if tname != 'd' else 'c'
cols = ', '.join([f.column for f in fields])
values = ' UNION ALL '.join([' SELECT ' + row[1:-1] for row in rows])
where = f'"{tname}"."{pkname}"={dname}.pk'
return (
f'WITH {dname}(pk, {cols}) AS ({values}) '
f'UPDATE "{tname}" '
f'SET ({cols}) = (SELECT {cols} FROM {dname} WHERE {where}) '
f'WHERE "{tname}"."{pkname}" in ({",".join(["%s"]*count)})'
)


def as_mysql(
tname: str,
pkname: str,
Expand Down Expand Up @@ -182,12 +228,85 @@ def as_mysql8(
return f'UPDATE `{tname}` INNER JOIN (VALUES {values}) AS {dname} ON {on} SET {cols}'


def as_mysql_old(
tname: str,
pkname: str,
fields: Sequence[Field],
rows: List[str],
count: int,
compiler: SQLCompiler,
connection: BaseDatabaseWrapper
) -> str:
"""
Workaround for older MySQL (<8.0) and MariaDB (<10.3) versions.
"""
# FIXME: better first values calc
# FIXME: this dies much earlier from mysql stack limit?
# TODO: test MariaDB 10.2
# TODO: Is this better than using half broken TVC in MariaDB 10.3+?
dname = 'd' if tname != 'd' else 'c'
cols = ','.join(f'`{tname}`.`{f.column}`=`{dname}`.`{f.column}`' for f in fields)
colnames = [pkname] + [f.column for f in fields]
first = ' SELECT ' + ', '.join([f'{ph} AS `{colname}`' for ph, colname in zip(rows[0][1:-1].split(','), colnames)])
later = ' UNION ALL '.join([' SELECT ' + row[1:-1] for row in rows[1:]])
values = f'{first} UNION ALL {later}' if later else first
where = f'`{tname}`.`{pkname}` = `{dname}`.`{pkname}`'
return (
f'UPDATE `{tname}`, ({values}) {dname} '
f'SET {cols} WHERE {where}'
)


# possible scheme for oracle 21
# UPDATE (SELECT taa.n n,
# taa.t1 tt1,
# taa.t2 tt2,
# ta1.num num,
# ta1.letter letter,
# ta1.hmm hmm
# FROM ttt taa,
# (SELECT 1 AS num, 'one' AS letter, 'a' as hmm FROM dual
# UNION ALL SELECT 2, 'two', 'b' FROM dual
# UNION ALL SELECT 3, 'three', 'c' FROM dual) ta1
# WHERE num = n)
# SET tt1 = letter,
# tt2 = hmm

# possible scheme for oracle 18+
# unclear: bad runtime due to subquery re-eval?
# UPDATE ttt ta1
# SET (t1, t2) = (SELECT ta2.letter, ta2.hmm FROM (SELECT 1 AS num, 'one' AS letter, 'a' as hmm FROM dual
# UNION ALL SELECT 2, 'two', 'b' FROM dual
# UNION ALL SELECT 3, 'three', 'c' FROM dual) ta2
# WHERE ta1.n = ta2.num)
# WHERE ta1.n in (1,2,3);

# more elegant with own type in oracle?
# create type t as object (a varchar2(10), b varchar2(10), c number);
# create type tt as table of t;
# select * from table( tt (
# t('APPLE', 'FRUIT', 1),
# t('APPLE', 'FRUIT', 1122),
# t('CARROT', 'VEGGIExxxxxxxxxxx', 3),
# t('PEACH', 'FRUIT', 104),
# t('CUCUMBER', 'VEGGIE', 5),
# t('ORANGE', 'FRUIT', 6) ) );

# possible scheme for MSSQL 2014+
# UPDATE ttt
# SET ttt.t1 = temp.v
# FROM (VALUES (1, 'bla'), (2, 'gurr'), (1, 'xxx')) temp(pk, v)
# WHERE ttt.n = temp.pk;


# TODO: Make is pluggable for other vendors? (also support check)
QUERY = {
'sqlite': as_sqlite,
'sqlite_cte': as_sqlite_cte,
'postgresql': as_postgresql,
'mysql': as_mysql,
'mysql8': as_mysql8
'mysql8': as_mysql8,
'mysql_old': as_mysql_old
}


Expand Down Expand Up @@ -221,11 +340,15 @@ def update_from_values(
] if hasattr(f, 'get_placeholder') else default_placeholder)
for pos, f in enumerate(row_fields)
]
rows = [f'({",".join(row)})' for row in zip(*col_placeholders)]
# FIXME: remove paranthesis here, apply late in query functions instead
rows = [f'({", ".join(row)})' for row in zip(*col_placeholders)]
sql = QUERY[vendor](tname, pk_field.column, fields, rows, counter, compiler, connection)
if vendor == 'mysql':
# mysql only: prepend (0,1,2,...) as first row
data = list(range(len(fields) + 1)) + data
elif vendor == 'sqlite_cte':
# append pks a second time for faster WHERE IN narrowing in CTE variant
data += [data[i] for i in range(0, len(data), row_length)]
c.execute(sql, data)
return c.rowcount

Expand Down

0 comments on commit df59ccb

Please sign in to comment.