Skip to content

Commit

Permalink
gh-108590: Fix sqlite3.iterdump for invalid Unicode in TEXT columns (#…
Browse files Browse the repository at this point in the history
…108657)

Co-authored-by: Erlend E. Aasland <erlend@python.org>
  • Loading branch information
CorvinM and erlend-aasland committed Aug 30, 2023
1 parent 210a5d7 commit 400a1ce
Show file tree
Hide file tree
Showing 3 changed files with 41 additions and 2 deletions.
27 changes: 25 additions & 2 deletions Lib/sqlite3/dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,10 @@
# future enhancements, you should normally quote any identifier that
# is an English language word, even if you do not have to."


from contextlib import contextmanager


def _quote_name(name):
    """Return *name* wrapped in double quotes for use as an SQL identifier.

    Any double quote already present in *name* is escaped by doubling it.
    """
    escaped = name.replace('"', '""')
    return '"' + escaped + '"'

Expand All @@ -15,6 +19,24 @@ def _quote_value(value):
return "'{0}'".format(value.replace("'", "''"))


def _force_decode(bs, *args, **kwargs):
    """Decode the bytes-like *bs* to ``str`` without failing on bad data.

    The extra positional and keyword arguments are passed straight through
    to ``bs.decode()``.  If that decode raises ``UnicodeDecodeError``, every
    byte is mapped to the code point with the same numeric value instead,
    so the caller always gets a string back.
    """
    # gh-108590: Don't fail if the database contains invalid Unicode data.
    try:
        return bs.decode(*args, **kwargs)
    except UnicodeDecodeError:
        # Latin-1 maps each byte 0-255 to the identically numbered code
        # point, which is exactly what chr() applied per byte produces,
        # but without a Python-level loop.
        return bs.decode("latin-1")


@contextmanager
def _text_factory(con, factory):
    """Context manager that temporarily sets ``con.text_factory``.

    *factory* is installed on entry; the value that was there before is
    put back on exit, whether the ``with`` body finishes normally or
    raises.
    """
    # Remember the current factory and install the new one in one step.
    previous, con.text_factory = con.text_factory, factory
    try:
        yield
    finally:
        con.text_factory = previous


def _iterdump(connection):
"""
Returns an iterator to the dump of the database in an SQL text format.
Expand Down Expand Up @@ -74,8 +96,9 @@ def _iterdump(connection):
)
)
query_res = cu.execute(q)
for row in query_res:
yield("{0};".format(row[0]))
with _text_factory(connection, bytes):
for row in query_res:
yield("{0};".format(_force_decode(row[0])))

# Now when the type is 'index', 'trigger', or 'view'
q = """
Expand Down
15 changes: 15 additions & 0 deletions Lib/test/test_sqlite3/test_dump.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,6 +133,21 @@ def test_dump_virtual_tables(self):
actual = list(self.cx.iterdump())
self.assertEqual(expected, actual)

def test_dump_unicode_invalid(self):
    # gh-108590
    # X'619f' is the byte pair 0x61 0x9f: an "a" followed by a byte that
    # is not valid UTF-8 on its own.  The CAST stores it as TEXT anyway.
    self.cu.executescript("""
        CREATE TABLE foo (data TEXT);
        INSERT INTO foo VALUES (CAST(X'619f' AS TEXT));
    """)
    dumped = list(self.cx.iterdump())
    # The undecodable byte is expected to come back as the code point
    # with the same value (U+009F) rather than aborting the dump.
    wanted = [
        "BEGIN TRANSACTION;",
        "CREATE TABLE foo (data TEXT);",
        "INSERT INTO \"foo\" VALUES('a\x9f');",
        "COMMIT;",
    ]
    self.assertEqual(wanted, dumped)


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Fixed an issue where :meth:`sqlite3.Connection.iterdump` would fail and leave an incomplete SQL dump if a table includes invalid Unicode sequences. Patch by Corvin McPherson.

0 comments on commit 400a1ce

Please sign in to comment.