Skip to content

Commit

Permalink
_search= queries now correctly escaped, fixes #651
Browse files Browse the repository at this point in the history
Queries containing words or characters that are reserved in the SQLite
FTS5 query language could cause errors.

Queries are now escaped like so:

    dog cat => "dog" "cat"
  • Loading branch information
simonw committed Dec 29, 2019
1 parent 59e7014 commit 3c861f3
Show file tree
Hide file tree
Showing 6 changed files with 47 additions and 2 deletions.
1 change: 1 addition & 0 deletions datasette/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"datasette.publish.now",
"datasette.publish.cloudrun",
"datasette.facets",
"datasette.sql_functions",
)

pm = pluggy.PluginManager("datasette")
Expand Down
7 changes: 7 additions & 0 deletions datasette/sql_functions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from datasette import hookimpl
from datasette.utils import escape_fts


@hookimpl
def prepare_connection(conn):
    """Register the ``escape_fts`` SQL function on a database connection.

    Makes ``escape_fts(text)`` callable from SQL executed on ``conn``; the
    function takes exactly one argument and is backed by
    ``datasette.utils.escape_fts``.

    ``conn`` is expected to provide ``create_function(name, num_args, func)``
    (presumably a ``sqlite3.Connection`` -- confirm against the hook spec).
    """
    sql_name, arg_count = "escape_fts", 1
    conn.create_function(sql_name, arg_count, escape_fts)
14 changes: 14 additions & 0 deletions datasette/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -758,6 +758,20 @@ def format_bytes(bytes):
return "{:.1f} {}".format(current, unit)


# Splits a query on runs of whitespace while capturing complete double-quoted
# phrases. The phrase branch is written as "[^"]*" rather than ".*?" because
# "." does not match a newline: with ".*?" a quoted phrase containing a
# newline was never recognised as a phrase and got torn apart on the newline.
_escape_fts_re = re.compile(r'\s+|("[^"]*")')


def escape_fts(query):
    """Return ``query`` rewritten so every term is a double-quoted string.

    Quoting each term stops reserved words and special characters from being
    interpreted as query syntax, e.g. ``dog cat`` becomes ``"dog" "cat"``.

    Rules:

    * Phrases that are already double quoted are passed through unchanged,
      including phrases that contain newlines.
    * An unbalanced double quote is closed by appending one at the end.
    * Empty phrases (``""``) are dropped.
    * An empty or whitespace-only query yields an empty string.

    ``query`` must be a ``str``; the return value is a ``str``.
    """
    # If query has unbalanced ", add one at end
    if query.count('"') % 2:
        query += '"'
    bits = _escape_fts_re.split(query)
    # re.split() emits None for the capture group whenever the whitespace
    # branch matched, and "" between adjacent separators; discard both, along
    # with empty quoted phrases, which carry no search terms.
    bits = [b for b in bits if b and b != '""']
    # Quotes are balanced at this point, so every quote character belongs to a
    # captured phrase; any bit not starting with a quote is a bare term.
    return " ".join(
        bit if bit.startswith('"') else '"{}"'.format(bit) for bit in bits
    )


class RequestParameters(dict):
def get(self, name, default=None):
"Return first value in the list, if available"
Expand Down
4 changes: 2 additions & 2 deletions datasette/views/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -361,7 +361,7 @@ async def data(
# Simple ?_search=xxx
search = search_args["_search"]
where_clauses.append(
"{fts_pk} in (select rowid from {fts_table} where {fts_table} match :search)".format(
"{fts_pk} in (select rowid from {fts_table} where {fts_table} match escape_fts(:search))".format(
fts_table=escape_sqlite(fts_table), fts_pk=escape_sqlite(fts_pk)
)
)
Expand All @@ -375,7 +375,7 @@ async def data(
raise DatasetteError("Cannot search by that column", status=400)

where_clauses.append(
"rowid in (select rowid from {fts_table} where {search_col} match :search_{i})".format(
"rowid in (select rowid from {fts_table} where {search_col} match escape_fts(:search_{i}))".format(
fts_table=escape_sqlite(fts_table),
search_col=escape_sqlite(search_col),
i=i,
Expand Down
5 changes: 5 additions & 0 deletions tests/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,11 @@ def test_sortable_columns_metadata(app_client):
[2, "terry dog", "sara weasel", "puma"],
],
),
(
# Special keyword shouldn't break FTS query
"/fixtures/searchable.json?_search=AND",
[],
),
(
"/fixtures/searchable.json?_search=weasel",
[[2, "terry dog", "sara weasel", "puma"]],
Expand Down
18 changes: 18 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -388,3 +388,21 @@ def test_path_with_format(path, format, extra_qs, expected):
)
def test_format_bytes(bytes, expected):
assert expected == utils.format_bytes(bytes)


@pytest.mark.parametrize(
    ("query", "expected"),
    (
        # Bare terms are each wrapped in double quotes
        ("dog", '"dog"'),
        ("cat,", '"cat,"'),
        ("cat dog", '"cat" "dog"'),
        # Phrases that arrive double quoted are passed through untouched
        ('"cat dog"', '"cat dog"'),
        ('"cat dog" fish', '"cat dog" "fish"'),
        # Unbalanced double quotes are handled sensibly
        ('cat"', '"cat"'),
        ('"cat dog" "fish', '"cat dog" "fish"'),
    ),
)
def test_escape_fts(query, expected):
    """utils.escape_fts() returns the expected quoted form for each query."""
    actual = utils.escape_fts(query)
    assert actual == expected

0 comments on commit 3c861f3

Please sign in to comment.