Skip to content

Commit

Permalink
.enable_fts(..., replace=True) argument, closes #160
Browse files Browse the repository at this point in the history
  • Loading branch information
simonw committed Sep 20, 2020
1 parent 3cc1944 commit ecb50c8
Show file tree
Hide file tree
Showing 4 changed files with 97 additions and 5 deletions.
10 changes: 10 additions & 0 deletions docs/python-api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -1208,6 +1208,16 @@ You can customize the tokenizer configured for the table using the ``tokenize=``
The SQLite documentation has more on `FTS5 tokenizers <https://www.sqlite.org/fts5.html#tokenizers>`__ and `FTS4 tokenizers <https://www.sqlite.org/fts3.html#tokenizer>`__. ``porter`` is a valid option for both.
If you attempt to configure an FTS table where one already exists, a ``sqlite3.OperationalError`` exception will be raised.
You can replace the existing table with a new configuration using ``replace=True``:
.. code-block:: python
db["articles"].enable_fts(["headline"], tokenize="porter", replace=True)
This will have no effect if the FTS table already exists with exactly this configuration; otherwise it will drop and recreate the table with the new settings. The comparison takes into consideration the columns, the tokenizer, the FTS version used and whether or not the table has triggers.
To remove the FTS tables and triggers you created, use the ``disable_fts()`` table method:
.. code-block:: python
Expand Down
31 changes: 27 additions & 4 deletions sqlite_utils/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -814,16 +814,21 @@ def add_foreign_key(self, column, other_table=None, other_column=None):
self.db.add_foreign_keys([(self.name, column, other_table, other_column)])

def enable_fts(
self, columns, fts_version="FTS5", create_triggers=False, tokenize=None
self,
columns,
fts_version="FTS5",
create_triggers=False,
tokenize=None,
replace=False,
):
"Enables FTS on the specified columns."
sql = (
create_fts_sql = (
textwrap.dedent(
"""
CREATE VIRTUAL TABLE [{table}_fts] USING {fts_version} (
{columns},{tokenize}
content=[{table}]
);
)
"""
)
.strip()
Expand All @@ -834,7 +839,25 @@ def enable_fts(
tokenize="\n tokenize='{}',".format(tokenize) if tokenize else "",
)
)
self.db.executescript(sql)
should_recreate = False
if replace and self.db["{}_fts".format(self.name)].exists():
# Does the table need to be recreated?
fts_schema = self.db["{}_fts".format(self.name)].schema
if fts_schema != create_fts_sql:
should_recreate = True
expected_triggers = {self.name + suffix for suffix in ("_ai", "_ad", "_au")}
existing_triggers = {t.name for t in self.triggers}
has_triggers = existing_triggers.issuperset(expected_triggers)
if has_triggers != create_triggers:
should_recreate = True
if not should_recreate:
# Table with correct configuration already exists
return self

if should_recreate:
self.disable_fts()

self.db.executescript(create_fts_sql)
self.populate_fts(columns)

if create_triggers:
Expand Down
59 changes: 59 additions & 0 deletions tests/test_fts.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,3 +217,62 @@ def test_rebuild_removes_junk_docsize_rows(tmpdir, fts_version):
# rebuild should fix this:
db["licenses_fts"].rebuild_fts()
assert db["licenses_fts_docsize"].count == 2


@pytest.mark.parametrize(
    "kwargs",
    [
        {"columns": ["title"]},
        {"fts_version": "FTS4"},
        {"create_triggers": True},
        {"tokenize": "porter"},
    ],
)
def test_enable_fts_replace(kwargs):
    """enable_fts(..., replace=True) applies a changed FTS configuration.

    Each parametrized ``kwargs`` overrides exactly one setting relative to
    the initial ``enable_fts(["title", "author"])`` call; the test then
    checks that the overridden setting is visible on the recreated table.
    """
    db = Database(memory=True)
    db["books"].insert(
        {
            "id": 1,
            "title": "Habits of Australian Marsupials",
            "author": "Marlee Hawkins",
        },
        pk="id",
    )
    db["books"].enable_fts(["title", "author"])
    # Baseline: no triggers, both columns indexed, FTS5, no porter tokenizer
    assert not db["books"].triggers
    assert db["books_fts"].columns_dict.keys() == {"title", "author"}
    assert "FTS5" in db["books_fts"].schema
    assert "porter" not in db["books_fts"].schema
    # Now modify the FTS configuration
    should_have_changed_columns = "columns" in kwargs
    # Merge into a fresh dict rather than writing to ``kwargs``: that dict
    # belongs to the parametrize list, so mutating it would leak state into
    # any later run that receives the same parameter object.
    call_kwargs = {"columns": ["title", "author"], **kwargs}
    db["books"].enable_fts(**call_kwargs, replace=True)
    # Check that the new configuration is correct
    if should_have_changed_columns:
        assert db["books_fts"].columns_dict.keys() == {"title"}
    if "create_triggers" in kwargs:
        assert db["books"].triggers
    if "fts_version" in kwargs:
        assert "FTS4" in db["books_fts"].schema
    if "tokenize" in kwargs:
        assert "porter" in db["books_fts"].schema


def test_enable_fts_replace_does_nothing_if_args_the_same():
    """Repeating enable_fts with identical arguments and replace=True only reads.

    Every statement passed to the tracer during the second call must start
    with ``select ``.
    """
    queries = []

    def record(sql, params):
        # Tracer callback: remember each statement together with its params
        queries.append((sql, params))

    db = Database(memory=True, tracer=record)
    book = {
        "id": 1,
        "title": "Habits of Australian Marsupials",
        "author": "Marlee Hawkins",
    }
    db["books"].insert(book, pk="id")
    fts_columns = ["title", "author"]
    db["books"].enable_fts(fts_columns, create_triggers=True)
    # Forget the setup statements so only the second call is inspected
    queries.clear()
    # Running that again shouldn't run much SQL:
    db["books"].enable_fts(fts_columns, create_triggers=True, replace=True)
    # The only SQL that executed should be select statements
    assert all(sql.startswith("select ") for sql, _params in queries)
2 changes: 1 addition & 1 deletion tests/test_tracer.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_tracer():
("INSERT INTO [dogs] ([name]) VALUES (?);", ["Cleopaws"]),
("select name from sqlite_master where type = 'view'", None),
(
"CREATE VIRTUAL TABLE [dogs_fts] USING FTS5 (\n [name],\n content=[dogs]\n);",
"CREATE VIRTUAL TABLE [dogs_fts] USING FTS5 (\n [name],\n content=[dogs]\n)",
None,
),
(
Expand Down

0 comments on commit ecb50c8

Please sign in to comment.