Skip to content

Commit

Permalink
Initial storage change tracking implementation. #323
Browse files Browse the repository at this point in the history
  • Loading branch information
lemon24 committed Feb 17, 2024
1 parent f7873a0 commit 36203b5
Show file tree
Hide file tree
Showing 8 changed files with 378 additions and 1 deletion.
9 changes: 9 additions & 0 deletions src/reader/_storage/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
from __future__ import annotations

from typing import Any

from .._types import SearchType
from ._base import StorageBase
from ._changes import Changes
from ._entries import EntriesMixin
from ._feeds import FeedsMixin
from ._tags import TagsMixin
Expand All @@ -25,6 +28,12 @@ class Storage(FeedsMixin, EntriesMixin, TagsMixin, StorageBase):
"""

def __init__(self, *args: Any, **kwargs: Any):
    """Initialize the base storage, then attach the change tracking API."""
    # FIXME: types
    # FIXME: protocol
    super().__init__(*args, **kwargs)
    # `storage.changes` is the change tracking entry point (see ._changes.Changes)
    self.changes = Changes(self)

def make_search(self) -> SearchType:
from ._search import Search

Expand Down
203 changes: 203 additions & 0 deletions src/reader/_storage/_changes.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,203 @@
from __future__ import annotations

from typing import Any
from typing import TYPE_CHECKING

from .._types import Action
from .._types import Change
from ..exceptions import StorageError
from ._sql_utils import parse_schema
from ._sql_utils import Query
from ._sqlite_utils import ddl_transaction
from ._sqlite_utils import wrap_exceptions

if TYPE_CHECKING: # pragma: no cover
from ._base import StorageBase


class Changes:
    """Change tracking API on top of :class:`StorageBase`.

    Once enabled, triggers record entry inserts/updates/deletes (and feed
    URL/title changes) as rows in a ``changes`` table, so an external
    resource derived from storage can catch up later.
    """

    # FIXME: protocol

    def __init__(self, storage: StorageBase):
        self.storage = storage

    @wrap_exceptions(StorageError)
    def enable(self) -> None:
        """Create the change tracking schema and backfill existing entries.

        Raises:
            StorageError

        """
        # FIXME: already enabled exc
        with ddl_transaction(self.storage.get_db()) as db:
            for objects in SCHEMA.values():
                # `schema_object`, not `object`, to avoid shadowing the builtin
                for schema_object in objects.values():
                    schema_object.create(db)
            # every pre-existing entry gets a fresh sequence,
            # and is recorded as one INSERT (action 1) change
            db.execute("UPDATE entries SET sequence = randomblob(16)")
            db.execute(
                """
                INSERT INTO changes
                SELECT sequence, feed, id, '', 1 FROM entries
                """
            )

    @wrap_exceptions(StorageError)
    def disable(self) -> None:
        """Drop the change tracking schema and clear entry sequences.

        Raises:
            StorageError

        """
        # FIXME: not enabled exc
        # (the original FIXME said "already enabled", a copy-paste from enable())
        with ddl_transaction(self.storage.get_db()) as db:
            for objects in SCHEMA.values():
                for schema_object in objects.values():
                    db.execute(f"DROP {schema_object.type} {schema_object.name}")
            db.execute("UPDATE entries SET sequence = NULL")

    @wrap_exceptions(StorageError)
    def get(
        self, action: Action | None = None, limit: int | None = None
    ) -> list[Change]:
        """Return up to ``limit`` pending changes, optionally filtered by action.

        ``limit`` is clamped to the storage chunk size;
        a falsy ``limit`` means "use the chunk size".

        Raises:
            StorageError

        """
        # FIXME: not enabled exc
        context: dict[str, Any] = {
            'limit': min(limit or self.storage.chunk_size, self.storage.chunk_size)
        }
        # the ORDER_BY is only used for testing; should this return a set instead?
        query = Query().SELECT('*').FROM('changes').ORDER_BY('rowid').LIMIT(':limit')
        # explicit None check; enum members are always truthy, but this says what we mean
        if action is not None:
            query.WHERE('action = :action')
            context['action'] = action.value
        rows = self.storage.get_db().execute(str(query), context)
        return list(map(change_factory, rows))

    @wrap_exceptions(StorageError)
    def done(self, changes: list[Change]) -> None:
        """Mark changes as done: delete them from the ``changes`` table.

        A row is deleted only on an exact match *including the sequence*,
        so a change re-recorded with a new sequence in the meantime
        is not lost.

        Raises:
            StorageError

        """
        # FIXME: not enabled exc
        # FIXME: len(changes) <= self.storage.chunk_size
        with self.storage.get_db() as db:
            for change in changes:
                db.execute(
                    """
                    DELETE FROM changes
                    WHERE (sequence, feed, id, key, action)
                    = (:sequence, :feed, :id, :key, :action)
                    """,
                    change_to_dict(change),
                )


def change_factory(row: tuple[Any, ...]) -> Change:
    """Build a :class:`Change` from a ``changes`` table row.

    Rows are (sequence, feed, id, key, action); empty strings in the
    feed/id/key columns map back to ``None`` on the Change.
    """
    sequence, feed_url, entry_id, tag_key, action_value = row
    return Change(
        action=Action(action_value),
        sequence=sequence,
        feed_url=feed_url or None,
        entry_id=entry_id or None,
        tag_key=tag_key or None,
    )


def change_to_dict(change: Change) -> dict[str, Any]:
    """Convert a :class:`Change` into ``changes`` table query parameters.

    Inverse of :func:`change_factory`: ``None`` fields become empty strings,
    the action becomes its integer value.
    """
    return {
        'sequence': change.sequence,
        'feed': change.feed_url or '',
        'id': change.entry_id or '',
        'key': change.tag_key or '',
        'action': change.action.value,
    }


# Change tracking schema: a `changes` table plus triggers that record entry
# inserts/updates/deletes, and feed URL/title changes, as
# (sequence, feed, id, key, action) rows; action 1 = insert, 2 = delete
# (matching Action in .._types). Created by Changes.enable() and dropped by
# Changes.disable(); the DDL here is never applied by the base migration.
SCHEMA = parse_schema("""
CREATE TABLE changes (
sequence BLOB NOT NULL,
feed TEXT NOT NULL,
id TEXT NOT NULL,
key TEXT NOT NULL,
action INTEGER NOT NULL,
PRIMARY KEY (sequence, feed, id, key)
);
CREATE TRIGGER changes_entry_insert
AFTER INSERT
ON entries
BEGIN
-- SELECT print(' entry_insert', new.feed, new.id);
UPDATE entries
SET sequence = randomblob(16)
WHERE (new.id, new.feed) = (id, feed);
INSERT OR REPLACE INTO changes
SELECT sequence, feed, id, '', 1
FROM entries
WHERE (feed, id) = (new.feed, new.id);
END;
-- Can't handle feed URL changes in changes_entry_update because
-- those entry updates are a consequence of ON UPDATE CASCADE,
-- which overrides the INSERT OR REPLACE used in the trigger,
-- because "conflict handling policy of the outer statement"
-- takes precedence per https://sqlite.org/lang_createtrigger.html.
-- Instead, we handle feed URL changes in changes_feed_changed.
CREATE TRIGGER changes_entry_update
AFTER UPDATE
OF title, summary, content
ON entries
WHEN
new.id = old.id AND new.feed = old.feed AND (
coalesce(new.title, '') != coalesce(old.title, '')
OR coalesce(new.summary, '') != coalesce(old.summary, '')
OR coalesce(new.content, '') != coalesce(old.content, '')
)
BEGIN
-- SELECT print(' entry_update', old.feed, old.id, '->', new.feed, new.id);
INSERT OR REPLACE INTO changes
VALUES (old.sequence, old.feed, old.id, '', 2);
UPDATE entries
SET sequence = randomblob(16)
WHERE (new.id, new.feed) = (id, feed);
INSERT OR REPLACE INTO changes
SELECT sequence, feed, id, '', 1
FROM entries
WHERE (feed, id) = (new.feed, new.id);
END;
CREATE TRIGGER changes_entry_delete
AFTER DELETE
ON entries
BEGIN
-- SELECT print(' entry_delete', old.feed, old.id);
INSERT OR REPLACE INTO changes
VALUES (old.sequence, old.feed, old.id, '', 2);
END;
CREATE TRIGGER changes_feed_changed
AFTER UPDATE
OF url, title, user_title
ON feeds
WHEN
new.url != old.url
OR coalesce(new.title, '') != coalesce(old.title, '')
OR coalesce(new.user_title, '') != coalesce(old.user_title, '')
BEGIN
-- SELECT print(' feed_url_change', old.url, '->', new.url);
INSERT OR REPLACE INTO changes
SELECT sequence, old.url, id, '', 2
FROM entries
WHERE feed = new.url;
UPDATE entries
SET sequence = randomblob(16)
WHERE feed = new.url;
INSERT OR REPLACE INTO changes
SELECT sequence, feed, id, '', 1
FROM entries
WHERE feed = new.url;
END;
""") # fmt: skip
5 changes: 4 additions & 1 deletion src/reader/_storage/_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ def get_entries_query(
entries.added_by
entries.last_updated
entries.original_feed
entries.sequence
""".split()
)
.FROM("entries")
Expand Down Expand Up @@ -454,7 +455,8 @@ def entry_factory(row: tuple[Any, ...]) -> Entry:
added_by,
last_updated,
original_feed,
) = row[12:29]
sequence,
) = row[12:30]
return Entry(
id,
convert_timestamp(updated) if updated else None,
Expand All @@ -473,6 +475,7 @@ def entry_factory(row: tuple[Any, ...]) -> Entry:
added_by,
convert_timestamp(last_updated),
original_feed or feed.url,
sequence,
feed,
)

Expand Down
1 change: 1 addition & 0 deletions src/reader/_storage/_feeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def change_feed_url(self, old: str, new: str) -> None:
except sqlite3.IntegrityError as e:
if "unique constraint failed" not in str(e).lower(): # pragma: no cover
raise
# FIXME: check message for the failed constraint, same for add feed
raise FeedExistsError(new) from None
else:
rowcount_exactly_one(cursor, lambda: FeedNotFoundError(old))
Expand Down
1 change: 1 addition & 0 deletions src/reader/_storage/_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@
first_updated_epoch TIMESTAMP NOT NULL,
feed_order INTEGER NOT NULL,
recent_sort TIMESTAMP NOT NULL,
sequence BLOB, -- FIXME: needs migration!
PRIMARY KEY (id, feed),
FOREIGN KEY (feed) REFERENCES feeds(url)
Expand Down
17 changes: 17 additions & 0 deletions src/reader/_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from dataclasses import dataclass
from datetime import datetime
from datetime import timezone
from enum import Enum
from functools import cached_property
from types import MappingProxyType
from types import SimpleNamespace
Expand Down Expand Up @@ -1261,3 +1262,19 @@ def search_entry_counts(
StorageError
"""


class Action(Enum):
    """The kind of tracked change: the resource was inserted or deleted.

    The integer values are persisted (e.g. in the storage ``changes``
    table), so existing members must not be renumbered.
    """

    INSERT = 1
    DELETE = 2


@dataclass(frozen=True)
class Change:
    """A single tracked change: an :class:`Action` plus the identity of the
    changed resource (feed URL, entry id, and/or tag key) and the random
    sequence storage assigned to that version of the resource.
    """

    #: What happened to the resource.
    action: Action
    #: Storage-assigned random sequence (randomblob(16) in the SQLite storage).
    sequence: bytes
    #: Feed URL, if the change concerns a feed (or something belonging to one).
    feed_url: str | None = None
    #: Entry id, if the change concerns an entry.
    entry_id: str | None = None
    #: Tag key, if the change concerns a tag.
    tag_key: str | None = None
3 changes: 3 additions & 0 deletions src/reader/types.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,6 +296,9 @@ def feed_url(self) -> str:
#: .. versionadded:: 1.8
original_feed_url: str = cast(str, None)

#: Change tracking sequence assigned by storage (a random 16-byte blob in
#: the SQLite implementation); presumably ``None`` while change tracking
#: is disabled — NOTE(review): confirm, and whether this stays private.
_sequence: bytes | None = None

# feed should not have a default, but I'd prefer objects that aren't
# entry data to be at the end, and dataclasses don't support keyword-only
# arguments yet.
Expand Down
Loading

0 comments on commit 36203b5

Please sign in to comment.