Skip to content

Commit

Permalink
Clean up should_update_feed() (always update feed if entries changed).
Browse files Browse the repository at this point in the history
  • Loading branch information
lemon24 committed May 6, 2024
1 parent e6a3853 commit ed7e284
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 37 deletions.
45 changes: 20 additions & 25 deletions src/reader/_update.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,42 +114,39 @@ def stale(self) -> bool:

def should_update_feed(self, new: FeedData, entries_to_update: bool) -> bool:
old = self.old_feed
self.log.debug("old updated %s, new updated %s", old.updated, new.updated)

if self.stale:
# logging for stale happened in process_feed_for_update()
return True

if not old.last_updated:
self.log.info("feed has no last_updated, treating as updated")
assert not old.updated, "updated must be None if last_updated is None"
return True

if not new.updated:
self.log.info("feed has no updated, treating as updated")
return True

if self.stale:
# logging for stale happened in process_feed_for_update()
return True

# we only care if feed.updated changed if any entries changed:
# https://github.com/lemon24/reader/issues/231#issuecomment-812601988
#
# for RSS, if there's no date element,
# feedparser user lastBuildDate as .updated,
# which may (obviously) change without the feed actually changing
#
if entries_to_update and (not old.updated or new.updated > old.updated):
self.log.info("feed updated")
# Some feeds have entries newer than the feed;
# we always update the feed if entries changed, for simplicity.
# https://github.com/lemon24/reader/issues/76
if entries_to_update:
self.log.info("feed has entries to update, treating as updated")
return True

# check if the feed content actually changed:
# Check if the feed content actually changed:
# https://github.com/lemon24/reader/issues/179
if not old.hash or new.hash != old.hash:
self.log.debug("feed hash changed, treating as updated")
self.log.info("feed hash changed, treating as updated")
return True

# Some feeds have entries newer than the feed.
# https://github.com/lemon24/reader/issues/76
self.log.info("feed not updated, updating entries anyway")
# For RSS feeds with no date element,
# feedparser user lastBuildDate as .updated,
# which can change without the feed actually changing,
# so feed.updated is excluded from the hash.
# https://github.com/lemon24/reader/issues/231#issuecomment-812601988
if new.updated != old.updated:
self.log.info("only feed updated changed, skipping")
return False

self.log.info("feed not updated, skipping")
return False

def should_update_entry(
Expand Down Expand Up @@ -236,8 +233,6 @@ def get_feed_to_update(
parsed_feed.http_etag,
parsed_feed.http_last_modified,
)
if entries_to_update:
return FeedUpdateIntent(self.url, self.now)
return None

def update(
Expand Down
23 changes: 11 additions & 12 deletions tests/test_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -113,7 +113,7 @@ def test_update_feed_updated(reader, update_feed, caplog):
last_updated=datetime(2010, 1, 3),
),
}
assert "feed not updated, updating entries anyway" in caplog.text
assert "feed has entries to update, treating as updated" in caplog.text
caplog.clear()

# Feed gets updated because content (hash) changed.
Expand All @@ -130,7 +130,7 @@ def test_update_feed_updated(reader, update_feed, caplog):
assert "feed hash changed, treating as updated" in caplog.text
caplog.clear()

# The feed doesn't change, because .updated is older.
# The feed changes if entries changed, regardless of .updated being older.
# Entries get updated regardless.
old_feed = parser.feed(1, datetime(2009, 1, 1), title='old-different-title')
entry_three = parser.entry(1, 3, datetime(2010, 2, 1))
Expand All @@ -141,9 +141,7 @@ def test_update_feed_updated(reader, update_feed, caplog):

feed = old_feed.as_feed(
added=datetime(2010, 1, 1),
# doesn't change because it's not newer
updated=datetime(2010, 1, 1),
# changes because entries changed
updated=datetime(2009, 1, 1),
last_updated=datetime(2010, 1, 4),
)
assert set(reader.get_entries()) == {
Expand All @@ -163,10 +161,10 @@ def test_update_feed_updated(reader, update_feed, caplog):
last_updated=datetime(2010, 1, 4),
),
}
assert "feed not updated, updating entries anyway" in caplog.text
assert "feed has entries to update, treating as updated" in caplog.text
caplog.clear()

# The feed doesn't change; despite being newer, no entries have changed.
# The feed doesn't change if only .updated changes (and there are no updated entries).
old_feed = parser.feed(1, datetime(2010, 1, 2), title='old-different-title')
reader._now = lambda: datetime(2010, 1, 4, 12)

Expand All @@ -175,8 +173,8 @@ def test_update_feed_updated(reader, update_feed, caplog):

feed = old_feed.as_feed(
added=datetime(2010, 1, 1),
# doesn't change because no entries have changed
updated=datetime(2010, 1, 1),
# doesn't change because nothing except .updated changed
updated=datetime(2009, 1, 1),
# doesn't change because nothing changed
last_updated=datetime(2010, 1, 4),
)
Expand All @@ -197,10 +195,11 @@ def test_update_feed_updated(reader, update_feed, caplog):
last_updated=datetime(2010, 1, 4),
),
}
assert "feed not updated, updating entries anyway" in caplog.text
assert "only feed updated changed, skipping" in caplog.text
caplog.clear()

# The feeds changes because it is newer *and* entries get updated.
# The feeds changes because ~~it is newer *and*~~ entries get updated.
# TODO: same as "The feed changes if entries changed, regardless ...", remove?
new_feed = parser.feed(1, datetime(2010, 1, 2), title='new')
entry_four = parser.entry(1, 4, datetime(2010, 2, 1))
reader._now = lambda: datetime(2010, 1, 5)
Expand Down Expand Up @@ -233,7 +232,7 @@ def test_update_feed_updated(reader, update_feed, caplog):
last_updated=datetime(2010, 1, 5),
),
}
assert "feed updated" in caplog.text
assert "feed has entries to update, treating as updated" in caplog.text
caplog.clear()


Expand Down

0 comments on commit ed7e284

Please sign in to comment.