diff --git a/CHANGES.rst b/CHANGES.rst
index 8fb228a5..b85b3eef 100644
--- a/CHANGES.rst
+++ b/CHANGES.rst
@@ -14,9 +14,14 @@ Unreleased
* Add ``--json`` option to the ``list`` CLI commands.
Thanks to `Puneet Dixit`_ for the PR.
(:issue:`394`)
-
+* Add :attr:`~Entry.authors` (and corresponding attributes on feeds and sources)
+ to expose multiple authors and rich author data (name, email, URL).
+ The old ``author`` string attribute is deprecated.
+ Thanks to `Anshul Mittal`_ for the PR.
+ (:issue:`391`)
.. _Puneet Dixit: https://github.com/puneetdixit200
+.. _Anshul Mittal: https://github.com/anderson688
Version 3.23
diff --git a/docs/api.rst b/docs/api.rst
index 2e8b295a..7f5e14ba 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -28,6 +28,9 @@ Data objects
.. autoclass:: Feed
:members:
+.. autoclass:: Author
+ :members:
+
.. autoclass:: ExceptionInfo
:members:
diff --git a/docs/conf.py b/docs/conf.py
index 6731bfcb..91a5dafd 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -19,6 +19,8 @@
'werkzeug.exceptions',
'werkzeug.http',
'yaml',
+ 'structlog',
+ 'structlog.contextvars',
]:
sys.modules[name] = unittest.mock.Mock()
diff --git a/docs/guide.rst b/docs/guide.rst
index 097a78bb..c1163302 100644
--- a/docs/guide.rst
+++ b/docs/guide.rst
@@ -374,7 +374,7 @@ As seen in the previous sections,
updated=datetime.datetime(2020, 2, 28, 9, 34, 2, tzinfo=datetime.timezone.utc),
title='Hello Internet',
link='http://www.hellointernet.fm/',
- author='CGP Grey',
+ authors=(Author(name='CGP Grey'),),
subtitle='CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
version='rss20',
user_title=None,
@@ -386,9 +386,10 @@ As seen in the previous sections,
To get all the feeds, use the :meth:`~Reader.get_feeds` method::
>>> for feed in reader.get_feeds():
+ ... authors = ", ".join(a.name for a in feed.authors if a.name) or 'unknown author'
... print(
... feed.title or feed.url,
- ... f"by {feed.author or 'unknown author'},",
+ ... f"by {authors},",
... f"updated on {feed.updated or 'never'}",
... )
...
diff --git a/src/reader/__init__.py b/src/reader/__init__.py
index 51e718ec..1fe2dafc 100644
--- a/src/reader/__init__.py
+++ b/src/reader/__init__.py
@@ -67,6 +67,7 @@
FeedToImport as FeedToImport,
FeedImportResult as FeedImportResult,
FeedExport as FeedExport,
+ Author as Author,
)
from .exceptions import (
diff --git a/src/reader/_app/legacy/templates/entry.html b/src/reader/_app/legacy/templates/entry.html
index 46df1694..b1e1ccb1 100644
--- a/src/reader/_app/legacy/templates/entry.html
+++ b/src/reader/_app/legacy/templates/entry.html
@@ -18,7 +18,15 @@
-
- {% if entry.author %} by {{ entry.author }}{% endif %}
+ {% if entry.authors %}
+ by
+ {% for author in entry.authors -%}
+ {% if author.href %}<a href="{{ author.href }}">{% endif %}
+ {{- author.name or author.email or 'unknown' -}}
+ {% if author.href %}</a>{% endif %}
+ {%- if not loop.last %}, {% endif %}
+ {%- endfor %}
+ {% endif %}
in {{ entry.feed_resolved_title or feed.url }}
-
{%- set published = entry.published or entry.updated_not_none -%}
diff --git a/src/reader/_app/templates/macros.html b/src/reader/_app/templates/macros.html
index deb2bd82..99f439cd 100644
--- a/src/reader/_app/templates/macros.html
+++ b/src/reader/_app/templates/macros.html
@@ -202,7 +202,7 @@
{% macro feed_author(feed) %}
-{%- set author = feed.author if feed.author != feed.resolved_title else none -%}
+{%- set author = feed.author_str if feed.author_str != feed.resolved_title else none -%}
{%- if author %}
-
by {{ author }}
@@ -235,7 +235,7 @@
{% macro entry_author(entry) %}
-{%- set author = entry.author or entry.feed.author -%}
+{%- set author = entry.author_str or entry.feed.author_str -%}
{%- set author = author if author != entry.feed_resolved_title else none -%}
{%- if author %}
-
diff --git a/src/reader/_parser/feedparser.py b/src/reader/_parser/feedparser.py
index a1c859be..ccc61407 100644
--- a/src/reader/_parser/feedparser.py
+++ b/src/reader/_parser/feedparser.py
@@ -14,6 +14,7 @@
from .._types import EntryData
from .._types import FeedData
from ..exceptions import ParseError
+from ..types import Author
from ..types import Content
from ..types import Enclosure
from ..types import EntrySource
@@ -91,7 +92,7 @@ def _process_feed(url: str, d: Any) -> tuple[FeedData, list[EntryData]]:
_get_datetime_attr(d.feed, 'updated_parsed'),
d.feed.get('title'),
d.feed.get('link'),
- d.feed.get('author'),
+ _parse_authors(d.feed, is_rss),
d.feed.get('subtitle'),
d.version,
)
@@ -180,7 +181,7 @@ def _process_entry(feed_url: str, entry: Any, is_rss: bool) -> EntryData:
_get_datetime_attr(data, 'updated_parsed'),
source_title,
data.get('link'),
- data.get('author'),
+ _parse_authors(data, is_rss),
data.get('subtitle'),
)
@@ -190,10 +191,70 @@ def _process_entry(feed_url: str, entry: Any, is_rss: bool) -> EntryData:
_get_datetime_attr(entry, 'updated_parsed'),
entry.get('title'),
entry.get('link'),
- entry.get('author'),
+ _parse_authors(entry, is_rss),
_get_datetime_attr(entry, 'published_parsed'),
entry.get('summary'),
tuple(content),
tuple(enclosures),
source,
)
+
+
+def _parse_rss_authors(raw_author: str) -> list[dict[str, str | None]]:
+ authors = []
+ for part in raw_author.split(','):
+ part = part.strip()
+ if not part:
+ continue
+
+ name, email = part, None
+ if part.endswith(')') and '(' in part:
+ split_idx = part.rfind('(')
+ name = part[:split_idx].strip()
+ email = part[split_idx + 1 : -1].strip()
+
+ if name or email:
+ authors.append({'name': name or None, 'email': email or None})
+
+ return authors
+
+
+def _parse_authors(thing: Any, is_rss: bool) -> tuple[Author, ...]:
+ author_detail = thing.get('author_detail') or {}
+
+ # 1. Atom
+ if not is_rss and author_detail:
+ name = author_detail.get('name')
+ email = author_detail.get('email')
+ href = author_detail.get('href')
+ if name or email or href:
+ return (Author(name=name or None, email=email or None, href=href or None),)
+ return ()
+
+ # 2. RSS
+ raw_author = thing.get('author', '')
+
+ # Fallback to author_detail if string is entirely empty or "()"
+ if not raw_author or raw_author == '()':
+ name = author_detail.get('name')
+ email = author_detail.get('email')
+ href = author_detail.get('href')
+ if name or email or href:
+ return (Author(name=name or None, email=email or None, href=href or None),)
+ return ()
+
+ # Split by comma
+ authors = _parse_rss_authors(raw_author)
+
+ # Fill in missing per-author emails (and the href) from author_detail
+ if authors:
+ fallback_email = author_detail.get('email')
+ fallback_href = author_detail.get('href')
+
+ for a in authors:
+ if fallback_email and not a['email']:
+ a['email'] = fallback_email
+ if fallback_href:
+ a['href'] = fallback_href
+
+ return tuple(Author(**a) for a in authors)
diff --git a/src/reader/_parser/jsonfeed.py b/src/reader/_parser/jsonfeed.py
index 1f2db285..67b557e4 100644
--- a/src/reader/_parser/jsonfeed.py
+++ b/src/reader/_parser/jsonfeed.py
@@ -15,6 +15,7 @@
from .._types import EntryData
from .._types import FeedData
from ..exceptions import ParseError
+from ..types import Author
from ..types import Content
from ..types import Enclosure
@@ -61,7 +62,7 @@ def _process_feed(url: str, d: Any) -> FeedAndEntries:
updated=None,
title=_get(d, 'title', str),
link=_get(d, 'home_page_url', str),
- author=_get_author(d),
+ authors=_get_authors(d),
subtitle=_get(d, 'description', str),
version=version_code,
)
@@ -93,33 +94,31 @@ def _get(
return cast(Union[_T, _U, _V], value)
-def _get_author(d: Any) -> str | None:
- # from the spec:
- #
- # > JSON Feed version 1 specified a singular author field
- # > instead of the authors array used in version 1.1.
- # > New feeds should use authors, even if only 1 author is needed.
- # > Existing feeds can include both author and authors
- # > for compatibility with existing feed readers.
- # > Feed readers should always prefer authors if present.
-
- author: dict[Any, Any] | None
- for maybe_author in _get(d, 'authors', list) or ():
+def _get_authors(d: Any) -> tuple[Author, ...]:
+ authors = []
+
+ maybe_authors = _get(d, 'authors', list)
+ single_author = _get(d, 'author', dict)
+
+ # Feed readers should always prefer authors if present
+ if not maybe_authors and single_author:
+ maybe_authors = [single_author]
+
+ for maybe_author in maybe_authors or ():
if isinstance(maybe_author, dict):
- author = maybe_author
- break
- else:
- author = _get(d, 'author', dict)
+ name = _get(maybe_author, 'name', str)
+ url = _get(maybe_author, 'url', str)
- if not author:
- return None
+ href = url
+ email = None
+ if url and url.lower().startswith('mailto:'):
+ email = url[7:] # strip 'mailto:'
+ href = None
- # we only have one for now, it'll be the first one
- return (
- _get(author, 'name', str)
- # fall back to the URL, at least until we have Feed.authors
- or _get(author, 'url', str)
- )
+ if name or href or email:
+ authors.append(Author(name=name, href=href, email=email))
+
+ return tuple(authors)
def _process_entry(feed_url: str, d: Any, feed_lang: str | None) -> EntryData:
@@ -174,7 +173,7 @@ def _process_entry(feed_url: str, d: Any, feed_lang: str | None) -> EntryData:
updated=updated,
title=_get(d, 'title', str),
link=_get(d, 'url', str),
- author=_get_author(d),
+ authors=_get_authors(d),
published=published,
summary=_get(d, 'summary', str),
content=tuple(content),
diff --git a/src/reader/_plugins/legacy/enclosure_tags.py b/src/reader/_plugins/legacy/enclosure_tags.py
index 19e8f529..c424a902 100644
--- a/src/reader/_plugins/legacy/enclosure_tags.py
+++ b/src/reader/_plugins/legacy/enclosure_tags.py
@@ -151,7 +151,7 @@ def enclosure_tags_filter(enclosure, entry, feed_tags):
album = striptags(album)
args['album'] = album
args['artist'] = album
- elif artist := (entry.author or entry.feed.author):
+ elif artist := (entry.author_str or entry.feed.author_str):
args['artist'] = striptags(artist)
for tag in feed_tags:
diff --git a/src/reader/_storage/_entries.py b/src/reader/_storage/_entries.py
index 50c048ef..6b6b4a6d 100644
--- a/src/reader/_storage/_entries.py
+++ b/src/reader/_storage/_entries.py
@@ -24,6 +24,7 @@
from ..exceptions import EntryExistsError
from ..exceptions import EntryNotFoundError
from ..exceptions import FeedNotFoundError
+from ..types import Author
from ..types import Content
from ..types import Enclosure
from ..types import Entry
@@ -485,11 +486,23 @@ def entry_factory(row: tuple[Any, ...]) -> Entry:
sequence,
) = row[14:33]
+ # Parse main entry authors
+ authors = tuple(Author(**d) for d in json.loads(author)) if author else ()
+
source_obj = None
if source:
source_dict = json.loads(source)
if source_dict['updated']:
source_dict['updated'] = convert_timestamp(source_dict['updated'])
+
+ # Parse source feed authors
+ source_author_json = source_dict.pop('author', None)
+ source_dict['authors'] = (
+ tuple(Author(**d) for d in json.loads(source_author_json))
+ if source_author_json
+ else ()
+ )
+
source_obj = EntrySource(**source_dict)
return Entry(
@@ -497,7 +510,7 @@ def entry_factory(row: tuple[Any, ...]) -> Entry:
convert_timestamp(updated) if updated else None,
title,
link,
- author,
+ authors,
convert_timestamp(published) if published else None,
summary,
tuple(Content(**d) for d in json.loads(content)) if content else (),
@@ -679,6 +692,10 @@ def entry_update_intent_to_dict(intent: EntryUpdateIntent) -> EntryDict:
if entry.enclosures
else None
),
+ # Serialize the entry authors
+ authors=(
+ json.dumps([a._asdict() for a in entry.authors]) if entry.authors else None
+ ),
updated=adapt_datetime(entry.updated) if entry.updated else None,
published=adapt_datetime(entry.published) if entry.published else None,
last_updated=adapt_datetime(intent.last_updated),
@@ -700,8 +717,20 @@ def entry_update_intent_to_dict(intent: EntryUpdateIntent) -> EntryDict:
source_dict = entry.source._asdict()
if entry.source.updated:
source_dict['updated'] = adapt_datetime(entry.source.updated)
+
+ # Serialize the source authors and rename key to 'author'
+ source_authors = source_dict.pop('authors', ())
+ source_dict['author'] = (
+ json.dumps([a._asdict() for a in source_authors])
+ if source_authors
+ else None
+ )
+
context['source'] = json.dumps(source_dict)
+ # Rename the context key from 'authors' to 'author' to match SQLite column
+ context['author'] = context.pop('authors', None)
+
context['feed'] = context.pop('feed_url')
return cast(EntryDict, context)
diff --git a/src/reader/_storage/_feeds.py b/src/reader/_storage/_feeds.py
index 548952ef..071572aa 100644
--- a/src/reader/_storage/_feeds.py
+++ b/src/reader/_storage/_feeds.py
@@ -19,6 +19,7 @@
from .._utils import zero_or_one
from ..exceptions import FeedExistsError
from ..exceptions import FeedNotFoundError
+from ..types import Author
from ..types import ExceptionInfo
from ..types import Feed
from ..types import FeedCounts
@@ -290,12 +291,16 @@ def feed_factory(row: tuple[Any, ...]) -> Feed:
update_after,
last_retrieved,
) = row[:14]
+
+ # Parse the JSON string into Author objects
+ authors = tuple(Author(**d) for d in json.loads(author)) if author else ()
+
return Feed(
url,
convert_timestamp(updated) if updated else None,
title,
link,
- author,
+ authors,
subtitle,
version,
user_title,
@@ -371,6 +376,12 @@ def feed_update_intent_to_dict(intent: FeedUpdateIntent) -> FeedDict:
context['stale'] = 0
+ # Serialize `authors` and map it back to the `author` column
+ authors = context.pop('authors', ())
+ context['author'] = (
+ json.dumps([a._asdict() for a in authors]) if authors else None
+ )
+
if isinstance(value, ExceptionInfo):
context['last_exception'] = json.dumps(value._asdict())
else:
diff --git a/src/reader/_storage/_schema.py b/src/reader/_storage/_schema.py
index e5cf16a2..d8bbc13b 100644
--- a/src/reader/_storage/_schema.py
+++ b/src/reader/_storage/_schema.py
@@ -319,7 +319,30 @@ def update_from_42_to_43(db: sqlite3.Connection, /) -> None: # pragma: no cover
db.execute("ALTER TABLE entries ADD COLUMN source TEXT;")
-VERSION = 43
+def _migrate_author_to_json(raw_author: str | None) -> str | None:
+ if not raw_author:
+ return None
+
+ import json
+
+ from .._parser.feedparser import _parse_rss_authors
+
+ authors = []
+ for a in _parse_rss_authors(raw_author):
+ a['href'] = None
+ authors.append(a)
+
+ return json.dumps(authors)
+
+
+def update_from_43_to_44(db: sqlite3.Connection, /) -> None: # pragma: no cover
+ # https://github.com/lemon24/reader/issues/391
+ db.create_function("MIGRATE_AUTHOR", 1, _migrate_author_to_json)
+ db.execute("UPDATE feeds SET author = MIGRATE_AUTHOR(author);")
+ db.execute("UPDATE entries SET author = MIGRATE_AUTHOR(author);")
+
+
+VERSION = 44
MIGRATIONS = {
# 1-9 removed before 0.1 (last in e4769d8ba77c61ec1fe2fbe99839e1826c17ace7)
@@ -333,6 +356,7 @@ def update_from_42_to_43(db: sqlite3.Connection, /) -> None: # pragma: no cover
40: update_from_40_to_41,
41: update_from_41_to_42,
42: update_from_42_to_43,
+ 43: update_from_43_to_44,
}
MISSING_SUFFIX = (
"; you may have skipped some required migrations, see "
diff --git a/src/reader/_types.py b/src/reader/_types.py
index b87a5afd..158dbc39 100644
--- a/src/reader/_types.py
+++ b/src/reader/_types.py
@@ -33,6 +33,7 @@
from .types import _feed_argument
from .types import _namedtuple_compat
from .types import AnyResourceId
+from .types import Author
from .types import Content
from .types import Enclosure
from .types import Entry
@@ -82,7 +83,10 @@ class FeedData(_namedtuple_compat):
updated: datetime | None = None
title: str | None = None
link: str | None = None
- author: str | None = None
+ #: The authors of the feed.
+ #:
+ #: .. versionadded:: 3.24
+ authors: Sequence[Author] = ()
subtitle: str | None = None
version: str | None = None
@@ -135,7 +139,10 @@ class EntryData(_namedtuple_compat):
updated: datetime | None = None
title: str | None = None
link: str | None = None
- author: str | None = None
+ #: The authors of the entry.
+ #:
+ #: .. versionadded:: 3.24
+ authors: Sequence[Author] = ()
published: datetime | None = None
summary: str | None = None
content: Sequence[Content] = ()
@@ -184,7 +191,7 @@ def entry_data_from_obj(obj: object) -> EntryData:
updated=_getattr_optional_datetime(obj, 'updated'),
title=_getattr_optional(obj, 'title', str),
link=_getattr_optional(obj, 'link', str),
- author=_getattr_optional(obj, 'author', str),
+ authors=tuple(author_from_obj(o) for o in getattr(obj, 'authors', ())),
published=_getattr_optional_datetime(obj, 'published'),
summary=_getattr_optional(obj, 'summary', str),
content=tuple(content_from_obj(o) for o in getattr(obj, 'content', ())),
@@ -235,11 +242,21 @@ def source_from_obj(obj: object) -> EntrySource:
updated=_getattr_optional_datetime(obj, 'updated'),
title=_getattr_optional(obj, 'title', str),
link=_getattr_optional(obj, 'link', str),
- author=_getattr_optional(obj, 'author', str),
+ authors=tuple(author_from_obj(o) for o in getattr(obj, 'authors', ())),
subtitle=_getattr_optional(obj, 'subtitle', str),
)
+def author_from_obj(obj: object) -> Author:
+ if isinstance(obj, Mapping):
+ obj = SimpleNamespace(**obj)
+ return Author(
+ name=_getattr_optional(obj, 'name', str),
+ href=_getattr_optional(obj, 'href', str),
+ email=_getattr_optional(obj, 'email', str),
+ )
+
+
def _getattr(obj: object, name: str, type: type[_T]) -> _T:
# will raise AttributeError implicitly
value = getattr(obj, name)
diff --git a/src/reader/types.py b/src/reader/types.py
index 5727f76a..589ccc99 100644
--- a/src/reader/types.py
+++ b/src/reader/types.py
@@ -4,6 +4,7 @@
import enum
import re
import traceback
+import warnings
from collections.abc import Callable
from collections.abc import Iterable
from collections.abc import Mapping
@@ -56,7 +57,47 @@ def _asdict(self) -> dict[str, Any]:
@dataclass(frozen=True)
-class Feed(_namedtuple_compat):
+class Author(_namedtuple_compat):
+ """Data type representing an author.
+
+ .. versionadded:: 3.24
+
+ """
+
+ #: The name of the author.
+ name: str | None = None
+
+ #: The URL of the author.
+ href: str | None = None
+
+ #: The email of the author.
+ email: str | None = None
+
+
+class _AuthorMixin:
+ """Internal mixin to provide author string properties."""
+
+ authors: Sequence[Author]
+
+ @property
+ def author_str(self) -> str | None:
+ """The authors' names joined by ", ", or None if no author has a name."""
+ names = [a.name for a in self.authors if a.name]
+ return ", ".join(names) if names else None
+
+ @property
+ def author(self) -> str | None:
+ """Deprecated alias for .author_str."""
+ warnings.warn(
+ f"{self.__class__.__name__}.author is deprecated; use .authors instead",
+ DeprecationWarning,
+ stacklevel=2,
+ )
+ return self.author_str
+
+
+@dataclass(frozen=True)
+class Feed(_namedtuple_compat, _AuthorMixin):
"""Data type representing a feed.
All :class:`~datetime.datetime` attributes are timezone-aware,
@@ -82,8 +123,10 @@ class Feed(_namedtuple_compat):
#: The URL of a page associated with the feed.
link: str | None = None
- #: The author of the feed.
- author: str | None = None
+ #: The authors of the feed.
+ #:
+ #: .. versionadded:: 3.24
+ authors: Sequence[Author] = ()
#: A description or subtitle for the feed.
#:
@@ -205,7 +248,7 @@ def from_exception(cls, exc: BaseException) -> Self:
@dataclass(frozen=True)
-class Entry(_namedtuple_compat):
+class Entry(_namedtuple_compat, _AuthorMixin):
"""Data type representing an entry.
All :class:`~datetime.datetime` attributes are timezone-aware,
@@ -248,8 +291,10 @@ def feed_url(self) -> str:
#: The URL of a page associated with the entry.
link: str | None = None
- #: The author of the feed.
- author: str | None = None
+ #: The authors of the entry.
+ #:
+ #: .. versionadded:: 3.24
+ authors: Sequence[Author] = ()
#: The date the entry was published, according to the feed.
published: datetime | None = None
@@ -473,7 +518,7 @@ class Enclosure(_namedtuple_compat):
@dataclass(frozen=True)
-class EntrySource(_namedtuple_compat):
+class EntrySource(_namedtuple_compat, _AuthorMixin):
"""Metadata of a source feed (used with :attr:`Entry.source`).
.. versionadded:: 3.16
@@ -498,8 +543,10 @@ class EntrySource(_namedtuple_compat):
#: The URL of a page associated with the feed.
link: str | None = None
- #: The author of the feed.
- author: str | None = None
+ #: The authors of the feed.
+ #:
+ #: .. versionadded:: 3.24
+ authors: Sequence[Author] = ()
#: A description or subtitle for the feed.
subtitle: str | None = None
diff --git a/tests/data/empty.atom.py b/tests/data/empty.atom.py
index c1319ff1..bc5fdd55 100644
--- a/tests/data/empty.atom.py
+++ b/tests/data/empty.atom.py
@@ -4,6 +4,7 @@
from reader import Enclosure
from reader._types import EntryData
from reader._types import FeedData
+from reader.types import Author
feed = FeedData(url=f'{url_base}empty.atom', version='atom10')
diff --git a/tests/data/full.atom.py b/tests/data/full.atom.py
index 30e00446..d0b244c4 100644
--- a/tests/data/full.atom.py
+++ b/tests/data/full.atom.py
@@ -5,13 +5,14 @@
from reader import EntrySource
from reader._types import EntryData
from reader._types import FeedData
+from reader.types import Author
feed = FeedData(
url=f'{url_base}full.atom',
updated=datetime.datetime(2003, 12, 13, 18, 30, 2, tzinfo=datetime.UTC),
title='Example Feed',
link='http://example.org/',
- author='John Doe',
+ authors=(Author(name='John Doe'),),
subtitle='all your examples are belong to us',
version='atom10',
)
@@ -23,7 +24,7 @@
updated=datetime.datetime(2003, 12, 13, 18, 30, 2, tzinfo=datetime.UTC),
title='Atom-Powered Robots Run Amok',
link='http://example.org/2003/12/13/atom03',
- author='John Doe',
+ authors=(Author(name='John Doe'),),
published=datetime.datetime(2003, 12, 13, 17, 17, 51, tzinfo=datetime.UTC),
summary='Some text.',
content=(
@@ -60,7 +61,7 @@
updated=datetime.datetime(2003, 12, 13, 18, 30, 2, tzinfo=datetime.UTC),
title='Source Title',
link='http://example.org/source',
- author='Source Author',
+ authors=(Author(name='Source Author'),),
subtitle='source subtitle',
),
),
diff --git a/tests/data/full.json.py b/tests/data/full.json.py
index fdc2a48c..3852f3f9 100644
--- a/tests/data/full.json.py
+++ b/tests/data/full.json.py
@@ -4,13 +4,17 @@
from reader import Enclosure
from reader._types import EntryData
from reader._types import FeedData
+from reader.types import Author
feed = FeedData(
url=f'{url_base}full.json',
updated=None,
title='My Example Feed',
link='https://example.org/',
- author='Joe',
+ authors=(
+ Author(name='Joe', href=None, email='joe@example.com'),
+ Author(name='Jane'),
+ ),
subtitle='The Best Example Feed',
version='json11',
)
@@ -22,7 +26,7 @@
updated=datetime.datetime(2020, 1, 4, 0, 0, tzinfo=datetime.UTC),
title="Title",
link="https://example.org/second-item",
- author="mailto:joe@example.com",
+ authors=(Author(email="joe@example.com"),),
published=datetime.datetime(2020, 1, 2, 21, 0, tzinfo=datetime.UTC),
summary="A summary",
content=(
@@ -54,7 +58,7 @@
updated=None,
title=None,
link='https://example.org/initial-post',
- author='Jane',
+ authors=(Author(name='Jane'),),
published=datetime.datetime(2020, 1, 2, 12, 0, tzinfo=datetime.UTC),
summary=None,
content=(
diff --git a/tests/data/full.rss.py b/tests/data/full.rss.py
index 1afb79e9..15e3ea77 100644
--- a/tests/data/full.rss.py
+++ b/tests/data/full.rss.py
@@ -5,13 +5,14 @@
from reader import EntrySource
from reader._types import EntryData
from reader._types import FeedData
+from reader.types import Author
feed = FeedData(
url=f'{url_base}full.rss',
updated=datetime.datetime(2010, 9, 6, 0, 1, tzinfo=datetime.UTC),
title='RSS Title',
link='http://www.example.com/main.html',
- author='Example editor (me@example.com)',
+ authors=(Author(name='Example editor', email='me@example.com'),),
subtitle='This is an example of an RSS feed',
version='rss20',
)
@@ -23,7 +24,7 @@
updated=None,
title='Example entry',
link='http://www.example.com/blog/post/1',
- author='Example editor',
+ authors=(Author(name='Example editor'),),
published=datetime.datetime(2009, 9, 6, 16, 20, tzinfo=datetime.UTC),
summary='Here is some text containing an interesting description.',
content=(
diff --git a/tests/data/invalid.json.py b/tests/data/invalid.json.py
index f195d92c..2f29ee1f 100644
--- a/tests/data/invalid.json.py
+++ b/tests/data/invalid.json.py
@@ -4,6 +4,7 @@
from reader import Enclosure
from reader._types import EntryData
from reader._types import FeedData
+from reader.types import Author
feed = FeedData(
url=f'{url_base}invalid.json',
@@ -41,7 +42,7 @@
feed_url=feed.url,
id='author name fallback',
updated=None,
- author='mailto:joe@example.com',
+ authors=(Author(email='joe@example.com'),),
),
EntryData(
feed_url=feed.url,
@@ -57,6 +58,6 @@
feed_url=feed.url,
id='second author is good',
updated=None,
- author='Jane',
+ authors=(Author(name='Jane'),),
),
]
diff --git a/tests/test__types.py b/tests/test__types.py
index 3ae57d32..fdfb7f24 100644
--- a/tests/test__types.py
+++ b/tests/test__types.py
@@ -97,12 +97,13 @@ def test_entry_data_from_obj(data_dir, feed_type, data_file):
assert entry == entry_data_from_obj(entry), i
entry_dict = entry._asdict()
- if 'content' in entry_dict:
- entry_dict['content'] = [c._asdict() for c in entry_dict['content']]
- if 'enclosures' in entry_dict:
- entry_dict['enclosures'] = [e._asdict() for e in entry_dict['enclosures']]
+ entry_dict['content'] = [c._asdict() for c in entry_dict['content']]
+ entry_dict['enclosures'] = [e._asdict() for e in entry_dict['enclosures']]
+ entry_dict['authors'] = [a._asdict() for a in entry_dict['authors']]
if entry_dict.get('source'):
- entry_dict['source'] = entry_dict['source']._asdict()
+ source_dict = entry_dict['source']._asdict()
+ source_dict['authors'] = [a._asdict() for a in source_dict['authors']]
+ entry_dict['source'] = source_dict
assert entry == entry_data_from_obj(entry_dict), i
@@ -162,6 +163,12 @@ def test_entry_data_from_obj(data_dir, feed_type, data_file):
enclosures=[SimpleNamespace(href='href', length='1')],
),
),
+ (
+ TypeError,
+ SimpleNamespace(
+ feed_url='feed', id='id', authors=[SimpleNamespace(name=1)]
+ ),
+ ),
],
)
def test_entry_data_from_obj_errors(exc, entry):
@@ -174,4 +181,6 @@ def test_entry_data_from_obj_errors(exc, entry):
entry_dict['content'] = [dict(vars(c)) for c in entry_dict['content']]
if 'enclosures' in entry_dict:
entry_dict['enclosures'] = [dict(vars(e)) for e in entry_dict['enclosures']]
+ if 'authors' in entry_dict:
+ entry_dict['authors'] = [dict(vars(a)) for a in entry_dict['authors']]
entry_data_from_obj(entry_dict)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index ed000b9a..a80aabba 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -15,6 +15,7 @@
from reader._parser import Parser
from reader._parser import RetrievedFeed
from reader._parser import RetrieveError
+from reader._parser.feedparser import _parse_authors
from reader._parser.feedparser import feedparser
from reader._parser.feedparser import FeedparserParser
from reader._parser.file import FileRetriever
@@ -22,6 +23,7 @@
from reader._parser.requests import SessionWrapper
from reader._types import FeedData
from reader.exceptions import ParseError
+from reader.types import Author
from utils import make_url_base
@@ -1166,3 +1168,153 @@ def test_reader_use_system_feedparser(monkeypatch, reload_module):
monkeypatch.setenv(name, '1')
reload_module(reader._parser.feedparser)
assert reader._parser.feedparser.feedparser is feedparser
+
+
+def test_feedparser_parse_authors_rss():
+ """Test the custom RSS author string splitting logic against known edge cases."""
+
+ # Normal
+ assert _parse_authors({'author': 'John Doe'}, is_rss=True) == (
+ Author(name='John Doe'),
+ )
+
+ assert _parse_authors({'author': 'John Doe (john@example.com)'}, is_rss=True) == (
+ Author(name='John Doe', email='john@example.com'),
+ )
+
+ # Multiple names (comma separated)
+ assert _parse_authors({'author': 'John Doe, Jane Smith'}, is_rss=True) == (
+ Author(name='John Doe'),
+ Author(name='Jane Smith'),
+ )
+
+ assert _parse_authors({'author': 'death and gravity'}, is_rss=True) == (
+ Author(name='death and gravity'),
+ )
+
+ # Empty cases
+ assert _parse_authors({'author': ''}, is_rss=True) == ()
+ assert _parse_authors({'author': '()'}, is_rss=True) == ()
+
+ # Multiple names with a single fallback email/href
+ assert _parse_authors(
+ {
+ 'author': 'John, Jane',
+ 'author_detail': {
+ 'email': 'podcast@example.com',
+ 'href': 'http://example.com',
+ },
+ },
+ is_rss=True,
+ ) == (
+ Author(name='John', email='podcast@example.com', href='http://example.com'),
+ Author(name='Jane', email='podcast@example.com', href='http://example.com'),
+ )
+
+ # An inline email wins; the fallback email applies only to authors without one
+ assert _parse_authors(
+ {
+ 'author': 'John (john@example.com), Jane',
+ 'author_detail': {'email': 'podcast@example.com'},
+ },
+ is_rss=True,
+ ) == (
+ Author(name='John', email='john@example.com'),
+ Author(name='Jane', email='podcast@example.com'),
+ )
+
+
+def test_feedparser_parse_authors_atom():
+ """Test clean Atom author_detail extraction."""
+
+ atom_thing = {
+ 'author_detail': {
+ 'name': 'Alice',
+ 'email': 'alice@example.com',
+ 'href': 'http://example.com',
+ }
+ }
+
+ assert _parse_authors(atom_thing, is_rss=False) == (
+ Author(name='Alice', email='alice@example.com', href='http://example.com'),
+ )
+
+
+def test_feedparser_parse_authors_empty_details():
+ """Test parser handles empty or garbage author data gracefully."""
+
+ from reader._parser.feedparser import _parse_authors
+ from reader.types import Author
+
+ # Atom feed where author_detail exists but lacks name/email/href
+ assert _parse_authors({'author_detail': {'unrelated': 'data'}}, is_rss=False) == ()
+
+ # RSS feed where author is empty, and fallback author_detail lacks name/email/href
+ assert (
+ _parse_authors(
+ {'author': '', 'author_detail': {'unrelated': 'data'}}, is_rss=True
+ )
+ == ()
+ )
+
+ # RSS feed where author string has empty parts between commas
+ assert _parse_authors({'author': 'John, , Jane'}, is_rss=True) == (
+ Author(name='John'),
+ Author(name='Jane'),
+ )
+ assert _parse_authors({'author': ' , '}, is_rss=True) == ()
+
+ # RSS fallback block where 'name' is truthy but others are falsy
+ assert _parse_authors(
+ {'author': '', 'author_detail': {'name': 'A'}}, is_rss=True
+ ) == (Author(name='A'),)
+
+ # RSS fallback block where 'href' is truthy, but author is exactly '()'
+ assert _parse_authors(
+ {'author': '()', 'author_detail': {'href': 'B'}}, is_rss=True
+ ) == (Author(href='B'),)
+
+ # RSS comma-split: email inside parens is empty -> stored as None via `email or None`
+ assert _parse_authors({'author': 'John ()'}, is_rss=True) == (Author(name='John'),)
+
+ # RSS comma-split: part is exactly `()` -> `if name or email:` evaluates False
+ assert _parse_authors({'author': 'John, ()'}, is_rss=True) == (Author(name='John'),)
+
+ # RSS comma-split: name is empty before parens -> `name or None` evaluates to None
+ assert _parse_authors({'author': '(john@example.com)'}, is_rss=True) == (
+ Author(email='john@example.com'),
+ )
+
+
+def test_jsonfeed_empty_author_fallback():
+ """Test JSON Feed 1.0 fallback with an empty author object to satisfy branch coverage."""
+ feed, _ = jsonfeed_parse(
+ 'url',
+ """
+ {
+ "version": "https://jsonfeed.org/version/1.0",
+ "title": "My Feed",
+ "author": {"unrelated": "data"}
+ }
+ """,
+ )
+ assert feed.authors == ()
+
+
+def test_jsonfeed_author_mailto():
+ """Test JSON Feed author url starting with mailto: is mapped to email."""
+ from reader.types import Author
+
+ feed, _ = jsonfeed_parse(
+ 'url',
+ """
+ {
+ "version": "https://jsonfeed.org/version/1.1",
+ "title": "My Feed",
+ "authors": [
+ {"name": "Alice", "url": "mailto:alice@example.com"}
+ ]
+ }
+ """,
+ )
+ assert feed.authors == (Author(name="Alice", email="alice@example.com", href=None),)
diff --git a/tests/test_reader.py b/tests/test_reader.py
index 9e4229ae..0b6a4ecd 100644
--- a/tests/test_reader.py
+++ b/tests/test_reader.py
@@ -37,6 +37,7 @@
from reader._types import FeedToUpdate
from reader._types import FeedUpdateIntent
from reader.core import DEFAULT_RESERVED_NAME_SCHEME
+from reader.types import Author
from reader_methods import enable_and_update_search
from reader_methods import get_entries
from reader_methods import get_entries_random
@@ -394,12 +395,12 @@ def title_tuple(feed):
def test_data_roundtrip(reader, parser):
- feed = parser.feed(1, datetime(2010, 1, 1), author='feed author')
+ feed = parser.feed(1, datetime(2010, 1, 1), authors=(Author(name='feed author'),))
entry = parser.entry(
1,
1,
datetime(2010, 1, 1),
- author='entry author',
+ authors=(Author(name='entry author'),),
summary='summary',
content=(Content('value3', 'type', 'en'), Content('value2')),
enclosures=(Enclosure('http://e1', 'type', 1000), Enclosure('http://e2')),
@@ -442,7 +443,7 @@ def test_data_hashes_remain_stable(parser):
datetime(2010, 1, 1),
title='Feed #1',
link='http://www.example.com/1',
- author='feed author',
+ authors=(Author(name='feed author'),),
)
entry = parser.entry(
1,
@@ -450,25 +451,23 @@ def test_data_hashes_remain_stable(parser):
datetime(2010, 1, 1),
title='Entry #1',
link='http://www.example.com/entries/1',
- author='entry author',
+ authors=(Author(name='entry author'),),
summary='summary',
content=(Content('value3', 'type', 'en'), Content('value2')),
enclosures=(Enclosure('http://e1', 'type', 1000), Enclosure('http://e2')),
)
- assert feed.hash == b'\x00\xda\xf5\xa1Je\x13],\xf0\xdb\xaa\x88d\x99\xc6'
- assert entry.hash == b'\x00f\xa9\xdb\t5\xdf\xedcK\xd9bm\x80,l'
+ assert feed.hash == b'\x00\x9dS?\xf6\x9c\x9f=\x96\xb6JD\x0f\xef\x10\x82'
+ assert entry.hash == b'\x00+mI;N\xa3\x86\xff\x07\xdf\xd1\xe3\xfb\xbf\x12'
assert feed._replace(url='x', updated='x').hash == feed.hash
assert (
- feed._replace(title='x').hash
- == b'\x00\xce\x81\xc7\x8d(\xab\xd8)\x06\x90?\xf9\x847\xc4'
+ feed._replace(title='x').hash == b'\x00bd\xb1*\x80\xf1u_\x17\xb4\n\xfe\xb2\x1e0'
)
assert entry._replace(feed_url='x', id='x', updated='x').hash == entry.hash
assert (
- entry._replace(title='x').hash
- == b'\x00\x95\xc4\xe9\xd3\x95\xf6\xff\xf0*\xbd\x00L\x08\x1a\xa2'
+ entry._replace(title='x').hash == b'\x00\xd3V\x19.;U6\xc9q\xc8\x88\xc2%\xff\xac'
)
@@ -732,7 +731,11 @@ def drop_feed(entry):
@pytest.mark.parametrize('new_feed_url', ['3', '2'])
def test_change_feed_url_second_update(reader, new_feed_url):
reader._parser.feed(
- 1, datetime(2010, 1, 1), title='old title', author='old author', link='old link'
+ 1,
+ datetime(2010, 1, 1),
+ title='old title',
+ authors=(Author(name='old author'),),
+ link='old link',
)
reader.update_feeds()
reader.update_search()
@@ -755,7 +758,7 @@ def test_change_feed_url_second_update(reader, new_feed_url):
eval(new_feed_url),
datetime(2010, 1, 2),
title='new title',
- author='new author',
+ authors=(Author(name='new author'),),
link='new link',
)
reader._parser.entry(eval(new_feed_url), 1, datetime(2010, 1, 1))
@@ -771,7 +774,7 @@ def test_change_feed_url_second_update(reader, new_feed_url):
last_retrieved=datetime(2010, 1, 3),
update_after=datetime(2010, 1, 3, 1),
title='new title',
- author='new author',
+ authors=(Author(name='new author'),),
link='new link',
)
diff --git a/tests/test_reader_search.py b/tests/test_reader_search.py
index 65318b49..79e1eaed 100644
--- a/tests/test_reader_search.py
+++ b/tests/test_reader_search.py
@@ -22,6 +22,7 @@
from reader._types import Action
from reader._types import Change
from reader.exceptions import ChangeTrackingNotEnabledError
+from reader.types import Author
from test_reader_counts import entries_per_day
from utils import rename_argument
from utils import utc_datetime
@@ -443,7 +444,11 @@ def strip_html(*args, **kwargs):
(old_result,) = reader.search_entries('entry OR feed')
feed = parser.feed(
- 1, datetime(2010, 1, 2), title='feed', link='link', author='author'
+ 1,
+ datetime(2010, 1, 2),
+ title='feed',
+ link='link',
+ authors=(Author(name='author'),),
)
entry = parser.entry(
1,
@@ -453,7 +458,7 @@ def strip_html(*args, **kwargs):
summary='summary',
content=[Content('content')],
link='link',
- author='author',
+ authors=(Author(name='author'),),
published=datetime(2010, 1, 2),
enclosures=[Enclosure('enclosure')],
)
diff --git a/tests/test_storage.py b/tests/test_storage.py
index f53384cf..835000b5 100644
--- a/tests/test_storage.py
+++ b/tests/test_storage.py
@@ -1,3 +1,4 @@
+import json
import os
import sqlite3
import sys
@@ -13,6 +14,7 @@
from reader import InvalidSearchQueryError
from reader import StorageError
from reader._storage import Storage
+from reader._storage._schema import _migrate_author_to_json
from reader._storage._sqlite_utils import DBError
from reader._storage._sqlite_utils import HeavyMigration
from reader._storage._sqlite_utils import require_version
@@ -594,3 +596,29 @@ def test_get_set_recent_sort(storage):
def test_application_id(storage):
id = storage.factory().execute('pragma application_id').fetchone()[0]
assert id == int.from_bytes(b'read', 'big')
+
+
+def test_migrate_author_to_json():
+ """Ensure the SQLite migration correctly converts legacy strings to JSON."""
+
+ # Null or empty cases
+ assert _migrate_author_to_json(None) is None
+ assert _migrate_author_to_json("") is None
+
+ # Legacy string cases
+ result = _migrate_author_to_json("John Doe (john@example.com)")
+ parsed = json.loads(result)
+ assert len(parsed) == 1
+ assert parsed[0]["name"] == "John Doe"
+ assert parsed[0]["href"] is None
+ assert parsed[0]["email"] == "john@example.com"
+
+
+def test_migrate_author_to_json_edge_cases():
+ from reader._storage._schema import _migrate_author_to_json
+
+ # Covers extra commas (empty parts) and strings with only an email
+ assert (
+ _migrate_author_to_json("John,, (email@example.com)")
+ == '[{"name": "John", "email": null, "href": null}, {"name": null, "email": "email@example.com", "href": null}]'
+ )
diff --git a/tests/test_types.py b/tests/test_types.py
index 7ef61cdf..cef54879 100644
--- a/tests/test_types.py
+++ b/tests/test_types.py
@@ -22,6 +22,7 @@
from reader.types import _feed_argument
from reader.types import _namedtuple_compat
from reader.types import _resource_argument
+from reader.types import Author
from reader.types import MISSING
@@ -376,3 +377,35 @@ def test_update_result_properties():
assert result.updated_feed is None
assert result.error is exc
assert result.not_modified is False
+
+
+def test_author_deprecation_warning():
+ """Ensure accessing .author works but emits a DeprecationWarning."""
+
+    # Test Feed.author: yields the first author's name only (email is not included)
+ feed = Feed(
+ url='http://example.com',
+ authors=(Author(name='John', email='john@example.com'),),
+ )
+ with pytest.warns(DeprecationWarning, match=r"Feed\.author is deprecated"):
+ assert feed.author == "John"
+
+ # Test Entry formatting: multiple authors
+ entry = Entry(id='1', feed=feed, authors=(Author(name='Jane'), Author(name='Bob')))
+ with pytest.warns(DeprecationWarning, match=r"Entry\.author is deprecated"):
+ assert entry.author == "Jane, Bob"
+
+    # Test that an empty authors tuple yields author == None without crashing
+ empty_feed = Feed(url='http://example.com')
+ with pytest.warns(DeprecationWarning):
+ assert empty_feed.author is None
+
+
+def test_author_deprecation_internal_types():
+ import pytest
+
+ from reader.types import Author
+ from reader.types import EntrySource
+
+ with pytest.warns(DeprecationWarning):
+ assert EntrySource(url='url', authors=(Author(name='Src'),)).author == 'Src'