Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,14 @@ Unreleased
* Add ``--json`` option to the ``list`` CLI commands.
Thanks to `Puneet Dixit`_ for the PR.
(:issue:`394`)

* Add :attr:`~Entry.authors` (and corresponding attributes on feeds and sources)
to expose multiple authors and rich author data (name, email, URL).
The old ``author`` string attribute is deprecated.
Thanks to `Anshul Mittal`_ for the PR.
(:issue:`391`)

.. _Puneet Dixit: https://github.com/puneetdixit200
.. _Anshul Mittal: https://github.com/anderson688


Version 3.23
Expand Down
3 changes: 3 additions & 0 deletions docs/api.rst
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,9 @@ Data objects
.. autoclass:: Feed
:members:

.. autoclass:: Author
:members:

.. autoclass:: ExceptionInfo
:members:

Expand Down
2 changes: 2 additions & 0 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@
'werkzeug.exceptions',
'werkzeug.http',
'yaml',
'structlog',
'structlog.contextvars',
]:
sys.modules[name] = unittest.mock.Mock()

Expand Down
5 changes: 3 additions & 2 deletions docs/guide.rst
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ As seen in the previous sections,
updated=datetime.datetime(2020, 2, 28, 9, 34, 2, tzinfo=datetime.timezone.utc),
title='Hello Internet',
link='http://www.hellointernet.fm/',
author='CGP Grey',
authors=(Author(name='CGP Grey'),),
subtitle='CGP Grey and Brady Haran talk about YouTube, life, work, whatever.',
version='rss20',
user_title=None,
Expand All @@ -386,9 +386,10 @@ As seen in the previous sections,
To get all the feeds, use the :meth:`~Reader.get_feeds` method::

>>> for feed in reader.get_feeds():
... authors = ", ".join(a.name for a in feed.authors if a.name) or 'unknown author'
... print(
... feed.title or feed.url,
... f"by {feed.author or 'unknown author'},",
... f"by {authors},",
... f"updated on {feed.updated or 'never'}",
... )
...
Expand Down
1 change: 1 addition & 0 deletions src/reader/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@
FeedToImport as FeedToImport,
FeedImportResult as FeedImportResult,
FeedExport as FeedExport,
Author as Author,
)

from .exceptions import (
Expand Down
10 changes: 9 additions & 1 deletion src/reader/_app/legacy/templates/entry.html
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,15 @@
<ul class="controls">

<li>
{% if entry.author %} by {{ entry.author }}{% endif %}
{% if entry.authors %}
by
{% for author in entry.authors -%}
{% if author.href %}<a href="{{ author.href }}" title="{{ author.email or '' }}">{% endif %}
{{- author.name or author.email or 'unknown' -}}
{% if author.href %}</a>{% endif %}
{%- if not loop.last %}, {% endif %}
{%- endfor %}
{% endif %}
in <a href="{{ url_for('.entries', feed=entry.feed.url) }}">{{ entry.feed_resolved_title or feed.url }}</a>
<li>
{%- set published = entry.published or entry.updated_not_none -%}
Expand Down
4 changes: 2 additions & 2 deletions src/reader/_app/templates/macros.html
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@


{% macro feed_author(feed) %}
{%- set author = feed.author if feed.author != feed.resolved_title else none -%}
{%- set author = feed.author_str if feed.author_str != feed.resolved_title else none -%}
{%- if author %}
<li class="list-inline-item">
by {{ author }}
Expand Down Expand Up @@ -235,7 +235,7 @@


{% macro entry_author(entry) %}
{%- set author = entry.author or entry.feed.author -%}
{%- set author = entry.author_str or entry.feed.author_str -%}
{%- set author = author if author != entry.feed_resolved_title else none -%}
{%- if author %}
<li class="list-inline-item">
Expand Down
67 changes: 64 additions & 3 deletions src/reader/_parser/feedparser.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
from .._types import EntryData
from .._types import FeedData
from ..exceptions import ParseError
from ..types import Author
from ..types import Content
from ..types import Enclosure
from ..types import EntrySource
Expand Down Expand Up @@ -91,7 +92,7 @@ def _process_feed(url: str, d: Any) -> tuple[FeedData, list[EntryData]]:
_get_datetime_attr(d.feed, 'updated_parsed'),
d.feed.get('title'),
d.feed.get('link'),
d.feed.get('author'),
_parse_authors(d.feed, is_rss),
d.feed.get('subtitle'),
d.version,
)
Expand Down Expand Up @@ -180,7 +181,7 @@ def _process_entry(feed_url: str, entry: Any, is_rss: bool) -> EntryData:
_get_datetime_attr(data, 'updated_parsed'),
source_title,
data.get('link'),
data.get('author'),
_parse_authors(data, is_rss),
data.get('subtitle'),
)

Expand All @@ -190,10 +191,70 @@ def _process_entry(feed_url: str, entry: Any, is_rss: bool) -> EntryData:
_get_datetime_attr(entry, 'updated_parsed'),
entry.get('title'),
entry.get('link'),
entry.get('author'),
_parse_authors(entry, is_rss),
_get_datetime_attr(entry, 'published_parsed'),
entry.get('summary'),
tuple(content),
tuple(enclosures),
source,
)


def _parse_rss_authors(raw_author: str) -> list[dict[str, str | None]]:
authors = []
for part in raw_author.split(','):
part = part.strip()
if not part:
continue

name, email = part, None
if part.endswith(')') and '(' in part:
split_idx = part.rfind('(')
name = part[:split_idx].strip()
email = part[split_idx + 1 : -1].strip()

if name or email:
authors.append({'name': name or None, 'email': email or None})

return authors


def _parse_authors(thing: Any, is_rss: bool) -> tuple[Author, ...]:
    """Extract authors from a feedparser feed/entry dict as Author objects.

    For Atom (``is_rss`` false) with a structured ``author_detail``,
    build a single Author from it. Otherwise fall back to parsing the
    plain ``author`` string (comma-separated RSS style), enriching the
    result with any email/href available from ``author_detail``.

    Returns an empty tuple when no usable author data is present.
    """
    author_detail = thing.get('author_detail') or {}

    def _from_detail() -> tuple[Author, ...]:
        # Build a single Author from author_detail; empty if all fields blank.
        name = author_detail.get('name')
        email = author_detail.get('email')
        href = author_detail.get('href')
        if name or email or href:
            return (Author(name=name or None, email=email or None, href=href or None),)
        return ()

    # 1. Atom: trust feedparser's structured author_detail when present.
    # (An Atom feed *without* author_detail falls through to string parsing.)
    if not is_rss and author_detail:
        return _from_detail()

    # 2. RSS: parse the plain author string.
    raw_author = thing.get('author', '')

    # Fall back to author_detail if the string is empty or a bare "()".
    if not raw_author or raw_author == '()':
        return _from_detail()

    # Split the (possibly comma-separated) string into author dicts.
    authors = _parse_rss_authors(raw_author)

    # Enrich with author_detail fields missing from the string form:
    # email only where an author has none; href for every author
    # (the string form never carries one).
    fallback_email = author_detail.get('email')
    fallback_href = author_detail.get('href')
    for a in authors:
        if fallback_email and not a['email']:
            a['email'] = fallback_email
        if fallback_href:
            a['href'] = fallback_href

    return tuple(Author(**a) for a in authors)
51 changes: 25 additions & 26 deletions src/reader/_parser/jsonfeed.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from .._types import EntryData
from .._types import FeedData
from ..exceptions import ParseError
from ..types import Author
from ..types import Content
from ..types import Enclosure

Expand Down Expand Up @@ -61,7 +62,7 @@ def _process_feed(url: str, d: Any) -> FeedAndEntries:
updated=None,
title=_get(d, 'title', str),
link=_get(d, 'home_page_url', str),
author=_get_author(d),
authors=_get_authors(d),
subtitle=_get(d, 'description', str),
version=version_code,
)
Expand Down Expand Up @@ -93,33 +94,31 @@ def _get(
return cast(Union[_T, _U, _V], value)


def _get_author(d: Any) -> str | None:
# from the spec:
#
# > JSON Feed version 1 specified a singular author field
# > instead of the authors array used in version 1.1.
# > New feeds should use authors, even if only 1 author is needed.
# > Existing feeds can include both author and authors
# > for compatibility with existing feed readers.
# > Feed readers should always prefer authors if present.

author: dict[Any, Any] | None
for maybe_author in _get(d, 'authors', list) or ():
def _get_authors(d: Any) -> tuple[Author, ...]:
authors = []

maybe_authors = _get(d, 'authors', list)
single_author = _get(d, 'author', dict)

# Feed readers should always prefer authors if present
if not maybe_authors and single_author:
maybe_authors = [single_author]

for maybe_author in maybe_authors or ():
if isinstance(maybe_author, dict):
author = maybe_author
break
else:
author = _get(d, 'author', dict)
name = _get(maybe_author, 'name', str)
url = _get(maybe_author, 'url', str)

if not author:
return None
href = url
email = None
if url and url.lower().startswith('mailto:'):
email = url[7:] # strip 'mailto:'
href = None

# we only have one for now, it'll be the first one
return (
_get(author, 'name', str)
# fall back to the URL, at least until we have Feed.authors
or _get(author, 'url', str)
)
if name or href or email:
authors.append(Author(name=name, href=href, email=email))

return tuple(authors)


def _process_entry(feed_url: str, d: Any, feed_lang: str | None) -> EntryData:
Expand Down Expand Up @@ -174,7 +173,7 @@ def _process_entry(feed_url: str, d: Any, feed_lang: str | None) -> EntryData:
updated=updated,
title=_get(d, 'title', str),
link=_get(d, 'url', str),
author=_get_author(d),
authors=_get_authors(d),
published=published,
summary=_get(d, 'summary', str),
content=tuple(content),
Expand Down
2 changes: 1 addition & 1 deletion src/reader/_plugins/legacy/enclosure_tags.py
Original file line number Diff line number Diff line change
Expand Up @@ -151,7 +151,7 @@ def enclosure_tags_filter(enclosure, entry, feed_tags):
album = striptags(album)
args['album'] = album
args['artist'] = album
elif artist := (entry.author or entry.feed.author):
elif artist := (entry.author_str or entry.feed.author_str):
args['artist'] = striptags(artist)

for tag in feed_tags:
Expand Down
31 changes: 30 additions & 1 deletion src/reader/_storage/_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
from ..exceptions import EntryExistsError
from ..exceptions import EntryNotFoundError
from ..exceptions import FeedNotFoundError
from ..types import Author
from ..types import Content
from ..types import Enclosure
from ..types import Entry
Expand Down Expand Up @@ -485,19 +486,31 @@ def entry_factory(row: tuple[Any, ...]) -> Entry:
sequence,
) = row[14:33]

# Parse main entry authors
authors = tuple(Author(**d) for d in json.loads(author)) if author else ()

source_obj = None
if source:
source_dict = json.loads(source)
if source_dict['updated']:
source_dict['updated'] = convert_timestamp(source_dict['updated'])

# Parse source feed authors
source_author_json = source_dict.pop('author', None)
source_dict['authors'] = (
tuple(Author(**d) for d in json.loads(source_author_json))
if source_author_json
else ()
)

source_obj = EntrySource(**source_dict)

return Entry(
id,
convert_timestamp(updated) if updated else None,
title,
link,
author,
authors,
convert_timestamp(published) if published else None,
summary,
tuple(Content(**d) for d in json.loads(content)) if content else (),
Expand Down Expand Up @@ -679,6 +692,10 @@ def entry_update_intent_to_dict(intent: EntryUpdateIntent) -> EntryDict:
if entry.enclosures
else None
),
# Serialize the entry authors
authors=(
json.dumps([a._asdict() for a in entry.authors]) if entry.authors else None
),
updated=adapt_datetime(entry.updated) if entry.updated else None,
published=adapt_datetime(entry.published) if entry.published else None,
last_updated=adapt_datetime(intent.last_updated),
Expand All @@ -700,8 +717,20 @@ def entry_update_intent_to_dict(intent: EntryUpdateIntent) -> EntryDict:
source_dict = entry.source._asdict()
if entry.source.updated:
source_dict['updated'] = adapt_datetime(entry.source.updated)

# Serialize the source authors and rename key to 'author'
source_authors = source_dict.pop('authors', ())
source_dict['author'] = (
json.dumps([a._asdict() for a in source_authors])
if source_authors
else None
)

context['source'] = json.dumps(source_dict)

# Rename the context key from 'authors' to 'author' to match SQLite column
context['author'] = context.pop('authors', None)

context['feed'] = context.pop('feed_url')

return cast(EntryDict, context)
13 changes: 12 additions & 1 deletion src/reader/_storage/_feeds.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from .._utils import zero_or_one
from ..exceptions import FeedExistsError
from ..exceptions import FeedNotFoundError
from ..types import Author
from ..types import ExceptionInfo
from ..types import Feed
from ..types import FeedCounts
Expand Down Expand Up @@ -290,12 +291,16 @@ def feed_factory(row: tuple[Any, ...]) -> Feed:
update_after,
last_retrieved,
) = row[:14]

# Parse the JSON string into Author objects
authors = tuple(Author(**d) for d in json.loads(author)) if author else ()

return Feed(
url,
convert_timestamp(updated) if updated else None,
title,
link,
author,
authors,
subtitle,
version,
user_title,
Expand Down Expand Up @@ -371,6 +376,12 @@ def feed_update_intent_to_dict(intent: FeedUpdateIntent) -> FeedDict:

context['stale'] = 0

# Serialize `authors` and map it back to the `author` column
authors = context.pop('authors', ())
context['author'] = (
json.dumps([a._asdict() for a in authors]) if authors else None
)

if isinstance(value, ExceptionInfo):
context['last_exception'] = json.dumps(value._asdict())
else:
Expand Down
Loading
Loading