From c82d0f352b2afa6c536cefc2d6cfc893a5542f8f Mon Sep 17 00:00:00 2001 From: Thomas Kemmer Date: Sat, 27 Feb 2016 18:10:00 +0100 Subject: [PATCH] Various fixes/improvements. --- .travis.yml | 7 - mopidy_podcast/__init__.py | 27 ++- mopidy_podcast/backend.py | 160 ++++++++++------- mopidy_podcast/ext.conf | 21 ++- mopidy_podcast/library.py | 160 ++++++++++------- mopidy_podcast/models.py | 48 ++--- mopidy_podcast/opml.py | 69 -------- mopidy_podcast/playback.py | 1 - mopidy_podcast/rss.py | 25 ++- mopidy_podcast/schema.py | 320 ++++++++++++++++++---------------- mopidy_podcast/sql/schema.sql | 6 +- mopidy_podcast/translator.py | 173 +++++++++--------- setup.py | 1 + tests/test_extension.py | 1 - 14 files changed, 515 insertions(+), 504 deletions(-) delete mode 100644 mopidy_podcast/opml.py diff --git a/.travis.yml b/.travis.yml index 5b3be3a..637d177 100644 --- a/.travis.yml +++ b/.travis.yml @@ -5,13 +5,6 @@ language: python python: - "2.7_with_system_site_packages" -addons: - apt: - sources: - - mopidy-stable - packages: - - mopidy - env: - TOX_ENV=py27 - TOX_ENV=flake8 diff --git a/mopidy_podcast/__init__.py b/mopidy_podcast/__init__.py index 73f4e30..c9f708e 100644 --- a/mopidy_podcast/__init__.py +++ b/mopidy_podcast/__init__.py @@ -2,11 +2,15 @@ import os -from mopidy import config, ext, httpclient +from mopidy import config, exceptions, ext, httpclient __version__ = '2.0.0' +class BackendError(exceptions.BackendError): + pass + + class Extension(ext.Extension): dist_name = 'Mopidy-Podcast' @@ -20,9 +24,8 @@ def get_config_schema(self): schema = super(Extension, self).get_config_schema() schema['feeds'] = config.List(optional=True) schema['import_dir'] = config.Path(optional=True) - schema['update_interval'] = config.Integer(minimum=3600) + schema['update_interval'] = config.Integer(minimum=60) schema['browse_order'] = config.String(choices=['asc', 'desc']) - schema['lookup_order'] = config.String(choices=['asc', 'desc']) schema['search_limit'] = config.Integer(optional=True, minimum=1) schema['cache_size'] = config.Integer(minimum=1) schema['cache_ttl'] = config.Integer(minimum=1) @@ -41,17 +44,11 @@ def setup(self, registry): registry.add('backend', PodcastBackend) @classmethod - def get_url_opener(cls, config): - import urllib2 + def get_requests_session(cls, config): + import requests + session = requests.Session() proxy = httpclient.format_proxy(config['proxy']) - if proxy: - opener = urllib2.build_opener( - urllib2.ProxyHandler({'http': proxy, 'https': proxy}) - ) - else: - opener = urllib2.build_opener() + session.proxies.update({'http': proxy, 'https': proxy}) name = '%s/%s' % (cls.dist_name, cls.version) - opener.addheaders = [ - ('User-agent', httpclient.format_user_agent(name)) - ] - return opener + session.headers['User-Agent'] = httpclient.format_user_agent(name) + return session diff --git a/mopidy_podcast/backend.py b/mopidy_podcast/backend.py index 2586ddf..b5fcbb3 100644 --- a/mopidy_podcast/backend.py +++ b/mopidy_podcast/backend.py @@ -1,9 +1,11 @@ from __future__ import unicode_literals import contextlib +import datetime import logging import os import threading +import xml.etree.ElementTree import cachetools @@ -11,103 +13,130 @@ import pykka -from . import Extension, opml, rss, schema +from . 
import BackendError, Extension, rss, schema from .library import PodcastLibraryProvider from .playback import PodcastPlaybackProvider logger = logging.getLogger(__name__) +def parse_opml(path): + # http://dev.opml.org/spec2.html + root = xml.etree.ElementTree.parse(path).getroot() + for e in root.findall('./body//outline[@type="rss"]'): + url = e.get('xmlUrl') + if url: + yield url + else: + logger.warning('Found RSS outline without xmlUrl in %s', path) + + +def stream(session, url, **kwargs): + response = session.get(url, stream=True, **kwargs) + response.raise_for_status() + response.raw.decode_content = True + return contextlib.closing(response) + + class PodcastCache(cachetools.TTLCache): pykka_traversable = True def __init__(self, config): - # TODO: missing deprecated in cachetools v1.2 + # TODO: "missing" parameter will be deprecated in cachetools v1.2 super(PodcastCache, self).__init__( maxsize=config[Extension.ext_name]['cache_size'], ttl=config[Extension.ext_name]['cache_ttl'], missing=self.__missing ) - self.__opener = Extension.get_url_opener(config) + self.__session = Extension.get_requests_session(config) self.__timeout = config[Extension.ext_name]['timeout'] - def __missing(self, feedurl): - logger.debug('Podcast cache miss: %s', feedurl) - with contextlib.closing(self.__open(feedurl)) as source: - podcast = rss.parse(source) + def __missing(self, url): + with stream(self.__session, url, timeout=self.__timeout) as r: + podcast = rss.parse(r.raw, url) + logger.debug('Retrieving %s took %s', url, r.elapsed) return podcast - def __open(self, url): - return self.__opener.open(url, timeout=self.__timeout) - -class PodcastIndexer(pykka.ThreadingActor): +class PodcastUpdateActor(pykka.ThreadingActor): def __init__(self, dbpath, config, backend): - super(PodcastIndexer, self).__init__() + super(PodcastUpdateActor, self).__init__() self.__dbpath = dbpath + self.__backend = backend.actor_ref.proxy() self.__import_dir = config[Extension.ext_name]['import_dir'] if self.__import_dir is None: # https://github.com/mopidy/mopidy/issues/1466 try: self.__import_dir = Extension.get_config_dir(config) except Exception as e: - logger.error('Cannot create podcast directory: %s', e) + logger.error('Cannot create podcast import directory: %s', e) self.__feeds = frozenset(config[Extension.ext_name]['feeds']) - self.__opener = Extension.get_url_opener(config) - self.__timer = threading.Timer(0, self.refresh) # initial timeout 0 + self.__session = Extension.get_requests_session(config) + self.__timeout = config[Extension.ext_name]['timeout'] + self.__timer = threading.Timer(0, self.refresh) # initial zero timeout self.__update_interval = config[Extension.ext_name]['update_interval'] - self.__backend = backend.actor_ref.proxy() + self.__update_started = None self.__proxy = self.actor_ref.proxy() def on_start(self): + logger.debug('Starting %s', self.__class__.__name__) self.__timer.start() def on_stop(self): + logger.debug('Stopping %s', self.__class__.__name__) self.__timer.cancel() - def refresh(self): - # TODO: guard/lock while refreshing; keep timestamp for logging - self.__timer = threading.Timer(self.__update_interval, self.refresh) - logger.info('Refreshing %s', Extension.dist_name) - feeds = tuple(self.__feeds.union(self.__scan_import_dir())) + def prepare_update(self, feeds): try: with schema.connect(self.__dbpath) as connection: - schema.cleanup(connection, feeds) - except Exception as e: - logger.error('Error refreshing %s: %s', Extension.dist_name, e) + for uri, _ in schema.list(connection): 
+ if uri not in feeds: + schema.delete(connection, uri) + except Exception: + logger.exception('Error refreshing %s', Extension.dist_name) + self.__update_started = None else: self.__proxy.update(feeds) - self.__timer.start() # try again next time + + def refresh(self): + timer = self.__timer + self.__timer = threading.Timer(self.__update_interval, self.refresh) + timer.cancel() # in case of manual refresh + # prevent multiple concurrent updates + if self.__update_started: + logger.debug('Already refreshing %s', Extension.dist_name) + else: + self.__update_started = datetime.datetime.now() + feeds = tuple(self.__feeds.union(self.__scan_import_dir())) + logger.info('Refreshing %d podcast(s)', len(feeds)) + self.__proxy.prepare_update(feeds) + self.__timer.start() def update(self, feeds): if feeds: head, tail = feeds[0], feeds[1:] - self.__update(head) - self.__proxy.update(tail) - else: - logger.debug('Refreshing %s done', Extension.dist_name) - - def __update(self, feedurl): - try: - podcast = self.__fetch(feedurl) - except pykka.ActorDeadError as e: - logger.debug('Stopped while retrieving %s: %s', feedurl, e) - except Exception as e: - logger.error('Error retrieving podcast %s: %s', feedurl, e) + try: + self.__update(head) + except Exception: + logger.exception('Error refreshing %s', Extension.ext_name) + self.__update_started = None + else: + self.__proxy.update(tail) else: - with schema.connect(self.__dbpath) as connection: - schema.update(connection, podcast) + d = datetime.datetime.now() - self.__update_started + logger.info('Refreshing %s took %s', Extension.dist_name, d) + self.__update_started = None def __fetch(self, feedurl): podcasts = self.__backend.podcasts podcast = podcasts.get(feedurl).get() if podcast is None: - logger.debug('Retrieving podcast %s', feedurl) - # running in the background, no timeout necessary - with contextlib.closing(self.__opener.open(feedurl)) as source: - podcast = rss.parse(source) + # TODO: If-Modified-Since with schema.pubdate(feedurl)? 
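+        # Possible approach for the TODO above (untested sketch, not part of
+        # this change): issue a conditional GET based on the stored date, e.g.
+        #     last = schema.pubdate(connection, feedurl)
+        #     if last:
+        #         headers = {'If-Modified-Since': email.utils.formatdate(
+        #             calendar.timegm(last.timetuple()), usegmt=True)}
+        # pass headers on to stream(), and skip re-parsing the feed entirely
+        # when the server answers 304 Not Modified.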
+ with stream(self.__session, feedurl, timeout=self.__timeout) as r: + podcast = rss.parse(r.raw, feedurl) + logger.debug('Retrieving %s took %s', feedurl, r.elapsed) podcast = podcasts.setdefault(feedurl, podcast).get() return podcast @@ -115,32 +144,35 @@ def __scan_import_dir(self): result = [] for entry in os.listdir(self.__import_dir): path = os.path.join(self.__import_dir, entry) - if not os.path.isfile(path): - continue - if not path.endswith(b'.opml'): - continue try: - feedurls = self.__parse_file(path) + if not os.path.isfile(path): + continue + elif path.endswith(b'.opml'): + urls = parse_opml(path) + else: + logger.debug('Skipping unknown file %s', path) except Exception as e: logger.error('Error parsing %s: %s', path, e) else: - result.extend(feedurls) + result.extend(urls) return result - def __parse_file(self, path): - with open(path) as fh: - outlines = opml.parse(fh) - for outline in outlines: - if outline.get('type') == 'rss': - yield outline['xmlUrl'] + def __update(self, feedurl): + try: + podcast = self.__fetch(feedurl) + except pykka.ActorDeadError: + logger.debug('Stopped while retrieving %s', feedurl) + except Exception as e: + logger.warning('Cannot update podcast %s: %s', feedurl, e) + else: + with schema.connect(self.__dbpath) as connection: + schema.update(connection, podcast) class PodcastBackend(pykka.ThreadingActor, backend.Backend): uri_schemes = [ 'podcast', - 'podcast+file', - 'podcast+ftp', 'podcast+http', 'podcast+https' ] @@ -149,17 +181,19 @@ def __init__(self, config, audio): super(PodcastBackend, self).__init__() # create/update database schema on startup to catch errors early dbpath = os.path.join(Extension.get_data_dir(config), b'feeds.db') - with schema.connect(dbpath) as connection: - schema.init(connection) + try: + with schema.connect(dbpath) as connection: + schema.init(connection) + except Exception as e: + raise BackendError('Error initializing database: %s' % e) self.library = PodcastLibraryProvider(dbpath, config, backend=self) self.playback = PodcastPlaybackProvider(audio=audio, backend=self) self.podcasts = PodcastCache(config) - # passed to PodcastIndexer.start() - self.__config = config - self.__dbpath = dbpath + # passed to PodcastUpdateActor.start() + self.__update_args = [dbpath, config, self] def on_start(self): - self.indexer = PodcastIndexer.start(self.__dbpath, self.__config, self) + self.indexer = PodcastUpdateActor.start(*self.__update_args) def on_stop(self): self.indexer.stop() diff --git a/mopidy_podcast/ext.conf b/mopidy_podcast/ext.conf index 0fb9d29..19a9b21 100644 --- a/mopidy_podcast/ext.conf +++ b/mopidy_podcast/ext.conf @@ -1,26 +1,29 @@ [podcast] enabled = true -# an optional list of podcast RSS feed URLs to subscribe to; URLs need -# to be seperated by commas or newlines +# optional list of podcast RSS feed URLs to subscribe to; URLs must be +# seperated with commas or newlines feeds = +# optional path to directory containing OPML files for import; uses +# extension config dir if not set import_dir = -# directory update interval in seconds +# directory/index update interval in seconds update_interval = 86400 +# sort podcast episodes by ascending (asc) or descending (desc) +# publication date when browsing browse_order = desc -lookup_order = asc - +# maximum number of search results search_limit = 20 -# number of podcasts to cache +# maximum number of podcasts to cache in memory cache_size = 64 -# cache time-to-live in seconds -cache_ttl = 3600 +# cache time-to-live in seconds; should be <= update_interval 
+cache_ttl = 86400 -# request timeout in seconds +# HTTP request/database connection timeout in seconds timeout = 10 diff --git a/mopidy_podcast/library.py b/mopidy_podcast/library.py index 98ce93c..b83d145 100644 --- a/mopidy_podcast/library.py +++ b/mopidy_podcast/library.py @@ -3,6 +3,7 @@ import collections import itertools import logging +import operator from mopidy import backend, models @@ -20,45 +21,69 @@ class PodcastLibraryProvider(backend.LibraryProvider): def __init__(self, dbpath, config, backend): super(PodcastLibraryProvider, self).__init__(backend) ext_config = config[Extension.ext_name] - self.__dbpath = dbpath # TODO: pass connection? + self.__dbpath = dbpath self.__reverse_browse = ext_config['browse_order'] == 'desc' - self.__reverse_lookup = ext_config['lookup_order'] == 'desc' self.__search_limit = ext_config['search_limit'] + self.__timeout = ext_config['timeout'] self.__tracks = {} # track cache for faster lookup def browse(self, uri): if uri == self.root_directory.uri: - refs = self.__list() + with self.__connect() as connection: + rows = schema.list(connection) + refs = itertools.starmap(translator.ref, rows) else: - refs = self.__browse(uri) + tracks = self.__tracks = translator.tracks(self.__podcast(uri)) + uris = list(reversed(tracks) if self.__reverse_browse else tracks) + refs = (models.Ref.track(uri=uri, name=tracks[uri].name) + for uri in uris) return list(refs) + def get_distinct(self, field, query): + try: + expr = translator.field(field) + except NotImplementedError: + return [] + try: + params = translator.query(query) + except NotImplementedError: + return [] + with schema.connect(self.__dbpath) as connection: + rows = schema.distinct(connection, expr, **params) + return [row[0] for row in rows] + def get_images(self, uris): - feeds = collections.defaultdict(list) + podcasts = collections.defaultdict(list) for uri in uris: - feeds[uritools.uridefrag(uri).uri].append(uri) + podcasts[uritools.uridefrag(uri).uri].append(uri) result = {} - for feeduri, uris in feeds.items(): + for uri, uris in podcasts.items(): try: - images = self.__images(feeduri) + images = translator.images(self.__podcast(uri)) except Exception as e: - logger.error('Error retrieving images for %s: %s', feeduri, e) + logger.warning('Cannot retrieve images for %s: %s', uri, e) else: result.update((uri, images.get(uri)) for uri in uris) return result def lookup(self, uri): - # pop from __tracks, since we don't want cached items to live too long + # pop from __tracks since we don't want cached tracks to live too long try: - return self.__tracks.pop(uri) + track = self.__tracks.pop(uri) except KeyError: logger.debug('Lookup cache miss: %s', uri) + else: + return [track] try: - self.__tracks = tracks = self.__lookup(uritools.uridefrag(uri).uri) + absuri, fragment = uritools.uridefrag(uri) + self.__tracks = tracks = translator.tracks(self.__podcast(absuri)) + result = [self.__tracks.pop(uri)] if fragment else tracks.values() + except LookupError: + logger.warning('Lookup error for %s', uri) except Exception as e: - logger.error('Lookup failed for %s: %s', uri, e) + logger.warning('Lookup error for %s: %s', uri, e) else: - return tracks.pop(uri, []) + return list(result) def refresh(self, uri=None): # TODO: refresh by uri? 
@@ -67,63 +92,70 @@ def refresh(self, uri=None): self.__tracks.clear() def search(self, query=None, uris=None, exact=False): + # convert query to schema parameters + try: + params = translator.query(query, exact) + except NotImplementedError: + return None # query not supported # sanitize uris uris = frozenset(uris or []).difference([self.root_directory.uri]) - # translate query to model - try: - query = translator.query(query, uris, exact) - except NotImplementedError as e: - logger.info('Not searching %s: %s', Extension.dist_name, e) - else: - return self.__search(query) - - def __browse(self, uri): - podcast = self.__podcast(uri) - # TODO: prepare self.__tracks for lookup requests (order!) - for track in translator.tracks(podcast, reverse=self.__reverse_browse): - yield models.Ref.track(uri=track.uri, name=track.name) - - def __images(self, uri): - podcast = self.__podcast(uri) - # return result dict as with LibraryController.images(uris) - result = dict(translator.images(podcast)) - result[uri] = [podcast.image] if podcast.image else None - return result - - def __list(self): - with schema.connect(self.__dbpath) as connection: - rows = schema.list(connection) - return itertools.starmap(translator.ref, rows) + # combine search results for multiple uris + refs = [] + for uri in uris or [None]: + if self.__search_limit is None: + limit = None + else: + limit = self.__search_limit - len(refs) + try: + result = self.__search(uri, exact, limit=limit, **params) + except Exception as e: + logger.error('Error searching %s: %s', Extension.ext_name, e) + else: + refs.extend(result) + # convert refs to models; sort on URI for (more) efficient track lookup + # TODO: merge translator.tracks(podcast) for all podcasts in refs? + results = {} + for ref in sorted(refs, key=operator.attrgetter('uri')): + try: + if ref.type == models.Ref.ALBUM: + model = translator.album(self.__podcast(ref.uri)) + elif ref.type == models.Ref.TRACK: + model, = self.lookup(ref.uri) + else: + logger.error('Invalid podcast result type "%s"', ref.type) + except Exception as e: + logger.warning('Error retrieving %s: %s', ref.uri, e) + else: + results[ref.uri] = model + # convert to search result model; keep original result order + albums = [] + tracks = [] + for model in (results[ref.uri] for ref in refs if ref.uri in results): + if isinstance(model, models.Album): + albums.append(model) + elif isinstance(model, models.Track): + tracks.append(model) + else: + raise TypeError('Invalid model type') + return models.SearchResult(albums=albums, tracks=tracks) - def __lookup(self, uri): - podcast = self.__podcast(uri) - # return result dict as with LibraryController.lookup(uris) - result = {uri: []} - for track in translator.tracks(podcast, reverse=self.__reverse_lookup): - result[track.uri] = [track] - result[uri].append(track) - return result + def __connect(self): + return schema.connect(self.__dbpath, timeout=self.__timeout) def __podcast(self, uri): scheme, _, feedurl = uri.partition('+') assert feedurl and scheme == Extension.ext_name return self.backend.podcasts[feedurl] - def __search(self, query): + def __search(self, uri, exact, **params): + if uri is not None: + scheme, _, feedurl = uri.partition('+') + assert feedurl and scheme == Extension.ext_name + else: + feedurl = None with schema.connect(self.__dbpath) as connection: - rows = schema.search(connection, query) # TODO: limit - albums = [] - tracks = [] - # TODO: retrieve podcasts first, or sort by uri - # TODO: do not return tracks if album is already included? 
- for ref in itertools.starmap(translator.ref, rows): - try: - if ref.type == models.Ref.ALBUM: - albums.append(translator.album(self.__podcast(ref.uri))) - elif ref.type == models.Ref.TRACK: - tracks.extend(self.lookup(ref.uri)) - else: - logger.error('Invalid search result type: %s', ref.type) - except Exception as e: - logger.error('Error retrieving %s: %s', ref.uri, e) - return models.SearchResult(albums=albums, tracks=tracks) + if exact: + rows = schema.search(connection, uri=feedurl, **params) + else: + rows = schema.ftsearch(connection, uri=feedurl, **params) + return itertools.starmap(translator.ref, rows) diff --git a/mopidy_podcast/models.py b/mopidy_podcast/models.py index 388d925..130f41f 100644 --- a/mopidy_podcast/models.py +++ b/mopidy_podcast/models.py @@ -1,6 +1,6 @@ from __future__ import unicode_literals -import datetime +from datetime import datetime, timedelta from mopidy.models import Image, ValidatedImmutableObject, fields @@ -8,15 +8,14 @@ class Enclosure(ValidatedImmutableObject): """Mopidy model type to represent an episode's media object.""" - # TODO: restrict type to {'application/pdf', 'audio/mpeg', 'audio/x-m4a', - # 'document/x-epub', 'video/mp4', 'video/quicktime', 'video/x-m4v'}? - uri = fields.URI() """The URI of the media object.""" length = fields.Integer(min=0) """The media object's file size in bytes.""" + # TODO: restrict type to {'application/pdf', 'audio/mpeg', 'audio/x-m4a', + # 'document/x-epub', 'video/mp4', 'video/quicktime', 'video/x-m4v'}? type = fields.Identifier() """The media object's MIME type, for example :const:`audio/mpeg`.""" @@ -30,29 +29,30 @@ class Episode(ValidatedImmutableObject): title = fields.String() """The episode's title.""" - pubdate = fields.Field(type=datetime.datetime) + # TODO: default necessary for sorting? + pubdate = fields.Field(type=datetime, default=datetime.fromtimestamp(0)) """The episode's publication date as an instance of :class:`datetime.datetime`.""" author = fields.String() """The episode author's name.""" - block = fields.Field(type=bool) + block = fields.Field(type=bool, default=False) """Prevent an episode from appearing in the directory.""" image = fields.Field(type=Image) """An image to be displayed with the episode as an instance of - :class:`Image`. + :class:`mopidy.models.Image`. """ - duration = fields.Field(type=datetime.timedelta) + duration = fields.Field(type=timedelta) """The episode's duration as a :class:`datetime.timedelta`.""" explicit = fields.Field(type=bool) """Indicates whether the episode contains explicit material.""" - order = fields.Integer(min=1) + order = fields.Integer() """Overrides the default ordering of episodes.""" description = fields.String() @@ -86,10 +86,14 @@ class Podcast(ValidatedImmutableObject): language = fields.Identifier() """The podcast's ISO two-letter language code.""" + pubdate = fields.Field(type=datetime) + """The podcast's publication date as an instance of + :class:`datetime.datetime`.""" + author = fields.String() """The podcast author's name.""" - block = fields.Field(type=bool) + block = fields.Field(type=bool, default=False) """Prevent a podcast from appearing in the directory.""" category = fields.String() @@ -97,14 +101,14 @@ class Podcast(ValidatedImmutableObject): image = fields.Field(type=Image) """An image to be displayed with the podcast as an instance of - :class:`Image`. + :class:`mopidy.models.Image`. 
""" explicit = fields.Field(type=bool) """Indicates whether the podcast contains explicit material.""" - complete = fields.Field(type=bool) + complete = fields.Field(type=bool, default=False) """Indicates completion of the podcast.""" newfeedurl = fields.URI() @@ -118,23 +122,3 @@ class Podcast(ValidatedImmutableObject): instances. """ - - -class Term(ValidatedImmutableObject): - """Mopidy model type to represent a search term.""" - - field = fields.Field(type=fields.Field) - """The search term's field or :class:`None`.""" - - values = fields.Collection(type=basestring, container=frozenset) - """The search terms's set of values.""" - - -class Query(ValidatedImmutableObject): - """Mopidy model type to represent a search query.""" - - terms = fields.Collection(type=Term, container=tuple) - """The query's terms.""" - - exact = fields.Field(type=bool, default=False) - """Indicates an exact query.""" diff --git a/mopidy_podcast/opml.py b/mopidy_podcast/opml.py deleted file mode 100644 index ee1d510..0000000 --- a/mopidy_podcast/opml.py +++ /dev/null @@ -1,69 +0,0 @@ -from __future__ import unicode_literals - -import xml.etree.ElementTree - -# http://dev.opml.org/spec2.html - -OUTLINES = { - None: lambda e: { - 'type': e.get('type'), - 'text': e.get('text') - }, - 'include': lambda e: { - 'type': 'include', - 'text': e.get('text'), - 'url': e.get('url') - }, - 'link': lambda e: { - 'type': 'link', - 'text': e.get('text'), - 'url': e.get('url') - }, - 'rss': lambda e: { - 'type': 'rss', - 'text': e.get('text'), - 'xmlUrl': e.get('xmlUrl') - }, - '': lambda e: { - 'text': e.get('text') - } -} - - -def parse(source): - root = xml.etree.ElementTree.parse(source).getroot() - if root.tag != 'opml' or root.find('body') is None: - raise TypeError('Not a valid OPML document') - outlines = [] - for e in root.find('body').iter(tag='outline'): - type = e.get('type', '').lower() - try: - outline = OUTLINES[type] - except KeyError: - outline = OUTLINES[None] - outlines.append(outline(e)) - return outlines - - -if __name__ == '__main__': - import argparse - import contextlib - import json - import urllib2 - import urlparse - import sys - - parser = argparse.ArgumentParser() - parser.add_argument('uri', metavar='PATH | URI') - parser.add_argument('-i', '--indent', type=int) - parser.add_argument('-t', '--timeout', type=float) - args = parser.parse_args() - - if urlparse.urlsplit(args.uri).scheme: - fh = urllib2.urlopen(args.uri, timeout=args.timeout) - else: - fh = open(args.uri) - with contextlib.closing(fh) as source: - outlines = parse(source) - json.dump(outlines, sys.stdout, indent=args.indent) - sys.stdout.write('\n') diff --git a/mopidy_podcast/playback.py b/mopidy_podcast/playback.py index 40e8406..c2017f7 100644 --- a/mopidy_podcast/playback.py +++ b/mopidy_podcast/playback.py @@ -13,7 +13,6 @@ def get_media_uri(podcast, guid): # TODO: filter media types, blocked? - # TODO: also match media uri for backward compatibility? for episode in podcast.episodes: if episode.guid == guid and episode.enclosure: return episode.enclosure.uri diff --git a/mopidy_podcast/rss.py b/mopidy_podcast/rss.py index f69b2b1..74c6753 100644 --- a/mopidy_podcast/rss.py +++ b/mopidy_podcast/rss.py @@ -94,7 +94,6 @@ def _enclosure(e): def _episode(e): - # TODO: uriencode guid? 
kwargs = { 'guid': _tag(e, 'guid'), 'title': _tag(e, 'title'), @@ -115,7 +114,7 @@ def _episode(e): return Episode(**kwargs) -def _podcast(e, uri): +def _podcast(e, uri, mtime=None): kwargs = { 'uri': uri, 'title': _tag(e, 'title'), @@ -130,8 +129,14 @@ def _podcast(e, uri): 'complete': _tag(e, 'itunes:complete', _bool), 'newfeedurl': _tag(e, 'itunes:new-feed-url'), 'description': _tag(e, 'itunes:summary'), - 'episodes': map(_episode, e.iter(tag='item')) + 'episodes': tuple(map(_episode, e.iter(tag='item'))) } + # channel and seems to be largely + # unsupported or unreliable with podcasts + if mtime: + kwargs['pubdate'] = datetime.datetime.utcfromtimestamp(mtime) + else: + kwargs['pubdate'] = max(e.pubdate for e in kwargs['episodes']) if not kwargs['image']: kwargs['image'] = _tag(e, 'image', _image) if not kwargs['description']: @@ -143,17 +148,22 @@ def parse(source, uri=None): channel = xml.etree.ElementTree.parse(source).find('channel') if channel is None: raise TypeError('Not an RSS feed') + header = getattr(source, 'headers', {}).get('last-modified') + if header: + mtime = email.utils.mktime_tz(email.utils.parsedate_tz(header)) else: - return _podcast(channel, uri or source.geturl()) + mtime = None + return _podcast(channel, uri or source.geturl(), mtime) if __name__ == '__main__': import argparse import contextlib import json - import urllib2 import sys + from urllib2 import urlopen + import mopidy.models class JSONEncoder(mopidy.models.ModelJSONEncoder): @@ -171,8 +181,7 @@ def default(self, obj): parser.add_argument('-t', '--timeout', type=float) args = parser.parse_args() - opener = urllib2.build_opener() # TODO: proxies, auth, etc. - with contextlib.closing(opener.open(args.uri, timeout=args.timeout)) as f: - obj = parse(f) + with contextlib.closing(urlopen(args.uri, timeout=args.timeout)) as source: + obj = parse(source) json.dump(obj, sys.stdout, cls=JSONEncoder, indent=args.indent) sys.stdout.write('\n') diff --git a/mopidy_podcast/schema.py b/mopidy_podcast/schema.py index 4d70c82..58f4329 100644 --- a/mopidy_podcast/schema.py +++ b/mopidy_podcast/schema.py @@ -3,138 +3,143 @@ import datetime import logging import os -import re import sqlite3 -from . import Extension, models - -PARAMETERS = { - None: 'any', - models.Episode.author: 'episode_author', - models.Episode.description: 'description', - models.Episode.pubdate: 'pubdate', - models.Episode.title: 'episode_title', - models.Podcast.author: 'podcast_author', - models.Podcast.category: 'category', - models.Podcast.title: 'podcast_title' +from . 
import Extension + +DEFAULT_PARAMS = { + 'episode_title': None, + 'podcast_title': None, + 'episode_author': None, + 'podcast_author': None, + 'category': None, + 'pubdate': None, + 'description': None, + 'any': None, + 'uri': None, + 'limit': -1, + 'offset': 0 } FTPODCAST_COLS = { 'any': 'ftpodcast', - 'podcast_title': 'title', - 'podcast_author': 'author', 'category': 'category', - 'description': 'description' + 'podcast_author': 'author', + 'podcast_title': 'title' } FTEPISODE_COLS = { 'any': 'ftepisode', - 'podcast_title': 'podcast_title', - 'podcast_author': 'podcast_author', - 'episode_title': 'episode_title', - 'episode_author': 'episode_author', - 'pubdate': 'pubdate', 'category': 'category', - 'description': 'description' + 'description': 'description', + 'episode_author': 'episode_author', + 'episode_title': 'episode_title', + 'podcast_author': 'podcast_author', + 'podcast_title': 'podcast_title', + 'pubdate': 'pubdate' } +DISTINCT_QUERY = """ +SELECT DISTINCT %s AS field + FROM episode JOIN podcast USING (uri) + WHERE field IS NOT NULL + AND (:any IS NULL OR :any IN + (episode.title, episode.author, episode.description, + podcast.title, podcast.author, podcast.category) + ) + AND (:podcast_title IS NULL OR :podcast_title = podcast.title) + AND (:episode_title IS NULL OR :episode_title = episode.title) + AND (:podcast_author IS NULL OR :podcast_author = podcast.author) + AND (:episode_author IS NULL OR :episode_author = episode.author) + AND (:category IS NULL OR :category = category) + AND (:pubdate IS NULL OR date(pubdate) = :pubdate) + AND (:description IS NULL OR :description = episode.description) +""" + FULLTEXT_QUERY = """ - SELECT uri AS uri, title AS title, NULL AS guid - FROM podcast - WHERE rowid in (%s) - UNION - SELECT uri AS uri, title AS title, guid AS guid - FROM episode - WHERE rowid in (%s) - ORDER BY title, uri, guid - LIMIT :limit OFFSET :offset +SELECT uri AS uri, title AS title, NULL AS guid, datetime('now') AS rank + FROM podcast + WHERE rowid in (%s) + AND :uri IS NULL + UNION +SELECT uri AS uri, title AS title, guid AS guid, pubdate as rank + FROM episode + WHERE rowid in (%s) + AND (:uri IS NULL OR :uri = uri) + ORDER BY rank DESC, title COLLATE NOCASE + LIMIT :limit OFFSET :offset """ INDEXED_QUERY = """ - SELECT uri AS uri, title AS title, NULL AS guid - FROM podcast - WHERE (:any IS NULL OR :any IN (title, author, category, description)) - AND (:podcast_title IS NULL OR :podcast_title = title) - AND (:episode_title IS NULL) - AND (:podcast_author IS NULL OR :podcast_author = author) - AND (:episode_author IS NULL) - AND (:category IS NULL OR :category = category) - AND (:pubdate IS NULL) - AND (:description IS NULL OR :description = description) - UNION - SELECT e.uri AS uri, e.title AS title, e.guid AS guid - FROM episode AS e - JOIN podcast AS p USING (uri) - WHERE (:any IS NULL OR :any IN (e.title, e.author, e.description)) - AND (:podcast_title IS NULL OR :podcast_title = p.title) - AND (:episode_title IS NULL OR :episode_title = e.title) - AND (:podcast_author IS NULL OR :podcast_author = p.author) - AND (:episode_author IS NULL OR :episode_author = e.author) - AND (:category IS NULL OR :category = p.category) - AND (:pubdate IS NULL OR e.pubdate LIKE date(:pubdate) || '%') - AND (:description IS NULL OR :description = e.description) - ORDER BY title, uri, guid - LIMIT :limit OFFSET :offset +SELECT uri AS uri, title AS title, NULL AS guid, datetime('now') AS rank + FROM podcast + WHERE (:any IS NULL OR :any IN (title, author, category)) + AND 
(:podcast_title IS NULL OR :podcast_title = title) + AND :episode_title IS NULL + AND (:podcast_author IS NULL OR :podcast_author = author) + AND :episode_author IS NULL + AND (:category IS NULL OR :category = category) + AND :pubdate IS NULL + AND :description IS NULL + AND :uri IS NULL + UNION +SELECT uri AS uri, episode.title AS title, guid AS guid, pubdate AS rank + FROM episode JOIN podcast USING (uri) + WHERE (:any IS NULL OR :any IN + (episode.title, episode.author, episode.description, + podcast.title, podcast.author, podcast.category) + ) + AND (:podcast_title IS NULL OR :podcast_title = podcast.title) + AND (:episode_title IS NULL OR :episode_title = episode.title) + AND (:podcast_author IS NULL OR :podcast_author = podcast.author) + AND (:episode_author IS NULL OR :episode_author = episode.author) + AND (:category IS NULL OR :category = category) + AND (:pubdate IS NULL OR pubdate LIKE date(:pubdate) || '%') + AND (:description IS NULL OR :description = episode.description) + AND (:uri IS NULL OR :uri = uri) + ORDER BY rank DESC, title COLLATE NOCASE + LIMIT :limit OFFSET :offset """ -LIST_QUERY = """ - SELECT uri AS uri, title AS title - FROM podcast - ORDER BY title, uri +LIST_PODCASTS_QUERY = """ +SELECT uri AS uri, title AS title + FROM podcast + ORDER BY title COLLATE NOCASE +""" + +LIST_EPISODES_QUERY = """ +SELECT guid AS guid, title AS title + FROM episode + WHERE uri = :uri + ORDER BY title COLLATE NOCASE +""" + +PUBDATE_QUERY = """ +SELECT pubdate FROM podcast WHERE uri = :uri """ UPDATE_PODCAST = """ - INSERT OR REPLACE INTO podcast ( - uri, - title, - link, - copyright, - language, - author, - block, - category, - explicit, - complete, - newfeedurl, - description - ) VALUES ( - :uri, - :title, - :link, - :copyright, - :language, - :author, - :block, - :category, - :explicit, - :complete, - :newfeedurl, - :description - ) +INSERT OR REPLACE INTO podcast ( + uri, title, link, copyright, language, pubdate, author, block, + category, explicit, complete, newfeedurl, description +) VALUES ( + :uri, :title, :link, :copyright, :language, :pubdate, :author, :block, + :category, :explicit, :complete, :newfeedurl, :description +) """ UPDATE_EPISODE = """ - INSERT OR REPLACE INTO episode ( - uri, - guid, - title, - pubdate, - author, - block, - duration, - explicit, - description - ) VALUES ( - :uri, - :guid, - :title, - :pubdate, - :author, - :block, - :duration, - :explicit, - :description - ) +INSERT OR REPLACE INTO episode ( + uri, guid, title, pubdate, author, block, duration, explicit, + description +) VALUES ( + :uri, :guid, :title, :pubdate, :author, :block, :duration, :explicit, + :description +) +""" + +DELETE_PODCAST = """ +DELETE FROM podcast WHERE uri = :uri """ logger = logging.getLogger(__name__) @@ -176,26 +181,60 @@ def init(cursor, scripts=os.path.join(os.path.dirname(__file__), 'sql')): return user_version -def list(cursor): - return cursor.execute(LIST_QUERY) +def list(connection, uri=None): + if uri is None: + return connection.execute(LIST_PODCASTS_QUERY) + else: + return connection.execute(LIST_EPISODES_QUERY, {uri: uri}) -def search(cursor, query, offset=0, limit=None): - if limit is None: - limit = -1 - if query.exact: - return _indexed_search(cursor, query, offset, limit) +def pubdate(connection, uri): + col, = connection.execute(PUBDATE_QUERY, {'uri': uri}).fetchone() or [None] + if col: + return datetime.datetime.strptime(col, '%Y-%m-%d %H:%M:%S') else: - return _fulltext_search(cursor, query, offset, limit) + return None + + +def distinct(connection, 
expr, **params): + sql = DISTINCT_QUERY % expr + params = dict(DEFAULT_PARAMS, **params) + logger.debug('Distinct query: %s %r', sql, params) + return connection.execute(sql, params) -def update(cursor, podcast): - cursor.execute(UPDATE_PODCAST, { +def search(connection, **params): + params = dict(DEFAULT_PARAMS, **params) + logger.debug('Indexed query: %s %r', INDEXED_QUERY, params) + return connection.execute(INDEXED_QUERY, params) + + +def ftsearch(connection, uri=None, offset=0, limit=-1, **params): + # SQLite MATCH clauses cannot be combined with AND or OR, and + # phrase queries may not be used with column names... + sql = FULLTEXT_QUERY % ( + ' INTERSECT '.join( + _match('ftpodcast', FTPODCAST_COLS, key) for key in params + ), + ' INTERSECT '.join( + _match('ftepisode', FTEPISODE_COLS, key) for key in params + ) + ) + params.update(limit=limit, offset=offset, uri=uri) + logger.debug('Fulltext query: %s %r', sql, params) + return connection.execute(sql, params) + + +def update(connection, podcast): + if podcast.pubdate and podcast.pubdate == pubdate(connection, podcast.uri): + return # assume nothing changed + connection.execute(UPDATE_PODCAST, { 'uri': podcast.uri, 'title': podcast.title, 'link': podcast.link, 'copyright': podcast.copyright, 'language': podcast.language, + 'pubdate': podcast.pubdate, 'author': podcast.author, 'block': podcast.block, 'category': podcast.category, @@ -204,7 +243,7 @@ def update(cursor, podcast): 'newfeedurl': podcast.newfeedurl, 'description': podcast.description }) - cursor.executemany(UPDATE_EPISODE, [{ + connection.executemany(UPDATE_EPISODE, [{ 'uri': podcast.uri, 'guid': episode.guid, 'title': episode.title, @@ -217,43 +256,14 @@ def update(cursor, podcast): } for episode in podcast.episodes]) -def cleanup(cursor, uris): - sql = 'DELETE FROM podcast WHERE uri NOT IN (%s)' % ( - ', '.join(['?'] * len(uris)) - ) - return cursor.execute(sql, uris) - - -def _indexed_search(cursor, query, offset=0, limit=-1): - params = dict.fromkeys(PARAMETERS.values(), None) - for term in query.terms: - params[PARAMETERS[term.field]] = ' '.join(term.values) - params.update(offset=offset, limit=limit) - return cursor.execute(INDEXED_QUERY, params) +def delete(connection, uri): + return connection.execute(DELETE_PODCAST, {'uri': uri}) -def _fulltext_search(cursor, query, offset=0, limit=-1): - params = dict.fromkeys(PARAMETERS.values(), None) - for term in query.terms: - params[PARAMETERS[term.field]] = ' '.join(map(_quote, term.values)) - params.update(offset=offset, limit=limit) - # SQLite MATCH clauses cannot be combined with AND or OR - # TODO: skip podcast search if field not available? 
- sql = FULLTEXT_QUERY % ( - ' INTERSECT '.join( - 'SELECT docid FROM ftpodcast WHERE %s MATCH :%s' % (col, key) - for key, col in FTPODCAST_COLS.items() - if params[key] is not None - ), - ' INTERSECT '.join( - 'SELECT docid FROM ftepisode WHERE %s MATCH :%s' % (col, key) - for key, col in FTEPISODE_COLS.items() - if params[key] is not None - ) - ) - # logger.debug('Fulltext query: %r %r', sql, params) - return cursor.execute(sql, params) - - -def _quote(value, re=re.compile(r'["^*]|NEAR|AND|OR')): - return '"%s"' % re.sub('', value) +def _match(table, cols, key): + try: + col = cols[key] + except KeyError: + return 'SELECT NULL' + else: + return 'SELECT docid FROM %s WHERE %s MATCH :%s' % (table, col, key) diff --git a/mopidy_podcast/sql/schema.sql b/mopidy_podcast/sql/schema.sql index 3647c0c..af402c0 100644 --- a/mopidy_podcast/sql/schema.sql +++ b/mopidy_podcast/sql/schema.sql @@ -10,6 +10,7 @@ CREATE TABLE podcast ( link TEXT, -- URL of the podcast's website copyright TEXT, -- podcast copyright notice language TEXT, -- ISO two-letter language code + pubdate TEXT, -- podcast last publication date and time author TEXT, -- podcast author's name block INTEGER, -- whether the podcast should be blocked category TEXT, -- the podcast's main category @@ -21,14 +22,15 @@ CREATE TABLE podcast ( CREATE TABLE episode ( uri TEXT REFERENCES podcast(uri) ON DELETE CASCADE ON UPDATE CASCADE, - guid TEXT PRIMARY KEY, -- episode GUID + guid TEXT NOT NULL, -- episode GUID title TEXT NOT NULL, -- episode title pubdate TEXT, -- episode publication date and time author TEXT, -- episode author's name block INTEGER, -- whether the epidode should be blocked duration REAL, -- episode duration in seconds explicit INTEGER, -- whether the episode contains explicit material - description TEXT -- description of the episode + description TEXT, -- description of the episode + PRIMARY KEY (uri, guid) -- GUIDs may not be as unique as they shoulde be ); CREATE INDEX podcast_title_index ON podcast (title); diff --git a/mopidy_podcast/translator.py b/mopidy_podcast/translator.py index edbad77..05d8698 100644 --- a/mopidy_podcast/translator.py +++ b/mopidy_podcast/translator.py @@ -1,58 +1,49 @@ from __future__ import unicode_literals +import collections import operator +import re from mopidy.models import Album, Artist, Ref, Track import uritools -from . import Extension, models - -_FIELDS = { - 'any': None, - 'album': models.Podcast.title, - 'albumartist': models.Podcast.author, - 'artist': models.Episode.author, - 'comment': models.Episode.description, - 'date': models.Episode.pubdate, - 'genre': models.Podcast.category, - 'track_name': models.Episode.title +from . import Extension + +_EXPRESSIONS = { + # field is "track", while search keyword is "track_name"? + 'track': 'episode.title', + 'artist': 'episode.author', + 'albumartist': 'podcast.author', + 'album': 'podcast.title', + 'date': 'date(pubdate)', + 'genre': 'category' +} + +_PARAMETERS = { + 'track_name': 'episode_title', + 'album': 'podcast_title', + 'artist': 'episode_author', + 'albumartist': 'podcast_author', + 'genre': 'category', + 'date': 'pubdate', + 'comment': 'description', + 'any': 'any' } -def _trackuri(uri, guid, safe=uritools.SUB_DELIMS+b':@/?'): +def _albumuri(feedurl): + return uritools.uridefrag(Extension.ext_name + '+' + feedurl).uri + + +def _trackuri(albumuri, guid, safe=uritools.SUB_DELIMS+b':@/?'): # timeit shows approx. 
factor 3 difference - # return uri + uritools.uricompose(fragment=guid) - return uri + '#' + uritools.uriencode(guid, safe=safe) - - -def _track(episode, album, **kwargs): - return Track( - uri=_trackuri(album.uri, episode.guid), - name=episode.title, - album=album, - artists=( - [Artist(name=episode.author)] - if episode.author - else None - ), - date=( - episode.pubdate.date().isoformat() - if episode.pubdate - else None - ), - length=( - int(episode.duration.total_seconds() * 1000) - if episode.duration - else None - ), - comment=episode.description, - **kwargs - ) + # return albumuri + uritools.uricompose(fragment=guid) + return albumuri + '#' + uritools.uriencode(guid, safe=safe) -def ref(feedurl, title, guid=None): - uri, _ = uritools.uridefrag(Extension.ext_name + '+' + feedurl) +def ref(feedurl, title, guid=None, *args): + uri = _albumuri(feedurl) if guid: return Ref.track(uri=_trackuri(uri, guid), name=title) else: @@ -60,58 +51,84 @@ def ref(feedurl, title, guid=None): def album(podcast): - uri, _ = uritools.uridefrag(Extension.ext_name + '+' + podcast.uri) return Album( - uri=uri, + uri=_albumuri(podcast.uri), name=podcast.title, - artists=( - [Artist(name=podcast.author)] - if podcast.author - else None - ), + artists=([Artist(name=podcast.author)] if podcast.author else None), num_tracks=len(podcast.episodes) ) -def tracks(podcast, key=operator.attrgetter('pubdate'), reverse=False): - uri, _ = uritools.uridefrag(Extension.ext_name + '+' + podcast.uri) - album = Album( - uri=uri, - name=podcast.title, - artists=( - [Artist(name=podcast.author)] - if podcast.author - else None - ), - num_tracks=len(podcast.episodes) - ) - genre = podcast.category - # TODO: support ? - episodes = sorted(podcast.episodes, key=key, reverse=reverse) - for index, episode in enumerate(episodes, start=1): - # TODO: filter block/media type? - if episode.enclosure and episode.enclosure.uri: - yield _track(episode, album=album, genre=genre, track_no=index) +def tracks(podcast, key=operator.attrgetter('pubdate'), _album=album): + album = _album(podcast) + result = collections.OrderedDict() + # TODO: support + for index, episode in enumerate(sorted(podcast.episodes, key=key), 1): + # TODO: filter by block/explicit/media type? 
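+        # The filtering hinted at above could look like this (assumption only,
+        # not implemented in this patch):
+        #     if episode.block or episode.explicit:
+        #         continue
+        #     mimetype = episode.enclosure.type if episode.enclosure else None
+        #     if mimetype and not mimetype.startswith(('audio/', 'video/')):
+        #         continue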
+ if not episode.guid: + continue + if not episode.enclosure or not episode.enclosure.uri: + continue + uri = _trackuri(album.uri, episode.guid) + result[uri] = Track( + uri=uri, + name=episode.title, + album=album, + artists=( + [Artist(name=episode.author)] + if episode.author + else None + ), + date=( + episode.pubdate.date().isoformat() + if episode.pubdate + else None + ), + length=( + int(episode.duration.total_seconds() * 1000) + if episode.duration + else None + ), + comment=episode.description, + genre=podcast.category, + track_no=index + ) + return result def images(podcast): - uri, _ = uritools.uridefrag(Extension.ext_name + '+' + podcast.uri) - default = [podcast.image] if podcast.image else None + uri = _albumuri(podcast.uri) + default = [podcast.image] if podcast.image else [] + result = {uri: default} for episode in podcast.episodes: if episode.image: - yield (_trackuri(uri, episode.guid), [episode.image]) + images = [episode.image] + default else: - yield (_trackuri(uri, episode.guid), default) + images = default + result[_trackuri(uri, episode.guid)] = images + return result + + +def field(name): + try: + expr = _EXPRESSIONS[name] + except KeyError: + raise NotImplementedError('Field "%s" not supported' % name) + else: + return expr -def query(query, uris, exact=False): - # TODO: uris - terms = [] +def query(query, exact=True, re=re.compile(r'["^*]')): + params = {} for key, values in query.items(): + if exact: + value = ''.join(values) # FIXME: multi-valued exact queries? + else: + value = ' '.join('"%s"' % re.sub(' ', v) for v in values) try: - field = _FIELDS[key] + name = _PARAMETERS[key] except KeyError: - raise NotImplementedError('Search key "%s" not supported' % key) + raise NotImplementedError('Search field "%s" not supported' % key) else: - terms.append(models.Term(field=field, values=values)) - return models.Query(terms=terms, exact=exact) + params[name] = value + return params diff --git a/setup.py b/setup.py index 66ae20c..a89bb95 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ def get_version(filename): 'Mopidy >= 1.1.1', 'Pykka >= 1.1', 'cachetools >= 1.0', + 'requests >= 2.0', 'uritools >= 1.0' ], entry_points={ diff --git a/tests/test_extension.py b/tests/test_extension.py index 17a5efc..8809160 100644 --- a/tests/test_extension.py +++ b/tests/test_extension.py @@ -15,7 +15,6 @@ def test_get_config_schema(): assert 'import_dir' in schema assert 'update_interval' in schema assert 'browse_order' in schema - assert 'lookup_order' in schema assert 'cache_size' in schema assert 'cache_ttl' in schema assert 'timeout' in schema
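
For reference, a minimal sketch of how the new requests-based session and the
stream() helper introduced above fit together; the feed URL, proxy settings and
timeout below are placeholder values, not taken from this patch:

    # Hedged usage sketch: fetch and parse a single feed outside the backend.
    from mopidy_podcast import Extension, rss
    from mopidy_podcast.backend import stream

    config = {
        'proxy': {},                 # empty -> format_proxy() yields no proxy
        'podcast': {'timeout': 10},  # illustrative extension settings
    }
    session = Extension.get_requests_session(config)

    url = 'http://example.com/feed.xml'  # placeholder feed URL
    with stream(session, url, timeout=config['podcast']['timeout']) as response:
        podcast = rss.parse(response.raw, url)  # parse the decoded stream
    print(podcast.title)

Both PodcastCache and PodcastUpdateActor build their sessions this way, so the
proxy and User-Agent settings from the Mopidy config apply to every feed request.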