diff --git a/indexer.py b/indexer.py index 2adf638..e24c657 100644 --- a/indexer.py +++ b/indexer.py @@ -1,4 +1,19 @@ # -*- coding: utf-8 -*- +"""Music indexer for the Shiva-Server API. +Index your music collection and (optionally) retrieve album covers and artist +pictures from Last.FM. + +Usage: + shiva-indexer [-h] [-v] [-q] [--lastfm] [--nometadata] [--reindex] + +Options: + -h, --help Show this help message and exit + --lastfm Retrieve artist and album covers from Last.FM API. + --nometadata Don't read file's metadata when indexing. + --reindex Remove all existing data from the database before indexing. + -v --verbose Show debugging messages about the progress. + -q --quiet Suppress warnings. +""" # K-Pg import logging from datetime import datetime @@ -7,39 +22,27 @@ from shiva import models as m from shiva.app import app, db -from shiva.utils import ID3Manager +from shiva.utils import MetadataManager q = db.session.query -USAGE = """Usage: %s [-h] [--lastfm] [--nometadata] - -Music indexer for the Shiva-Server API. - -Index your music collection and (optionally) retrieve album covers and artist -pictures from Last.FM. - -optional arguments: - -h, --help Show this help message and exit - --lastfm Retrieve artist and album covers from Last.FM API. - --nometadata Don't read file's metadata when indexing. 
-""" % sys.argv[0] - -if '--help' in sys.argv or '-h' in sys.argv: - print(USAGE) - sys.exit(0) - class Indexer(object): - def __init__(self, config=None, use_lastfm=False, no_metadata=False): + def __init__(self, config=None, use_lastfm=False, no_metadata=False, reindex=False, + verbose=False, quiet=False): self.config = config self.use_lastfm = use_lastfm self.no_metadata = no_metadata + self.verbose = verbose + self.quiet = quiet - self.count = 0 + self.track_count = 0 self.session = db.session self.media_dirs = config.get('MEDIA_DIRS', []) - self.id3r = None + + self._meta = None + self.artists = {} self.albums = {} @@ -50,10 +53,17 @@ def __init__(self, config=None, use_lastfm=False, no_metadata=False): api_key = config['LASTFM_API_KEY'] self.lastfm = self.pylast.LastFMNetwork(api_key=api_key) - if len(self.media_dirs) == 0: + if not len(self.media_dirs): print("Remember to set the MEDIA_DIRS option, otherwise I don't " 'know where to look for.') + if reindex: + models = [m.Artist, m.Album, m.Track, m.Lyrics] + for model in models: + print('Deleting all rows from {} model...'.format(model.__name__)) + model.query.delete() + self.session.commit() + def get_artist(self, name): if name in self.artists: return self.artists[name] @@ -92,14 +102,14 @@ def get_album(self, name, artist): def get_release_year(self, lastfm_album=None): if not self.use_lastfm or not lastfm_album: - return self.get_id3_reader().release_year + return self.get_metadata_reader().release_year _date = lastfm_album.get_release_date() if not _date: - if not self.get_id3_reader().release_year: + if not self.get_metadata_reader().release_year: return None - return self.get_id3_reader().release_year + return self.get_metadata_reader().release_year return datetime.strptime(_date, '%d %b %Y, %H:%M').year @@ -112,22 +122,22 @@ def save_track(self): full_path = self.file_path.decode('utf-8') - print(self.file_path) - track = m.Track(full_path) if self.no_metadata: self.session.add(track) - - return 
True + if self.verbose: + print 'Added track without metadata: %s' % full_path + return else: if q(m.Track).filter_by(path=full_path).count(): - return True + if self.verbose: + print 'Skipped existing track: %s' % full_path + return - use_prev = None - id3r = self.get_id3_reader() + meta = self.get_metadata_reader() - artist = self.get_artist(id3r.artist) - album = self.get_album(id3r.album, artist) + artist = self.get_artist(meta.artist) + album = self.get_album(meta.album, artist) if artist is not None and artist not in album.artists: album.artists.append(artist) @@ -136,73 +146,81 @@ def save_track(self): track.artist = artist self.session.add(track) - return True + if self.verbose: + print 'Added track: %s' % full_path - def get_id3_reader(self): - if not self.id3r or not self.id3r.same_path(self.file_path): - self.id3r = ID3Manager(self.file_path) + self.track_count += 1 + if self.track_count % 10 == 0: + self.session.commit() + if self.verbose: + print 'Writing to database...' - return self.id3r + def get_metadata_reader(self): + if not self._meta or self._meta.origpath != self.file_path: + self._meta = MetadataManager(self.file_path) + return self._meta def is_track(self): - """Tries to guess whether the file is a valid track or not. - """ - if os.path.isdir(self.file_path): + """Try to guess whether the file is a valid track or not.""" + if not os.path.isfile(self.file_path): return False if '.' not in self.file_path: return False - ext = self.file_path[self.file_path.rfind('.') + 1:] - if ext not in self.config.get('ACCEPTED_FORMATS', []): - return False - - if not self.get_id3_reader().is_valid(): + ext = self.file_path.rsplit('.', 1)[1] + if ext not in self.get_metadata_reader().VALID_FILE_EXTENSIONS: + if not self.quiet: + msg = 'Skipped file with unknown file extension: %s' + print msg % self.file_path return False return True - def walk(self, dir_name): - """Recursively walks through a directory looking for tracks. 
- """ + def walk(self, target): + """Recursively walks through a directory looking for tracks.""" - self.count += 1 - if self.count % 10 == 0: - self.session.commit() + # If target is a file, try to save it as a track + if os.path.isfile(target): + self.file_path = target + if self.is_track(): + self.save_track() - if os.path.isdir(dir_name): - for name in os.listdir(dir_name): - self.file_path = os.path.join(dir_name, name) - if os.path.isdir(self.file_path): - self.walk(self.file_path) - else: + # Otherwise, recursively walk the directory looking for files + else: + for root, dirs, files in os.walk(target): + for name in files: + self.file_path = os.path.join(root, name) if self.is_track(): - try: - self.save_track() - except Exception, e: - logging.warning("%s not imported - %s" % ( - self.file_path, e.message)) - - return True + self.save_track() def run(self): for mobject in self.media_dirs: for mdir in mobject.get_valid_dirs(): self.walk(mdir) -if __name__ == '__main__': - use_lastfm = '--lastfm' in sys.argv - no_metadata = '--nometadata' in sys.argv - - if no_metadata: - use_lastfm = False - if use_lastfm and not app.config.get('LASTFM_API_KEY'): - print('ERROR: You need a Last.FM API key if you set the --lastfm ' +if __name__ == '__main__': + from docopt import docopt + arguments = docopt(__doc__) + + kwargs = { + 'use_lastfm': arguments['--lastfm'], + 'no_metadata': arguments['--nometadata'], + 'reindex': arguments['--reindex'], + 'verbose': arguments['--verbose'], + 'quiet': arguments['--quiet'], + } + + if kwargs['no_metadata']: + kwargs['use_lastfm'] = False + + if kwargs['use_lastfm'] and not app.config.get('LASTFM_API_KEY'): + sys.stderr.write('ERROR: You need a Last.FM API key if you set the --lastfm ' 'flag.\n') sys.exit(1) - lola = Indexer(app.config, use_lastfm=use_lastfm, no_metadata=no_metadata) + lola = Indexer(app.config, **kwargs) lola.run() # Petit performance hack: Every track will be added to the session but they diff --git 
a/requirements.pip b/requirements.pip index 6aa9a07..ebab296 100644 --- a/requirements.pip +++ b/requirements.pip @@ -1,8 +1,10 @@ Flask==0.9 Flask-Restful==0.1.2 Flask-SQLAlchemy==0.16 -eyed3==0.7.1 requests==1.0.4 translitcodec==0.3 pyLast==0.5.11 lxml==3.1beta1 +mutagen==1.21 +docopt==0.6.1 +python-dateutil==2.1 diff --git a/shiva/models.py b/shiva/models.py index 1dd7720..4a651d8 100644 --- a/shiva/models.py +++ b/shiva/models.py @@ -3,11 +3,11 @@ from flask.ext.sqlalchemy import SQLAlchemy -from shiva.utils import slugify as do_slug, randstr, ID3Manager +from shiva.utils import slugify as do_slug, randstr, MetadataManager db = SQLAlchemy() -__all__ = ('db', 'Artist', 'Album', 'Track') +__all__ = ('db', 'Artist', 'Album', 'Track', 'Lyrics') def slugify(model, field_name): @@ -46,7 +46,7 @@ class Artist(db.Model): __tablename__ = 'artists' pk = db.Column(db.Integer, primary_key=True) - # TODO: Update the files' ID3 tags when changing this info. + # TODO: Update the files' Metadata when changing this info. name = db.Column(db.String(128), nullable=False) slug = db.Column(db.String(128), unique=True, nullable=False) image = db.Column(db.String(256)) @@ -98,8 +98,7 @@ def __repr__(self): class Track(db.Model): - """ - """ + """Track model.""" __tablename__ = 'tracks' @@ -109,7 +108,9 @@ class Track(db.Model): slug = db.Column(db.String(128), unique=True) bitrate = db.Column(db.Integer) file_size = db.Column(db.Integer) + # TODO could be float if number weren't converted to an int in metadata manager length = db.Column(db.Integer) + # TODO number should probably be renamed to track or track_number number = db.Column(db.Integer) lyrics = db.relationship('Lyrics', backref='track', uselist=False) @@ -119,7 +120,7 @@ class Track(db.Model): nullable=True) def __init__(self, path): - if type(path) not in (unicode, str, file): + if not isinstance(path, (basestring, file)): raise ValueError('Invalid parameter for Track. 
Path or File ' 'expected, got %s' % type(path)) @@ -128,7 +129,7 @@ def __init__(self, path): _path = path.name self.set_path(_path) - self._id3r = None + self._meta = None def __setattr__(self, attr, value): if attr == 'title': @@ -146,19 +147,18 @@ def set_path(self, path): if path != self.get_path(): self.path = path if os.path.exists(self.get_path()): - self.file_size = self.get_id3_reader().size - self.bitrate = self.get_id3_reader().bitrate - self.length = self.get_id3_reader().length - self.number = self.get_id3_reader().track_number - self.title = self.get_id3_reader().title - - def get_id3_reader(self): - """Returns an object with the ID3 info reader. - """ - if not getattr(self, '_id3r', None): - self._id3r = ID3Manager(self.get_path()) - - return self._id3r + meta = self.get_metadata_reader() + self.file_size = meta.filesize + self.bitrate = meta.bitrate + self.length = meta.length + self.number = meta.track_number + self.title = meta.title + + def get_metadata_reader(self): + """Return a MetadataManager object.""" + if not getattr(self, '_meta', None): + self._meta = MetadataManager(self.get_path()) + return self._meta def __repr__(self): return "" % self.title diff --git a/shiva/resources.py b/shiva/resources.py index 6c368f9..33ce59e 100644 --- a/shiva/resources.py +++ b/shiva/resources.py @@ -499,8 +499,8 @@ def __init__(self, artist, json): self.venue = json['venue'] def split_artists(self, json): - if len(json) == 0: - ([], []) + if not len(json): + return ([], []) elif len(json) == 1: artist = Artist.query.filter_by(name=json[0]['name']).first() diff --git a/shiva/utils.py b/shiva/utils.py index b92e425..66ad2c6 100644 --- a/shiva/utils.py +++ b/shiva/utils.py @@ -1,10 +1,13 @@ # -*- coding: utf-8 -*- import os import re +import datetime from random import random from hashlib import md5 -import translitcodec +import translitcodec # don't remove! 
class MetadataManager(object):
    """A format-agnostic metadata wrapper around Mutagen.

    This makes reading/writing audio metadata easy across all possible audio
    formats by using properties for the different keys.

    In order to persist changes to the metadata, the ``save()`` method needs to
    be called.

    Tag properties (``title``, ``artist``, ``album``, ``genre``,
    ``release_year``, ``track_number``) return ``None`` when the tag is
    missing. The stream/file properties (``length``, ``bitrate``,
    ``sample_rate``, ``filename``, ``filepath``, ``filesize``) read from
    ``self.reader`` directly and therefore need a file Mutagen recognised.

    """

    # Static attributes

    VALID_FILE_EXTENSIONS = [
        'asf', 'wma',  # ASF
        'flac',  # FLAC
        'mp4', 'm4a', 'm4b', 'm4p',  # M4A
        'ape',  # Monkey's Audio
        'mp3',  # MP3
        'mpc', 'mp+', 'mpp',  # Musepack
        'spx',  # Ogg Speex
        'ogg', 'oga',  # Ogg Vorbis / Theora
        'tta',  # True Audio
        'wv',  # WavPack
        'ofr',  # OptimFROG
    ]

    def __init__(self, filepath):
        """Open ``filepath`` with Mutagen's easy (format-independent) API."""
        # Imported at function scope so the class itself does not depend on
        # a module-level import being present.
        import mutagen

        self._original_path = filepath
        # NOTE(review): mutagen.File() is documented to return None when it
        # cannot determine the file type -- ``reader`` may therefore be None.
        self.reader = mutagen.File(filepath, easy=True)

    # Metadata properties

    @property
    def title(self):
        """The track title."""
        return self._getter('title')

    @property
    def artist(self):
        """The artist name."""
        return self._getter('artist')

    @artist.setter
    def artist(self, value):
        self.reader['artist'] = value

    @property
    def album(self):
        """The album name."""
        return self._getter('album')

    @album.setter
    def album(self, value):
        self.reader['album'] = value

    @property
    def release_year(self):
        """The album release year as an int, or None if unknown."""
        date = self._getter('date')
        if not date:
            # No date tag at all: nothing to parse.
            return None

        import dateutil.parser

        # The sentinel default lets us tell "nothing was parsed" apart from a
        # real date: every field the parser finds overrides the default.
        default_date = datetime.datetime(datetime.MINYEAR, 1, 1)
        try:
            parsed_date = dateutil.parser.parse(date, default=default_date)
        except (ValueError, OverflowError):
            # Free-form or garbage date tags must not abort the caller.
            return None
        if parsed_date != default_date:
            return parsed_date.year
        return None

    @release_year.setter
    def release_year(self, value):
        # Written to 'date', the same key the getter reads from.
        self.reader['date'] = self._as_text(value)

    @property
    def track_number(self):
        """The track number as an int, or None if missing or not numeric.

        Tags of the form ``'3/12'`` (track/total) yield ``3``.
        """
        raw = self._getter('tracknumber')
        if raw is None:
            return None
        try:
            return int(raw.split('/', 1)[0].strip())
        except (AttributeError, TypeError, ValueError):
            return None

    @track_number.setter
    def track_number(self, value):
        self.reader['tracknumber'] = self._as_text(value)

    @property
    def genre(self):
        """The music genre."""
        return self._getter('genre')

    @genre.setter
    def genre(self, value):
        # Must go through the reader; assigning to ``self.genre`` here would
        # re-enter this setter and recurse forever.
        self.reader['genre'] = value

    @property
    def length(self):
        """The length of the song in seconds."""
        return int(round(self.reader.info.length))

    @property
    def bitrate(self):
        """The audio bitrate."""
        return self.reader.info.bitrate

    @property
    def sample_rate(self):
        """The audio sample rate."""
        return self.reader.info.sample_rate

    @property
    def filename(self):
        """The file name of this audio file."""
        return os.path.basename(self.reader.filename)

    @property
    def filepath(self):
        """The absolute path to this audio file."""
        return os.path.abspath(self.reader.filename)

    @property
    def origpath(self):
        """The original path with which this class was instantiated. This
        function avoids a call to ``os.path``. Usually you'll want to use
        either :meth:`.filename` or :meth:`.filepath` instead."""
        return self._original_path

    @property
    def filesize(self):
        """The size of this audio file in the filesystem."""
        return os.stat(self.reader.filename).st_size

    # Helper functions

    @staticmethod
    def _as_text(value):
        """Return integers as text and leave every other value untouched.

        Tag values are stored as text, while the matching getters hand out
        ints; this keeps ``obj.prop = obj.prop`` round trips working.
        """
        if isinstance(value, int):
            return u'%d' % value
        return value

    def _getter(self, attr, fallback=None):
        """Return the first list item of the specified attribute or fall back
        to a default value if attribute is not available."""
        if self.reader is None or attr not in self.reader:
            return fallback
        values = self.reader[attr]
        # A present-but-empty tag is treated like a missing one.
        return values[0] if values else fallback

    def save(self):
        """Save changes to file metadata."""
        self.reader.save()