Replace Eyed3 with Mutagen #47

Closed
wants to merge 13 commits into
from
View
@@ -1,4 +1,19 @@
# -*- coding: utf-8 -*-
+"""Music indexer for the Shiva-Server API.
+Index your music collection and (optionally) retrieve album covers and artist
+pictures from Last.FM.
+
+Usage:
+ shiva-indexer [-h] [-v] [-q] [--lastfm] [--nometadata] [--reindex]
+
+Options:
+ -h, --help Show this help message and exit
+ --lastfm Retrieve artist and album covers from Last.FM API.
+ --nometadata Don't read file's metadata when indexing.
+ --reindex Remove all existing data from the database before indexing.
+ -v --verbose Show debugging messages about the progress.
+ -q --quiet Suppress warnings.
+"""
# K-Pg
import logging
from datetime import datetime
@@ -7,39 +22,27 @@
from shiva import models as m
from shiva.app import app, db
-from shiva.utils import ID3Manager
+from shiva.utils import MetadataManager
q = db.session.query
-USAGE = """Usage: %s [-h] [--lastfm] [--nometadata]
-
-Music indexer for the Shiva-Server API.
-
-Index your music collection and (optionally) retrieve album covers and artist
-pictures from Last.FM.
-
-optional arguments:
- -h, --help Show this help message and exit
- --lastfm Retrieve artist and album covers from Last.FM API.
- --nometadata Don't read file's metadata when indexing.
-""" % sys.argv[0]
-
-if '--help' in sys.argv or '-h' in sys.argv:
- print(USAGE)
- sys.exit(0)
-
class Indexer(object):
- def __init__(self, config=None, use_lastfm=False, no_metadata=False):
+ def __init__(self, config=None, use_lastfm=False, no_metadata=False, reindex=False,
+ verbose=False, quiet=False):
self.config = config
self.use_lastfm = use_lastfm
self.no_metadata = no_metadata
+ self.verbose = verbose
+ self.quiet = quiet
- self.count = 0
+ self.track_count = 0
self.session = db.session
self.media_dirs = config.get('MEDIA_DIRS', [])
- self.id3r = None
+
+ self._meta = None
+
self.artists = {}
self.albums = {}
@@ -50,10 +53,17 @@ def __init__(self, config=None, use_lastfm=False, no_metadata=False):
api_key = config['LASTFM_API_KEY']
self.lastfm = self.pylast.LastFMNetwork(api_key=api_key)
- if len(self.media_dirs) == 0:
+ if not len(self.media_dirs):
print("Remember to set the MEDIA_DIRS option, otherwise I don't "
'know where to look for.')
+ if reindex:
+ models = [m.Artist, m.Album, m.Track, m.Lyrics]
+ for model in models:
+ print('Deleting all rows from {} model...'.format(model.__name__))
+ model.query.delete()
+ self.session.commit()
+
def get_artist(self, name):
if name in self.artists:
return self.artists[name]
@@ -92,14 +102,14 @@ def get_album(self, name, artist):
def get_release_year(self, lastfm_album=None):
if not self.use_lastfm or not lastfm_album:
- return self.get_id3_reader().release_year
+ return self.get_metadata_reader().release_year
_date = lastfm_album.get_release_date()
if not _date:
- if not self.get_id3_reader().release_year:
+ if not self.get_metadata_reader().release_year:
return None
- return self.get_id3_reader().release_year
+ return self.get_metadata_reader().release_year
return datetime.strptime(_date, '%d %b %Y, %H:%M').year
@@ -112,22 +122,22 @@ def save_track(self):
full_path = self.file_path.decode('utf-8')
- print(self.file_path)
-
track = m.Track(full_path)
if self.no_metadata:
self.session.add(track)
-
- return True
+ if self.verbose:
+ print 'Added track without metadata: %s' % full_path
+ return
else:
if q(m.Track).filter_by(path=full_path).count():
- return True
+ if self.verbose:
+ print 'Skipped existing track: %s' % full_path
+ return
- use_prev = None
- id3r = self.get_id3_reader()
+ meta = self.get_metadata_reader()
- artist = self.get_artist(id3r.artist)
- album = self.get_album(id3r.album, artist)
+ artist = self.get_artist(meta.artist)
+ album = self.get_album(meta.album, artist)
if artist is not None and artist not in album.artists:
album.artists.append(artist)
@@ -136,73 +146,81 @@ def save_track(self):
track.artist = artist
self.session.add(track)
- return True
+ if self.verbose:
+ print 'Added track: %s' % full_path
- def get_id3_reader(self):
- if not self.id3r or not self.id3r.same_path(self.file_path):
- self.id3r = ID3Manager(self.file_path)
+ self.track_count += 1
+ if self.track_count % 10 == 0:
+ self.session.commit()
+ if self.verbose:
+ print 'Writing to database...'
- return self.id3r
+ def get_metadata_reader(self):
+ if not self._meta or self._meta.origpath != self.file_path:
+ self._meta = MetadataManager(self.file_path)
+ return self._meta
def is_track(self):
- """Tries to guess whether the file is a valid track or not.
- """
- if os.path.isdir(self.file_path):
+ """Try to guess whether the file is a valid track or not."""
+ if not os.path.isfile(self.file_path):
return False
if '.' not in self.file_path:
return False
- ext = self.file_path[self.file_path.rfind('.') + 1:]
- if ext not in self.config.get('ACCEPTED_FORMATS', []):
- return False
-
- if not self.get_id3_reader().is_valid():
+ ext = self.file_path.rsplit('.', 1)[1]
+ if ext not in self.get_metadata_reader().VALID_FILE_EXTENSIONS:
+ if not self.quiet:
+ msg = 'Skipped file with unknown file extension: %s'
+ print msg % self.file_path
return False
return True
- def walk(self, dir_name):
- """Recursively walks through a directory looking for tracks.
- """
+ def walk(self, target):
+ """Recursively walks through a directory looking for tracks."""
- self.count += 1
- if self.count % 10 == 0:
- self.session.commit()
+ # If target is a file, try to save it as a track
+ if os.path.isfile(target):
+ self.file_path = target
+ if self.is_track():
+ self.save_track()
- if os.path.isdir(dir_name):
- for name in os.listdir(dir_name):
- self.file_path = os.path.join(dir_name, name)
- if os.path.isdir(self.file_path):
- self.walk(self.file_path)
- else:
+ # Otherwise, recursively walk the directory looking for files
+ else:
+ for root, dirs, files in os.walk(target):
@tooxie

tooxie Mar 31, 2013

Owner

How does os.walk perform with lots of files and directories? Is it better that calling self.walk recursively? I honestly don't know, many of Shiva users have large collections, this may make a big difference.

@dbrgn

dbrgn Mar 31, 2013

Collaborator

I doubt that os.walk will be slower than os.listdir, it uses the same top-down recursive approach. And readability is better. IMO using os.listdir would be premature optimization if you don't already know that it will be faster.

I tried to measure it but it's pretty hard because there seems to be some caching going on. The first time I walked my music folder it took 5.5 seconds, the following times it was down to 1.8 to 2 seconds.

A possibility to speed up things is mentioned here: http://stackoverflow.com/a/4739544/284318 But it's filesystem specific.

@felixhummel

felixhummel Apr 2, 2013

Collaborator

I let os.walk run against my 200GB music collection which is usually at least three directory levels deep (genre, artist, album).
Also I find os.walk much more readable.

@tooxie

tooxie Apr 2, 2013

Owner

I was thinking about the fact that os.walk returns a list, if it had to read all the filesystem first and load it into memory it would be quite slow. But it has to be a generator, which makes it definitely faster than calling os.listdir recursively. 👍

+ for name in files:
+ self.file_path = os.path.join(root, name)
if self.is_track():
- try:
- self.save_track()
- except Exception, e:
- logging.warning("%s not imported - %s" % (
- self.file_path, e.message))
-
- return True
+ self.save_track()
def run(self):
for mobject in self.media_dirs:
for mdir in mobject.get_valid_dirs():
self.walk(mdir)
-if __name__ == '__main__':
- use_lastfm = '--lastfm' in sys.argv
- no_metadata = '--nometadata' in sys.argv
-
- if no_metadata:
- use_lastfm = False
- if use_lastfm and not app.config.get('LASTFM_API_KEY'):
- print('ERROR: You need a Last.FM API key if you set the --lastfm '
+if __name__ == '__main__':
+ from docopt import docopt
+ arguments = docopt(__doc__)
+
+ kwargs = {
+ 'use_lastfm': arguments['--lastfm'],
+ 'no_metadata': arguments['--nometadata'],
+ 'reindex': arguments['--reindex'],
+ 'verbose': arguments['--verbose'],
+ 'quiet': arguments['--quiet'],
+ }
+
+ if kwargs['no_metadata']:
+ kwargs['use_lastfm'] = False
+
+ if kwargs['use_lastfm'] and not app.config.get('LASTFM_API_KEY'):
+ sys.stderr.write('ERROR: You need a Last.FM API key if you set the --lastfm '
'flag.\n')
sys.exit(1)
- lola = Indexer(app.config, use_lastfm=use_lastfm, no_metadata=no_metadata)
+ lola = Indexer(app.config, **kwargs)
lola.run()
# Petit performance hack: Every track will be added to the session but they
View
@@ -1,8 +1,10 @@
Flask==0.9
Flask-Restful==0.1.2
Flask-SQLAlchemy==0.16
-eyed3==0.7.1
@dbrgn

dbrgn Mar 31, 2013

Collaborator

🍻

requests==1.0.4
translitcodec==0.3
pyLast==0.5.11
lxml==3.1beta1
+mutagen==1.21
+docopt==0.6.1
+python-dateutil==2.1
View
@@ -3,11 +3,11 @@
from flask.ext.sqlalchemy import SQLAlchemy
-from shiva.utils import slugify as do_slug, randstr, ID3Manager
+from shiva.utils import slugify as do_slug, randstr, MetadataManager
db = SQLAlchemy()
-__all__ = ('db', 'Artist', 'Album', 'Track')
+__all__ = ('db', 'Artist', 'Album', 'Track', 'Lyrics')
def slugify(model, field_name):
@@ -46,7 +46,7 @@ class Artist(db.Model):
__tablename__ = 'artists'
pk = db.Column(db.Integer, primary_key=True)
- # TODO: Update the files' ID3 tags when changing this info.
+ # TODO: Update the files' Metadata when changing this info.
name = db.Column(db.String(128), nullable=False)
slug = db.Column(db.String(128), unique=True, nullable=False)
image = db.Column(db.String(256))
@@ -98,8 +98,7 @@ def __repr__(self):
class Track(db.Model):
- """
- """
+ """Track model."""
__tablename__ = 'tracks'
@@ -109,7 +108,9 @@ class Track(db.Model):
slug = db.Column(db.String(128), unique=True)
bitrate = db.Column(db.Integer)
file_size = db.Column(db.Integer)
+ # TODO could be float if number weren't converted to an int in metadata manager
length = db.Column(db.Integer)
+ # TODO number should probably be renamed to track or track_number
number = db.Column(db.Integer)
lyrics = db.relationship('Lyrics', backref='track', uselist=False)
@@ -119,7 +120,7 @@ class Track(db.Model):
nullable=True)
def __init__(self, path):
- if type(path) not in (unicode, str, file):
+ if not isinstance(path, (basestring, file)):
raise ValueError('Invalid parameter for Track. Path or File '
'expected, got %s' % type(path))
@@ -128,7 +129,7 @@ def __init__(self, path):
_path = path.name
self.set_path(_path)
- self._id3r = None
+ self._meta = None
def __setattr__(self, attr, value):
if attr == 'title':
@@ -146,19 +147,18 @@ def set_path(self, path):
if path != self.get_path():
self.path = path
if os.path.exists(self.get_path()):
- self.file_size = self.get_id3_reader().size
- self.bitrate = self.get_id3_reader().bitrate
- self.length = self.get_id3_reader().length
- self.number = self.get_id3_reader().track_number
- self.title = self.get_id3_reader().title
-
- def get_id3_reader(self):
- """Returns an object with the ID3 info reader.
- """
- if not getattr(self, '_id3r', None):
- self._id3r = ID3Manager(self.get_path())
-
- return self._id3r
+ meta = self.get_metadata_reader()
+ self.file_size = meta.filesize
+ self.bitrate = meta.bitrate
+ self.length = meta.length
+ self.number = meta.track_number
+ self.title = meta.title
+
+ def get_metadata_reader(self):
+ """Return a MetadataManager object."""
+ if not getattr(self, '_meta', None):
+ self._meta = MetadataManager(self.get_path())
+ return self._meta
def __repr__(self):
return "<Track ('%s')>" % self.title
View
@@ -499,8 +499,8 @@ def __init__(self, artist, json):
self.venue = json['venue']
def split_artists(self, json):
- if len(json) == 0:
- ([], [])
+ if not len(json):
+ return ([], [])
elif len(json) == 1:
artist = Artist.query.filter_by(name=json[0]['name']).first()
Oops, something went wrong.