Skip to content

Commit

Permalink
album-level distance function in lastid plugin
Browse files Browse the repository at this point in the history
This involves yet another new plugin method: album_distance. The last major
puzzle piece remaining for lastid is the ability to augment the initial
MusicBrainz search (i.e., to start a search using fingerprinted metadata).
  • Loading branch information
sampsyo committed Sep 14, 2010
1 parent 99cd36e commit 4d978f3
Show file tree
Hide file tree
Showing 5 changed files with 87 additions and 25 deletions.
1 change: 1 addition & 0 deletions .hgignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
^dist/
^beets\.egg-info/
^build/
^MANIFEST$
50 changes: 26 additions & 24 deletions beets/autotag/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -165,37 +165,34 @@ def levenshtein(s1, s2):

return levenshtein(str1, str2) / float(max(len(str1), len(str2)))

def _plurality(objs):
    """Return the element that occurs most often in ``objs``.

    Elements are used as dictionary keys, so they must be hashable.
    When several elements share the highest count, the one reached
    first while iterating over the tally wins. An empty sequence
    yields None.
    """
    # Tally how many times each distinct element appears.
    tally = defaultdict(int)
    for element in objs:
        tally[element] += 1

    # Nothing was counted, so there is no winner.
    if not tally:
        return None

    # max() keeps the first key with the highest count, which matches
    # a strict "greater than" scan over the tally.
    return max(tally, key=tally.get)

def current_metadata(items):
"""Returns the most likely artist and album for a set of Items.
Each is determined by the tag value shared by a plurality of the
Items. The result is an (artist, album) pair.
"""
# The tags we'll try to determine.
keys = 'artist', 'album'

# Make dictionaries in which to count the frequencies of different
# artist and album tags. We'll use this to find the most likely
# artist and album. Defaultdicts let the frequency default to zero.
freqs = {}
for key in keys:
freqs[key] = defaultdict(int)

# Count the frequencies.
for item in items:
for key in keys:
value = getattr(item, key)
if value: # Don't count empty tags.
freqs[key][value] += 1

# Find max-frequency tags.
likelies = {}
for key in keys:
max_freq = 0
likelies[key] = None
for tag, freq in freqs[key].items():
if freq > max_freq:
max_freq = freq
likelies[key] = tag

# NOTE(review): the two lines below recompute likelies[key] through
# _plurality over every item's tag value (empty values included). Their
# nesting relative to the loops above cannot be recovered from this view;
# confirm which of the two counting strategies is meant to be live.
values = [getattr(item, key) for item in items]
likelies[key] = _plurality(values)
return likelies['artist'], likelies['album']

def order_items(items, trackinfo):
Expand Down Expand Up @@ -292,6 +289,11 @@ def distance(items, info):
dist += track_distance(item, track_data, i+1) * TRACK_WEIGHT
dist_max += TRACK_WEIGHT

# Plugin distances.
plugin_d, plugin_dm = plugins.album_distance(items, info)
dist += plugin_d
dist_max += plugin_dm

# Normalize distance, avoiding divide-by-zero.
if dist_max == 0.0:
return 0.0
Expand Down
17 changes: 17 additions & 0 deletions beets/plugins.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,12 @@ def track_distance(self, item, info):
"""
return 0.0, 0.0

def album_distance(self, items, info):
    """Default album-level distance hook.

    Should return a (distance, distance_max) pair that is added to
    the distance value for every album-level comparison. This base
    implementation contributes nothing: both members are zero.
    """
    no_contribution = (0.0, 0.0)
    return no_contribution

listeners = None
@classmethod
def listen(cls, event):
Expand Down Expand Up @@ -127,6 +133,17 @@ def track_distance(item, info):
dist_max += dm
return dist, dist_max

def album_distance(items, info):
    """Returns the album distance calculated by plugins.

    Each plugin's album_distance(items, info) is called in turn and
    the resulting (distance, distance_max) pairs are summed
    member-wise. With no plugins, the result is (0.0, 0.0).
    """
    total = total_max = 0.0
    # The generator is consumed lazily, so plugins are still queried
    # one at a time, in order.
    contributions = (
        plugin.album_distance(items, info) for plugin in find_plugins()
    )
    for part, part_max in contributions:
        total += part
        total_max += part_max
    return total, total_max


# Event dispatch.

# All the handlers for the event system.
Expand Down
29 changes: 28 additions & 1 deletion beetsplug/lastid.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def track_distance(self, item, info):

# Track title distance.
dist += autotag._ie_dist(last_data['title'],
info['title']) \
info['title']) \
* autotag.TRACK_TITLE_WEIGHT
dist_max += autotag.TRACK_TITLE_WEIGHT

Expand All @@ -80,3 +80,30 @@ def track_distance(self, item, info):
(str(last_data), dist/dist_max))

return dist * DISTANCE_SCALE, dist_max * DISTANCE_SCALE

def album_distance(self, items, info):
    """Return a (distance, distance_max) pair for an album candidate.

    Every item is fingerprinted via match(); the artist named by a
    plurality of the fingerprint results is compared to
    info['artist'], weighted by autotag.ARTIST_WEIGHT.

    When no item yields a usable fingerprinted artist there is
    nothing to compare, so (0.0, 0.0) is returned and the plugin adds
    nothing to the overall distance.
    """
    # Get "fingerprinted" artists for each track.
    artists = []
    artist_ids = []
    for item in items:
        last_data = match(item.path)
        if last_data:
            artists.append(last_data['artist'])
            if last_data['artist_mbid']:
                artist_ids.append(last_data['artist_mbid'])

    # Vote on the most popular artist.
    last_artist = autotag._plurality(artists)
    # Collected for the fixme below; not yet part of the distance.
    last_artist_id = autotag._plurality(artist_ids)

    # No fingerprint matched (or the winning artist is empty): do not
    # feed None into the string distance or charge ARTIST_WEIGHT for a
    # comparison that never happened.
    if not last_artist:
        return 0.0, 0.0

    # Compare artist to MusicBrainz metadata.
    dist, dist_max = 0.0, 0.0
    dist += autotag._ie_dist(last_artist, info['artist']) \
            * autotag.ARTIST_WEIGHT
    dist_max += autotag.ARTIST_WEIGHT

    log.debug('Last artist (%s/%s) distance: %f' %
              (last_artist, info['artist'], dist/dist_max))

    #fixme: artist MBID currently ignored (as in vanilla tagger)
    return dist, dist_max
15 changes: 15 additions & 0 deletions test/test_autotag.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,21 @@
from beets.library import Item

class AutotagTest(unittest.TestCase):
def test_plurality_consensus(self):
    """A unanimous sequence yields its single value."""
    winner = autotag._plurality([1, 1, 1, 1])
    self.assertEqual(winner, 1)

def test_plurality_near_consensus(self):
    """One dissenting value does not unseat the majority value."""
    winner = autotag._plurality([1, 1, 2, 1])
    self.assertEqual(winner, 1)

def test_plurality_conflict(self):
    """With two values tied for the top count, either may win."""
    objs = [1, 1, 2, 2, 3]
    obj = autotag._plurality(objs)
    # assertTrue replaces the deprecated assert_ alias, which newer
    # unittest versions no longer provide.
    self.assertTrue(obj in (1, 2))

def test_current_metadata_finds_pluralities(self):
items = [Item({'artist': 'The Beetles', 'album': 'The White Album'}),
Item({'artist': 'The Beatles', 'album': 'The White Album'}),
Expand Down

0 comments on commit 4d978f3

Please sign in to comment.