Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add plugin tangoinfo #56

Merged
merged 1 commit into from
Jan 12, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
263 changes: 263 additions & 0 deletions plugins/tangoinfo/tangoinfo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,263 @@
# -*- coding: utf-8 -*-
PLUGIN_NAME = "Tango.info Adapter"
PLUGIN_AUTHOR = "Felix Elsner"
PLUGIN_DESCRIPTION = "\
<p>Load genre, date and vocalist tags from the online database tango.info.</p>\
<p>This plugin uses web scraping, but only once per album. In so doing \
it does not cause unnecessary server load for either MusicBrainz.org\
or tango.info</p>\
"
PLUGIN_VERSION = "0.1.5"
PLUGIN_API_VERSIONS = ["1.3.0", "1.4.0"]
PLUGIN_LICENSE = "GPL-2.0"
PLUGIN_LICENSE_URL = "https://www.gnu.org/licenses/gpl-2.0.html"

from picard import log
from picard.util import LockableObject
from picard.metadata import register_track_metadata_processor

from functools import partial
import re

trs = re.compile("<tr>((?!<\/tr>).+?)</tr>")
tds = re.compile("<td[^>]*>((?!<\td>).+?)</td>")
regexp = re.compile(
"""<h2><a href="\/tracks">Tracks<\/a><\/h2>(?!<\/table>)(.+?)<\/table>"""
)


class TangoInfoTagger:

class TangoInfoScrapeQueue(LockableObject):

def __init__(self):
LockableObject.__init__(self)
self.queue = {}

def __contains__(self, name):
return name in self.queue

def __iter__(self):
return self.queue.__iter__()

def __getitem__(self, name):
self.lock_for_read()
value = self.queue[name] if name in self.queue else None
self.unlock()
return value

def __setitem__(self, name, value):
self.lock_for_write()
self.queue[name] = value
self.unlock()

def append(self, name, value):
self.lock_for_write()
if name in self.queue:
self.queue[name].append(value)
value = False
else:
self.queue[name] = [value]
value = True
self.unlock()
return value

def remove(self, name):
self.lock_for_write()
value = None
if name in self.queue:
value = self.queue[name]
del self.queue[name]
self.unlock()
return value

def __init__(self):
self.albumpage_cache = {}
self.albumpage_queue = self.TangoInfoScrapeQueue()

def add_tangoinfo_data(self, album, track_metadata,
trackXmlNode, releaseXmlNode):
#log.debug("Track metadata: %s " % track_metadata)

# BARCODE or barcode
if track_metadata["barcode"]:
barcode = str(track_metadata["barcode"])
elif track_metadata["BARCODE"]:
barcode = str(track_metadata["BARCODE"])
else:
# Abort if no barcode in track_metadata
#log.debug("%s: No barcode found for %s" % (PLUGIN_NAME, \
# trackXmlNode) # No __getitem__ function? too verbose...
# )
return

tint = str("0%s-%s-%s" % (
barcode,
str(track_metadata["discnumber"])\
if track_metadata.get("discnumber") else "1",
str(track_metadata["tracknumber"])
))
#log.debug("%s: TINT: %s" % (PLUGIN_NAME, tint))

if barcode in self.albumpage_cache:
if self.albumpage_cache[barcode]:
if not self.albumpage_cache[barcode].get(tint):
log.debug("%s: No information on tango.info for barcode \
%s" % (PLUGIN_NAME, barcode))
else:
for field in ['genre', 'date', 'vocal']:
# Checks, as not to overwrite with empty data
if self.albumpage_cache[barcode][tint].get(field):
track_metadata[field] = self\
.albumpage_cache[barcode][tint][field]
else:
log.debug("%s: No information on tango.info for barcode %s" \
% (PLUGIN_NAME, barcode))
else:
#log.debug("%s: Adding to queue: %s, new track: %s" \
# % (PLUGIN_NAME, barcode, album._new_tracks[-1]))
self.website_add_track(album, album._new_tracks[-1], barcode, tint)


def website_add_track(self, album, track, barcode, tint, with_zero=False):
self.album_add_request(album)

if self.albumpage_queue.append(barcode, (track, album, tint)):
# log.debug("%s: Downloading from tango.info: track %s, album %s, \
# with TINT %s" % (\
# PLUGIN_NAME, str(track), str(album), tint)
# )

host = 'tango.info'
port = 443
path = '/%s' % (barcode)

# Call website_process as a partial func
return album.tagger.xmlws.get(host, port, path,
partial(self.website_process, barcode, with_zero),
xml=False, priority=False, important=False)

def website_process(self, barcode, with_zero, response, reply, error):

if error:
log.error("%s: Error retrieving info for barcode %s" % \
PLUGIN_NAME, barcode)
tuples = self.albumpage_queue.remove(barcode)
for track, album in tuples:
self.album_remove_request(album)
return

tangoinfo_album_data = self.barcode_process_metadata(barcode, response)
#log.debug("Album data: %s" % tangoinfo_album_data)

self.albumpage_cache[barcode] = tangoinfo_album_data
tuples = self.albumpage_queue.remove(barcode)
#log.debug("%s: Album requests: %s" % (PLUGIN_NAME, album._requests))

if tangoinfo_album_data:
if with_zero:
log.debug("%s: "
"tango.info does not seem to have data for barcode %s. However, "
"retrying with barcode %s (i.e. the same with a 0 prepended) was "
"successful. This most likely means either MusicBrainz or "
"tango.info has stored a wrong barcode for this release. You might "
"want to investigate this discrepancy and report it." \
% (PLUGIN_NAME, barcode[1:], barcode)
)

for track, album, tint in tuples:
tm = track.metadata

for field in ['genre', 'date', 'vocal']:
# Write track metadata
if self.albumpage_cache[barcode][tint].get(field):
tm[field] = self.albumpage_cache[barcode][tint][field]
for file in track.iterfiles(True):
# from track.py: def iterfiles(self, save=False)...
fm = file.metadata
for field in ['genre', 'date', 'vocal']:
# Write file metadata
if self.albumpage_cache[barcode][tint][field]:
fm[field] = self.albumpage_cache[barcode][tint][field]
self.album_remove_request(album)
else:
if with_zero:
log.debug("%s: "
"Could not load album with barcode %s even with a zero "
"prepended(%s). This most likely means tango.info does "
"not have a release for this barcode (or MusicBrainz has a "
"wrong barcode)" \
% (PLUGIN_NAME, barcode[1:], barcode)
)
for track, album, tint in tuples:
self.album_remove_request(album)
return

log.debug("%s: Retrying with 0-padded barcode for barcode %s" % \
(PLUGIN_NAME, barcode))

for track, album, tint in tuples:
retry_barcode = "0" + str(barcode)
retry_tint = "0" + tint
# Try again with new barcode, but only once(with_zero)
self.website_add_track(
album, track, retry_barcode, retry_tint, with_zero=True
)
self.album_remove_request(album)


def album_add_request(self, album):
album._requests += 1

def album_remove_request(self, album):
album._requests -= 1
album._finalize_loading(None)

def barcode_process_metadata(self, barcode, response):

# Check whether we have a concealed 404 and get the homepage
if "<title>Contents - tango.info</title>" in reponse:
log.debug("%s: No album with barcode %s on tango.info" % \
(PLUGIN_NAME, barcode))
return

table = re.findall(regexp, response)[0]
albuminfo = {}
trcontent = [match.groups()[0] for match in trs.finditer(table)]

for tr in trcontent:
trackinfo = [trmatch.groups()[0] for trmatch in tds.finditer(tr)]
if not trackinfo: # check if list is empty, e.g. contains a <th>
continue

# Get genre
if trackinfo[3] and not trackinfo[3] == "-":
genre = unicode(
re.split('<|>', trackinfo[3])[2].title(), 'utf8'
)
else:
genre = False
# Get date
if trackinfo[6] and not trackinfo[6] == "-":
date = unicode(re.split('<|>', trackinfo[6])[2], 'utf8')
else:
date = False
# Get singers
if trackinfo[5] == "-" or not trackinfo[5]:
vocal = False
elif trackinfo[5]:
vocal = unicode(re.split('<|>', trackinfo[5])[2], 'utf8')
else:
vocal = False

# expected format in HTML: <a href="/002390...">...
tint = trackinfo[8].split("\"")[1][1:]
albuminfo[tint] = {
'genre': genre,
'date': date,
'vocal': vocal
}

return albuminfo

register_track_metadata_processor(TangoInfoTagger().add_tangoinfo_data)