# Scraped from a GitHub file view (page chrome removed): 107 lines (93 sloc), 3.6 KB
# -*- coding: utf-8 -*-
import re
import random
import locale
from collections import defaultdict
import itertools
import sqlalchemy
import discogs_client as discogs
from editing import MusicBrainzClient
import Levenshtein
import config as cfg
# Open one database connection, using the connection URL from config.MB_DB,
# and keep it for the whole run.
engine = sqlalchemy.create_engine(cfg.MB_DB)
db = engine.connect()
# Make unqualified table names in the queries below resolve against the
# "musicbrainz" schema.
db.execute('SET search_path TO musicbrainz')
# Editing client; used at the end of the script to submit the new URL
# relationships via mb.add_url().
mb = MusicBrainzClient(cfg.MB_USERNAME, cfg.MB_PASSWORD, cfg.MB_SITE)
# NOTE(review): the user agent ends in a bare "+"; a contact URL appears to
# have been lost from this listing -- restore it from the upstream file.
discogs.user_agent = 'MusicBrainzDiscogsReleaseGroupsBot/0.1 +'
# Selects (artist_credit, id, gid, name) for every release group that has at
# least one release carrying a URL relationship of link type 76 but has no
# release-group URL relationship of link type 90 yet (90 is the type this
# script adds through mb.add_url()).
#
# NOTE(review): this listing was damaged -- column references such as
# "rg.id" / "release_name.name" and the glue between the three sub-selects
# were missing, as was the closing quote.  The query below is a
# reconstruction based on the four columns the main loop unpacks and on the
# surviving JOIN/WHERE fragments; confirm it against the upstream file.  Any
# trailing ORDER BY / LIMIT clause that may have existed is not restored.
query_rg_without_master = '''
SELECT rg.artist_credit, rg.id, rg.gid, release_name.name
FROM release_group rg
JOIN release_name ON rg.name = release_name.id
WHERE rg.id IN (
    SELECT rg.id
    FROM release_group rg
    JOIN release ON rg.id = release.release_group
    JOIN l_release_url l_ru ON release.id = l_ru.entity0
    JOIN link l ON l_ru.link = l.id
    WHERE l.link_type = 76

    EXCEPT

    SELECT rg.id
    FROM release_group rg
    JOIN l_release_group_url l_rgu ON rg.id = l_rgu.entity0
    JOIN link l ON l_rgu.link = l.id
    WHERE l.link_type = 90
)
'''
# Selects the URL of every link-type-76 URL relationship attached to a
# release of one release group; the release group id is the single bound
# parameter (%s).
#
# NOTE(review): both operands of several JOIN conditions and the closing
# quote were missing from this listing.  The join columns below are a
# reconstruction (the release_name join in particular had both sides
# stripped); confirm against the upstream file.
query_rg_release_discogs = '''
SELECT url.url
FROM l_release_url l_ru
JOIN link l ON l_ru.link = l.id
JOIN release ON release.id = l_ru.entity0
JOIN release_group rg ON rg.id = release.release_group
JOIN release_name ON release.name = release_name.id
JOIN url ON url.id = l_ru.entity1
WHERE release.release_group = %s AND l.link_type = 76
'''
def discogs_artists_str(artists):
    """Render a sequence of artists as one human-readable string.

    ``artists`` must be a non-empty sequence of objects with a ``name``
    attribute.  A single artist yields just its name; several artists are
    rendered as ``"A, B and C"``.  An empty sequence raises ``IndexError``.
    """
    if len(artists) > 1:
        # Comma-separate everything but the last name, then attach the last
        # one with " and ".
        leading = ', '.join([a.name for a in artists[:-1]])
        return ' and '.join([leading, artists[-1].name])
    return artists[0].name
def discogs_get_master(release_urls):
    """Yield Discogs master information for each given release URL.

    For every URL in ``release_urls`` that matches the release pattern, the
    release is looked up through ``discogs.Release``; if it has a master, a
    tuple ``(master.title, master._id, artists_string)`` is yielded.  URLs
    that do not match, and releases without a master, yield nothing.

    NOTE(review): the URL prefix of the pattern and the ``m.group(1)`` call
    were missing from this listing; only ``[0-9]+)`` and ``int(`` survived.
    The pattern below is a reconstruction -- confirm against upstream.
    """
    release_pattern = re.compile(r'http://www\.discogs\.com/release/([0-9]+)')
    for release_url in release_urls:
        m = release_pattern.match(release_url)
        if not m:
            continue
        release_id = int(m.group(1))
        release = discogs.Release(release_id)
        master = release.master
        if master:
            yield (master.title, master._id, discogs_artists_str(master.artists))
def out(t):
    """Print the unicode text ``t`` encoded in the locale's preferred encoding."""
    # Single-argument parenthesised form: under Python 2 (which this script
    # targets) it is the same statement as ``print <expr>``.
    print(t.encode(locale.getpreferredencoding()))
# --- Main script -----------------------------------------------------------
# For every release group without a master link, collect the URLs of its
# releases, resolve them to Discogs masters and, if all of them agree on one
# master whose title is close enough to the release group name, add that
# master URL to the release group.
#
# NOTE(review): this listing lost its indentation, the ``continue`` after
# each guard, and the URL parts of two format strings.  The control flow and
# the two URLs below are reconstructions -- confirm against upstream.
# ``random`` is imported but unused in the visible text; a shuffle of the
# groups may also have been lost.

# Group the candidate release groups by artist credit.
rg_by_ac = defaultdict(list)
for ac, rg, gid, name in db.execute(query_rg_without_master):
    rg_by_ac[ac].append((rg, gid, name))
rg_grouped = rg_by_ac.values()

# Total number of candidates, used as the progress denominator.  (Using the
# last enumerate() index instead would divide by zero for a single row.)
total = sum(len(group) for group in rg_grouped)

for i, (rg, gid, name) in enumerate(itertools.chain(*rg_grouped)):
    urls = set(u[0] for u in db.execute(query_rg_release_discogs, rg))
    # Only consider groups in which at least two distinct links agree.
    if len(urls) < 2:
        continue
    out(u'%d/%d - %.2f%%' % (i, total, i * 100.0 / total))
    out(u'%s http://musicbrainz.org/release-group/%s' % (name, gid))

    masters = list(discogs_get_master(urls))
    if not masters:
        out(u' aborting, no Discogs master!')
        continue
    if len(set(masters)) > 1:
        out(u' aborting, releases with different Discogs master in one group!')
        continue
    if len(masters) != len(urls):
        out(u' aborting, releases without Discogs master in group!')
        continue

    master_name, master_id, master_artists = masters[0]
    # Guard against linking an unrelated master: the titles must be similar.
    ratio = Levenshtein.ratio(master_name.lower(), name.lower())
    if ratio < 0.8:
        out(u' Similarity ratio too small: %.2f' % ratio)
        continue

    master_url = 'http://www.discogs.com/master/%s' % master_id
    text = u'There are %d distinct Discogs links in this release group, and all point to this master URL.\n' % len(urls)
    text += u'The name of the Discogs master is “%s” (similarity: %.0f%%)' % (master_name, ratio * 100)
    text += u' by %s.' % master_artists
    out(u' %s\n %s' % (master_url, text))
    mb.add_url('release_group', gid, 90, master_url, text)
# (GitHub page footer, not part of the script: "Something went wrong with that request. Please try again.")