Skip to content

Commit

Permalink
Merge pull request #76 from blitzr/master
Browse files Browse the repository at this point in the history
PY3 compatibility
  • Loading branch information
philipmat committed Feb 20, 2017
2 parents c924153 + fe3b924 commit 73f5306
Show file tree
Hide file tree
Showing 13 changed files with 103 additions and 101 deletions.
12 changes: 6 additions & 6 deletions couchdbexporter.py
Expand Up @@ -5,7 +5,7 @@


class CouchDbExporter(object):

def __init__(self, server_url, data_quality=[]):
self.min_data_quality = data_quality
self.server = server_url
Expand All @@ -15,7 +15,7 @@ def connect(self, server_url):
u = urlparse.urlparse(server_url)
db_name = u.path.split('/')[1]
server = "%s://%s" % (u.scheme, u.netloc)
print 'Connecting to %s and database %s.' % (server, db_name)
print('Connecting to %s and database %s.' % (server, db_name))
couch = couchdb.Server(server)
self.db = couch[db_name]

Expand All @@ -27,24 +27,24 @@ def good_quality(self, what):
def execute(self, what):
    """Store ``what`` in the CouchDB database if it passes the quality check.

    The record is serialised to JSON (with ``jsonizer`` as the ``default``
    hook for values ``json`` cannot encode on its own) and parsed back
    before being handed to ``self.db.save``.

    Args:
        what: The record to store.

    Returns:
        None. Records rejected by ``self.good_quality`` are skipped silently.
    """
    if not self.good_quality(what):
        return
    # have to convert it to json and back because
    # on simple objects couchdb-python throws:
    #   TypeError: argument of type 'instance' is not iterable
    # and on dicts:
    #   AttributeError: 'dict' object has no attribute 'read'
    doc = json.loads(json.dumps(what, default=jsonizer))
    self.db.save(doc)


def finish(self, completely_done=False):
    """Do nothing; this exporter has no work to perform on finish.

    Args:
        completely_done: Unused by this exporter.
    """
    pass

def storeLabel(self, label):
    """Store a label record by passing it to ``self.execute``."""
    self.execute(label)

def storeArtist(self, artist):
    """Store an artist record by passing it to ``self.execute``."""
    self.execute(artist)

def storeRelease(self, release):
    """Store a release record by passing it to ``self.execute``."""
    self.execute(release)

Expand Down
8 changes: 4 additions & 4 deletions discogsartistparser.py
Expand Up @@ -56,7 +56,7 @@ def __init__(self, exporter, stop_after=0, ignore_missing_tags=False):
def startElement(self, name, attrs):
if not name in self.inElement:
if not self.ignore_missing_tags:
print "Error: Unknown Artist element '%s'." % name
print("Error: Unknown Artist element '%s'." % name)
sys.exit()
elif not name in self.unknown_tags:
self.unknown_tags.append(name)
Expand All @@ -73,8 +73,8 @@ def startElement(self, name, attrs):
image.width = attrs["width"]
self.artist.images.append(image)
if len(attrs) != 5:
print "ATTR ERROR"
print attrs
print("ATTR ERROR")
print(attrs)
sys.exit()

def characters(self, data):
Expand Down Expand Up @@ -121,7 +121,7 @@ def endElement(self, name):
if self.stop_after > 0 and artistCounter >= self.stop_after:
self.endDocument()
if self.ignore_missing_tags and len(self.unknown_tags) > 0:
print 'Encountered some unknown Artist tags: %s' % (self.unknown_tags)
print('Encountered some unknown Artist tags: %s' % (self.unknown_tags))
raise model.ParserStopError(artistCounter)
else:
sys.stderr.writelines("Ignoring Artist %s with no name. Dictionary: %s\n" % (self.artist.id, self.artist.__dict__))
Expand Down
8 changes: 4 additions & 4 deletions discogslabelparser.py
Expand Up @@ -51,7 +51,7 @@ def __init__(self, exporter, stop_after=0, ignore_missing_tags = False):
def startElement(self, name, attrs):
if not name in self.inElement:
if not self.ignore_missing_tags:
print "Error: Unknown Label element '%s'." % name
print("Error: Unknown Label element '%s'." % name)
sys.exit()
elif not name in self.unknown_tags:
self.unknown_tags.append(name)
Expand All @@ -68,8 +68,8 @@ def startElement(self, name, attrs):
newImage.width = attrs["width"]
self.label.images.append(newImage)
if len(attrs) != 5:
print "ATTR ERROR"
print attrs
print("ATTR ERROR")
print(attrs)
sys.exit()

def characters(self, data):
Expand Down Expand Up @@ -112,7 +112,7 @@ def endElement(self, name):
if self.stop_after > 0 and labelCounter >= self.stop_after:
self.endDocument()
if self.ignore_missing_tags and len(self.unknown_tags) > 0:
print 'Encountered some unknown Label tags: %s' % (self.unknown_tags)
print('Encountered some unknown Label tags: %s' % (self.unknown_tags))
raise model.ParserStopError(labelCounter)

self.inElement[name] = False
Expand Down
26 changes: 13 additions & 13 deletions discogsmasterparser.py
Expand Up @@ -67,7 +67,7 @@ def __init__(self, exporter, stop_after=0, ignore_missing_tags=False):
def startElement(self, name, attrs):
if not name in self.knownTags:
if not self.ignore_missing_tags:
print "Error: Unknown Master element '%s'." % name
print("Error: Unknown Master element '%s'." % name)
sys.exit()
elif not name in self.unknown_tags:
self.unknown_tags.append(name)
Expand All @@ -84,8 +84,8 @@ def startElement(self, name, attrs):
img.width = attrs["width"]
self.master.images.append(img)
if len(attrs) != 5:
print "ATTR ERROR"
print attrs
print("ATTR ERROR")
print(attrs)
sys.exit()

def characters(self, data):
Expand Down Expand Up @@ -140,9 +140,9 @@ def endElement(self, name):
# joins[self.buffer] = True
elif name == 'role':
if len(self.buffer) != 0:
#print "ROLE PRE" + str(self.buffer)
#print("ROLE PRE" + str(self.buffer))
roles_list = re.findall('([^[,]+(?:\[[^]]+])?)+', self.buffer) # thanks to jlatour
#print "ROLE POST" + str(self.buffer)
#print("ROLE POST" + str(self.buffer))
for role in roles_list:
role = role.strip()
lIndex = role.find('[')
Expand Down Expand Up @@ -176,19 +176,19 @@ def endElement(self, name):
if self.stop_after > 0 and masterCounter >= self.stop_after:
self.endDocument()
if self.ignore_missing_tags and len(self.unknown_tags) > 0:
print 'Encountered some unknown Master tags: %s' % (self.unknown_tags)
print('Encountered some unknown Master tags: %s' % (self.unknown_tags))
raise model.ParserStopError(masterCounter)

if self.stack[-1] == name:
self.stack.pop()
self.buffer = ''

def endDocument(self):
    """Signal the exporter that parsing is over by calling its ``finish``.

    Besides any call made by the XML parser driving this handler,
    ``endElement`` invokes this directly once ``stop_after`` records have
    been processed.
    """
    # Disabled debug output, kept in its Python 3 form:
    #print([genre for genre in genres])
    #print([style for style in styles])
    #print([format for format in formats])
    #print([dsc for dsc in descriptions])
    #print([j for j in joins])
    #print([(role, roles[role]) for role in roles])
    #print(len(roles))
    self.exporter.finish()
50 changes: 26 additions & 24 deletions discogsparser.py
Expand Up @@ -28,16 +28,16 @@
#sys.setdefaultencoding('utf-8')
options = None

# Maps each --output choice to the dotted "module.ClassName" path of the
# exporter class that handles it.
exporters = {
    'json': 'jsonexporter.JsonConsoleExporter',
    'pgsql': 'postgresexporter.PostgresExporter',
    'pgdump': 'postgresexporter.PostgresConsoleDumper',
    'couch': 'couchdbexporter.CouchDbExporter',
    'mongo': 'mongodbexporter.MongoDbExporter',
}

# http://www.discogs.com/help/voting-guidelines.html
data_quality_values = ( 'Needs Vote',
'Complete And Correct',
'Complete And Correct',
'Correct',
'Needs Minor Changes',
'Needs Major Changes',
Expand All @@ -48,8 +48,10 @@

def first_file_match(file_pattern):
    """Return the first input file whose name contains ``file_pattern``.

    Scans the module-level ``options.file`` sequence in order, using a plain
    substring test on each entry.

    Args:
        file_pattern: Substring to look for in each file name.

    Returns:
        The first matching entry of ``options.file``, or ``None`` when no
        entry matches.
    """
    # A lazy scan works on Python 2 and 3 alike; the earlier
    # ``filter(...)`` + ``len()``/index form breaks on Python 3, where
    # ``filter`` returns an iterator rather than a list.
    return next((f for f in options.file if file_pattern in f), None)


def parseArtists(parser, exporter):
Expand All @@ -62,10 +64,10 @@ def parseArtists(parser, exporter):
artist_file = in_file

if artist_file is None:
#print "No artist file specified."
#print("No artist file specified.")
return
elif not path.exists(artist_file):
#print "File %s doesn't exist:" % artist_file
#print("File %s doesn't exist:" % artist_file)
return

from discogsartistparser import ArtistHandler
Expand All @@ -78,12 +80,12 @@ def parseArtists(parser, exporter):
else:
parser.parse(artist_file)
except ParserStopError as pse:
print "Parsed %d artists then stopped as requested." % pse.records_parsed
print("Parsed %d artists then stopped as requested." % pse.records_parsed)
# except model.ParserStopError as pse22:
# print "Parsed %d artists then stopped as requested." % pse.records_parsed
# print("Parsed %d artists then stopped as requested." % pse.records_parsed)
# except Exception as ex:
# print "Raised unknown error"
# print type(ex)
# print("Raised unknown error")
# print(type(ex))


def parseLabels(parser, exporter):
Expand All @@ -96,10 +98,10 @@ def parseLabels(parser, exporter):
label_file = in_file

if label_file is None:
#print "No label file specified."
#print("No label file specified.")
return
elif not path.exists(label_file):
#print "File %s doesn't exist:" % label_file
#print("File %s doesn't exist:" % label_file)
return

from discogslabelparser import LabelHandler
Expand All @@ -112,7 +114,7 @@ def parseLabels(parser, exporter):
else:
parser.parse(label_file)
except ParserStopError as pse:
print "Parsed %d labels then stopped as requested." % pse.records_parsed
print("Parsed %d labels then stopped as requested." % pse.records_parsed)


def parseReleases(parser, exporter):
Expand All @@ -125,10 +127,10 @@ def parseReleases(parser, exporter):
release_file = in_file

if release_file is None:
#print "No release file specified."
#print("No release file specified.")
return
elif not path.exists(release_file):
#print "File %s doesn't exist:" % release_file
#print("File %s doesn't exist:" % release_file)
return

from discogsreleaseparser import ReleaseHandler
Expand All @@ -141,7 +143,7 @@ def parseReleases(parser, exporter):
else:
parser.parse(release_file)
except ParserStopError as pse:
print "Parsed %d releases then stopped as requested." % pse.records_parsed
print("Parsed %d releases then stopped as requested." % pse.records_parsed)


def parseMasters(parser, exporter):
Expand All @@ -154,10 +156,10 @@ def parseMasters(parser, exporter):
master_file = in_file

if master_file is None:
#print "No masters file specified."
#print("No masters file specified.")
return
elif not path.exists(master_file):
#print "File %s doesn't exist:" % master_file
#print("File %s doesn't exist:" % master_file)
return

from discogsmasterparser import MasterHandler
Expand All @@ -170,7 +172,7 @@ def parseMasters(parser, exporter):
else:
parser.parse(master_file)
except ParserStopError as pse:
print "Parsed %d masters then stopped as requested." % pse.records_parsed
print("Parsed %d masters then stopped as requested." % pse.records_parsed)



Expand All @@ -179,7 +181,7 @@ def select_exporter(options):
if options.output is None:
return exporters['json']

if exporters.has_key(options.output):
if options.output in exporters:
return exporters[options.output]
# should I be throwing an exception here?
return exporters['json']
Expand All @@ -189,8 +191,8 @@ def make_exporter(options):

parts = exp_module.split('.')
m = __import__('.'.join(parts[:-1]))
for i in xrange(1, len(parts)):
m = getattr(m, parts[i])
for p in parts[1:]:
m = getattr(m, p)

data_quality = list(x.strip().lower() for x in (options.data_quality or '').split(',') if x)
return m(options.params, data_quality=data_quality)
Expand All @@ -212,7 +214,7 @@ def main(argv):
--params "http://localhost:5353/"
'''
)
opt_parser.add_argument('-n', type=int, help='Number of records to parse')
opt_parser.add_argument('-n', type=int, help='Number of records to parse', default=0)
opt_parser.add_argument('-d', '--date', help='Date of release. For example 20110301')
opt_parser.add_argument('-o', '--output', choices=exporters.keys(), default='json', help='What to output to')
opt_parser.add_argument('-p', '--params', help='Parameters for output, e.g. connection string')
Expand Down
24 changes: 12 additions & 12 deletions discogsreleaseparser.py
Expand Up @@ -85,7 +85,7 @@ def __init__(self, exporter, stop_after=0, ignore_missing_tags=False):
def startElement(self, name, attrs):
if not name in self.knownTags:
if not self.ignore_missing_tags:
print "Error: Unknown Release element '%s'." % name
print("Error: Unknown Release element '%s'." % name)
sys.exit()
elif not name in self.unknown_tags:
self.unknown_tags.append(name)
Expand All @@ -108,8 +108,8 @@ def startElement(self, name, attrs):
img.width = attrs["width"]
self.release.images.append(img)
if len(attrs) != 5:
print "ATTR ERROR"
print attrs
print("ATTR ERROR")
print(attrs)
sys.exit()

elif name == 'format':
Expand Down Expand Up @@ -212,7 +212,7 @@ def endElement(self, name):
# Track extra artist id
elif name == 'id' and 'artist' in self.stack and 'track' in self.stack and 'sub_track' not in self.stack and 'extraartists' in self.stack:
if len(self.buffer) != 0:
teaj = model.Extraartist()
teaj = model.Extraartist()
teaj.artist_id = self.buffer
self.release.tracklist[-1].extraartists.append(teaj)

Expand Down Expand Up @@ -341,19 +341,19 @@ def endElement(self, name):
if self.stop_after > 0 and releaseCounter >= self.stop_after:
self.endDocument()
if self.ignore_missing_tags and len(self.unknown_tags) > 0:
print 'Encountered some unknown Release tags: %s' % (self.unknown_tags)
print('Encountered some unknown Release tags: %s' % (self.unknown_tags))
raise model.ParserStopError(releaseCounter)

if self.stack[-1] == name:
self.stack.pop()
self.buffer = ''

def endDocument(self):
    """Signal the exporter that parsing is over by calling its ``finish``.

    Besides any call made by the XML parser driving this handler,
    ``endElement`` invokes this directly once ``stop_after`` records have
    been processed.
    """
    # Disabled debug output, kept in its Python 3 form:
    #print([genre for genre in genres])
    #print([style for style in styles])
    #print([format for format in formats])
    #print([dsc for dsc in descriptions])
    #print([j for j in joins])
    #print([(role, roles[role]) for role in roles])
    #print(len(roles))
    self.exporter.finish()

0 comments on commit 73f5306

Please sign in to comment.