From 1f36c5c5f1a8ad680cd54b8d811b3b428c99a4ed Mon Sep 17 00:00:00 2001 From: Jonathan Ballet Date: Tue, 25 Jan 2011 11:38:18 +0100 Subject: [PATCH] lyrics: refactor the get_lyrics function to avoid repetition and make it clearer --- sonata/lyricwiki.py | 39 +++++++++++++++++++++------------------ 1 file changed, 21 insertions(+), 18 deletions(-) diff --git a/sonata/lyricwiki.py b/sonata/lyricwiki.py index 9969a3b8..2914efe4 100644 --- a/sonata/lyricwiki.py +++ b/sonata/lyricwiki.py @@ -1,6 +1,7 @@ import os import urllib import re +import sys import threading # get_lyrics_start starts a thread get_lyrics_thread import gobject @@ -37,26 +38,27 @@ def lyricwiki_editlink(self, songinfo): (artist, title)) def get_lyrics_thread(self, callback, artist, title): + + re_textarea = re.compile(r']*>') + NO_LYRICS = '<!-- PUT LYRICS HERE (and delete this entire line) -->' + + def get_content(page): + content = page.read() + content = re_textarea.split(content)[1].split("")[0] + return content.strip() + try: - lyricpage = urllib.urlopen(('http://lyrics.wikia.com/index.php?' 
- 'title=%s:%s&action=edit') \ - % (self.lyricwiki_format(artist), - self.lyricwiki_format(title))).read() - content = re.split("]*>", - lyricpage)[1].split("")[0] - content = content.strip() - redir_tag = "#redirect" - if content[:len(redir_tag)].lower() == redir_tag: + addr = 'http://lyrics.wikia.com/index.php?title=%s:%s&action=edit' \ + % (self.lyricwiki_format(artist), self.lyricwiki_format(title)) + content = get_content(urllib.urlopen(addr)) + + if content.lower().startswith("#redirect"): addr = "http://lyrics.wikia.com/index.php?title=%s&action=edit" \ % urllib.quote(content.split("[[")[1].split("]]")[0]) - lyricpage = urllib.urlopen(addr).read() - content = re.split("]*>", - lyricpage)[1].split("")[0] - content = content.strip() - lyrics = content.split( - "<lyrics>")[1].split("</lyrics>")[0].strip() - if lyrics != ('<!-- PUT LYRICS HERE ' - '(and delete this entire line) -->'): + content = get_content(urllib.urlopen(addr)) + + lyrics = content.split("<lyrics>")[1].split("</lyrics>")[0].strip() + if lyrics != NO_LYRICS: lyrics = misc.unescape_html(lyrics) lyrics = misc.wiki_to_html(lyrics) lyrics = lyrics.decode("utf-8") @@ -64,7 +66,8 @@ def get_lyrics_thread(self, callback, artist, title): else: error = _("Lyrics not found") self.call_back(callback, error=error) - except: + except Exception, e: + print >> sys.stderr, "Error while fetching the lyrics:\n%s" % e error = _("Fetching lyrics failed") self.call_back(callback, error=error)