Skip to content
Browse files

lyrics: fetching lyrics from lyrics.wikia.com now works again

The trick is to transform HTML entities, like '&lt;' into '<', so that we don't
have to expect one or the other in the code; we always have the 'nicest'
representation.
  • Loading branch information...
1 parent 1f36c5c commit 7fbb3d50d0891a4ff6208999510268846ebd4b26 @multani committed Jan 25, 2011
Showing with 6 additions and 2 deletions.
  1. +6 −2 sonata/lyricwiki.py
View
8 sonata/lyricwiki.py
@@ -1,3 +1,4 @@
+from HTMLParser import HTMLParser
import os
import urllib
import re
@@ -40,11 +41,14 @@ def lyricwiki_editlink(self, songinfo):
def get_lyrics_thread(self, callback, artist, title):
re_textarea = re.compile(r'<textarea[^>]*>')
- NO_LYRICS = '&lt;!-- PUT LYRICS HERE (and delete this entire line) --&gt;'
+ NO_LYRICS = '<!-- PUT LYRICS HERE (and delete this entire line) -->'
def get_content(page):
content = page.read()
content = re_textarea.split(content)[1].split("</textarea>")[0]
+ # Transform HTML entities, like '&lt;' into '<', of the textarea
+ # content.
+ content = HTMLParser().unescape(content)
return content.strip()
try:
@@ -57,7 +61,7 @@ def get_content(page):
% urllib.quote(content.split("[[")[1].split("]]")[0])
content = get_content(urllib.urlopen(addr))
- lyrics = content.split("&lt;lyrics&gt;")[1].split("&lt;/lyrics&gt;")[0].strip()
+ lyrics = content.split("<lyrics>")[1].split("</lyrics>")[0].strip()
if lyrics != NO_LYRICS:
lyrics = misc.unescape_html(lyrics)
lyrics = misc.wiki_to_html(lyrics)

0 comments on commit 7fbb3d5

Please sign in to comment.
Something went wrong with that request. Please try again.