Permalink
Browse files

avoid linkextractor to return unicode urls (fixes tests after replacement of w3lib urljoin_rfc)
  • Loading branch information...
1 parent 8b68910 commit b8fed65a13f67783ffde9b864584038d42cec066 @kalessin kalessin committed May 16, 2012
Showing with 2 additions and 3 deletions.
  1. +2 −3 slybot/linkextractor.py
View
@@ -180,9 +180,8 @@ def iterlinks(htmlpage):
base_href = remove_entities(htmlpage.url, encoding=htmlpage.encoding)
def mklink(url, anchortext=None, nofollow=False):
url = url.strip()
- fullurl = urljoin(base_href,
- remove_entities(url, encoding=htmlpage.encoding), htmlpage.encoding)
- return Link(fullurl, text=anchortext, nofollow=nofollow)
+ fullurl = urljoin(base_href, remove_entities(url, encoding=htmlpage.encoding))
+ return Link(fullurl.encode(htmlpage.encoding), text=anchortext, nofollow=nofollow)
# iter to quickly scan only tags
tag_iter = (t for t in htmlpage.parsed_body if isinstance(t, HtmlTag))

0 comments on commit b8fed65

Please sign in to comment.