Permalink
Browse files

Handle special-character placeholders (-lt-, -gt-, -nbsp-, -amp-, -quot-) outside of tables as well

  • Loading branch information...
1 parent 631b20b commit acc70f81851244b73013e09184ec5a17d3272b3c @stoneyrh committed Mar 11, 2012
Showing with 16 additions and 5 deletions.
  1. +16 −5 web2py_online_book.py
View
@@ -33,6 +33,14 @@ def append(self, content):
def append_image(self, url):
self.__images.append(url)
+ def polish(self):
+ self.__content = self.__content.\
+ replace('-lt-', '<').\
+ replace('-gt-', '>').\
+ replace('-nbsp-', ' ').\
+ replace('-amp-', '&').\
+ replace('-quot-', '"')
+
class WebDocParser(HTMLParser, object):
#
class CodeTable(object):
@@ -49,11 +57,6 @@ def create_column(self):
def __call__(self, data):
row = self.rows[-1]
if data != '\n':
- data = data.replace('-lt-', '<').\
- replace('-gt-', '>').\
- replace('-nbsp-', ' ').\
- replace('-amp-', '&').\
- replace('-quot-', '"')
if len(row[-1]) == 0:
row[-1].append('')
row[-1][-1] += data
@@ -150,6 +153,13 @@ def handle_entityref(self, name):
def handle_charref(self, name):
pass
+ def feed(self, html):
+ super(WebDocParser, self).feed(html)
+ self.__article.polish()
+
+ def close(self):
+ super(WebDocParser, self).close()
+
def article_from(url):
opener = urllib.urlopen(url)
html = opener.read()
@@ -164,6 +174,7 @@ def article_from(url):
replace('&quot;', '-quot-')
parser = WebDocParser(url)
parser.feed(html)
+ parser.close()
return parser.article()
def fetch_images(article):

0 comments on commit acc70f8

Please sign in to comment.