Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

refactoring

1. add images to article
2. add vim tags and footline
3. refactoring for more efficiency
  • Loading branch information...
commit df9e7f6202b8c073a1295fe1cf6633b2b40dcb01 1 parent 8cd3045
Ronghui Yu authored
Showing with 40 additions and 18 deletions.
  1. +40 −18 web2py_online_book.py
58 web2py_online_book.py
View
@@ -3,14 +3,24 @@
class WebArticle(object):
def __init__(self):
- self.title = ""
- self.content = ""
+ self.title_ = ""
+ self.content_ = ""
+ self.images_ = []
+
+ def title(self):
+ return self.title_
+
+ def content(self):
+ return self.content_
def set_title(self, title):
- self.title = title
+ self.title_ = title
def append(self, content):
- self.content += content
+ self.content_ += content
+
+ def add_image(self, url):
+ self.images_.append(url)
class WebDocParser(HTMLParser, object):
def __init__(self, url):
@@ -21,20 +31,21 @@ def __init__(self, url):
self.levels_ = 0
def article(self):
- return self.article_ if self.article_.title and self.article_.content else None
+ return self.article_ if self.article_.title() and self.article_.content() else None
def handle_starttag(self, tag, attrs):
- # If the title is still emtpy, then try to get it
- if not self.article_.title and tag == 'a':
- href, url = attrs[0]
- if self.url_.endswith(url):
- self.consumer_ = self.article_.set_title
- if self.levels_ == 0 and tag == 'div':
- for attr, value in attrs:
- if attr == 'class' and value == 'article':
- self.consumer_ = self.article_.append
- self.levels_ = 1
- if self.levels_ > 0:
+ if self.levels_ == 0:
+ if tag == 'div':
+ for attr, value in attrs:
+ if attr == 'class' and value == 'article':
+ self.consumer_ = self.article_.append
+ self.levels_ = 1
+ # If the title is still empty, then try to get it
+ elif tag == 'a' and not self.article_.title():
+ href, url = attrs[0]
+ if self.url_.endswith(url):
+ self.consumer_ = self.article_.set_title
+ elif self.levels_ > 0:
self.levels_ = self.levels_ + 1
if tag == 'p':
self.article_.append("\n")
@@ -42,7 +53,7 @@ def handle_starttag(self, tag, attrs):
def handle_endtag(self, tag):
if self.levels_ > 0:
self.levels_ = self.levels_ - 1
- if self.levels_ == 0:
+ elif self.levels_ == 0:
self.consumer_ = None
def handle_startendtag(self, tag, attrs):
@@ -53,6 +64,7 @@ def handle_startendtag(self, tag, attrs):
for attr, value in attrs:
if attr == 'src':
self.article_.append("\n**image** %s\n" % value)
+ self.article_.add_image(value)
break
def handle_data(self, data):
@@ -81,10 +93,20 @@ def main():
article = article_from(url)
if article:
articles.append(article)
+ print ' ' * 10, article.title()
if articles:
+ headline = 'web2py online book'
+ footline = 'vim:tw=78:fo=tcq2:isk=!-~,^*,^\|,^\":ts=8:ft=help:norl:'
+ index = '\n'.join(['|' + article.title().replace(' ', '-') + '|' for article in articles])
book = open('web2py_online_book.txt', 'w')
+ book.write(headline)
+ book.write('\n')
+ book.write('\n')
+ book.write(index)
for article in articles:
- book.write("\n".join([article.title, article.content]))
+ book.write('\n'.join(['*' + article.title().replace(' ', '-') + '*', article.content()]))
+ book.write('\n')
+ book.write(footline)
book.close()
if __name__ == '__main__':
Please sign in to comment.
Something went wrong with that request. Please try again.