Permalink
Browse files

add fetch_images, but not enabled yet

still wondering how to present the images
  • Loading branch information...
stoneyrh committed Mar 11, 2012
1 parent 054a99c commit 631b20bdf809b9b04d4b9e432fd7442ffb7d8850
Showing with 32 additions and 3 deletions.
  1. +32 −3 web2py_online_book.py
View
@@ -6,6 +6,7 @@
'''
import urllib
+import os
from HTMLParser import HTMLParser
class WebArticle(object):
@@ -20,13 +21,16 @@ def title(self):
# Accessor for the article body: returns self.__content, the value that
# append() extends with each call.
def content(self):
return self.__content
# Accessor for the article's images: returns self.__images itself (not a
# copy), i.e. the collection that append_image() adds URLs to.
+ def images(self):
+ return self.__images
+
# Replace the article's stored title with `title`.
def set_title(self, title):
self.__title = title
# Extend the article body: `content` is added onto self.__content with +=,
# so it has to be compatible with the existing value (presumably a string --
# the initial value is set outside the lines visible here; confirm).
def append(self, content):
self.__content += content
# Renamed by this commit from add_image to append_image. Records one image
# URL by appending `url` to self.__images, the collection images() returns.
- def add_image(self, url):
+ def append_image(self, url):
self.__images.append(url)
class WebDocParser(HTMLParser, object):
@@ -130,8 +134,10 @@ def handle_startendtag(self, tag, attrs):
elif tag == 'img':
for attr, value in attrs:
if attr == 'src':
- self.__article.append('\n**image** %s\n' % value)
- self.__article.add_image(value)
+ if not value.startswith('http'):
+ value = 'http://web2py.com' + value
+ self.__article.append('<img %s>' % value)
+ self.__article.append_image(value)
break
def handle_data(self, data):
@@ -160,6 +166,18 @@ def article_from(url):
parser.feed(html)
return parser.article()
def fetch_images(article):
    '''
    Download every image referenced by an article.

    article -- object whose images() method returns an iterable of image
               URLs (strings).

    Returns a list of (data, name) tuples in the same order as
    article.images(). data is whatever read() returned for that URL; name is
    the image's zero-based position rendered as a string ('0', '1', ...).
    An article without images yields an empty list.

    Errors raised by urllib.urlopen() or read() are not caught here and
    propagate to the caller.
    '''
    images = []
    for seq, url in enumerate(article.images()):
        # Single-argument parenthesised print: same output as the print
        # statement used elsewhere in this script.
        print('Fetching image from "' + url + '"...')
        response = urllib.urlopen(url)
        try:
            data = response.read()
        finally:
            # Release the connection even if read() fails; the original
            # left every response open until garbage collection.
            response.close()
        images.append((data, '%s' % seq))
    return images
+
def main():
articles = []
base = 'http://web2py.com/books/default/chapter/29/%d'
@@ -185,5 +203,16 @@ def main():
book.write(footline)
book.close()
+ '''
+ for article in articles:
+ images = fetch_images(article)
+ if images:
+ folder = article.title().replace(' ', '')
+ if not os.path.exists(folder):
+ os.mkdir(folder)
+ for data, name in images:
+ print name, data
+ '''
+
# Script entry point: call main() only when this file is executed directly,
# not when it is imported as a module.
if __name__ == '__main__':
main()

0 comments on commit 631b20b

Please sign in to comment.