Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Add fetch_images, but do not enable it yet

Still deciding how to present the images.
  • Loading branch information...
commit 631b20bdf809b9b04d4b9e432fd7442ffb7d8850 1 parent 054a99c
Ronghui Yu authored March 11, 2012

Showing 1 changed file with 32 additions and 3 deletions. Show diff stats Hide diff stats

  1. 35  web2py_online_book.py
35  web2py_online_book.py
@@ -6,6 +6,7 @@
6 6
 '''
7 7
 
8 8
 import urllib
  9
+import os
9 10
 from HTMLParser import HTMLParser
10 11
 
11 12
 class WebArticle(object):
@@ -20,13 +21,16 @@ def title(self):
20 21
     def content(self):
21 22
         return self.__content
22 23
 
  24
+    def images(self):
  25
+        return self.__images
  26
+
23 27
     def set_title(self, title):
24 28
         self.__title = title
25 29
 
26 30
     def append(self, content):
27 31
         self.__content += content
28 32
 
29  
-    def add_image(self, url):
  33
+    def append_image(self, url):
30 34
         self.__images.append(url)
31 35
 
32 36
 class WebDocParser(HTMLParser, object):
@@ -130,8 +134,10 @@ def handle_startendtag(self, tag, attrs):
130 134
             elif tag == 'img':
131 135
                 for attr, value in attrs:
132 136
                     if attr == 'src':
133  
-                        self.__article.append('\n**image** %s\n' % value)
134  
-                        self.__article.add_image(value)
  137
+                        if not value.startswith('http'):
  138
+                            value = 'http://web2py.com' + value
  139
+                        self.__article.append('<img %s>' % value)
  140
+                        self.__article.append_image(value)
135 141
                         break
136 142
 
137 143
     def handle_data(self, data):
@@ -160,6 +166,18 @@ def article_from(url):
160 166
     parser.feed(html)
161 167
     return parser.article()
162 168
 
  169
+def fetch_images(article):
  170
+    images = []
  171
+    seq = 0
  172
+    for url in article.images():
  173
+        print 'Fetching image from "' + url + '"...'
  174
+        opener = urllib.urlopen(url)
  175
+        data = opener.read()
  176
+        name = '%s' % seq
  177
+        seq = seq + 1
  178
+        images.append((data, name))
  179
+    return images
  180
+
163 181
 def main():
164 182
     articles = []
165 183
     base = 'http://web2py.com/books/default/chapter/29/%d'
@@ -185,5 +203,16 @@ def main():
185 203
         book.write(footline)
186 204
         book.close()
187 205
 
  206
+        '''
  207
+        for article in articles:
  208
+            images = fetch_images(article)
  209
+            if images:
  210
+                folder = article.title().replace(' ', '')
  211
+                if not os.path.exists(folder):
  212
+                    os.mkdir(folder)
  213
+                for data, name in images:
  214
+                    print name, data
  215
+        '''
  216
+
188 217
 if __name__ == '__main__':
189 218
     main()

0 notes on commit 631b20b

Please sign in to comment.
Something went wrong with that request. Please try again.