Merge pull request #35 from cedricporter/master

Fix decode bug, and change body_replace to use regex replacement (re.sub) instead of plain string replacement
some-programs · Jan 2, 2013 · 7dd5c92 · 7dd5c92
2 parents f62d758 + d85f14e
commit 7dd5c92
Show file tree

Hide file tree

Showing 3 changed files with 13 additions and 4 deletions.
diff --git a/config.yaml b/config.yaml
@@ -33,6 +33,10 @@ taxonomies:
 # Replace certain patterns in body
 # Simply replace the key with its value
 body_replace: {
+  # '<pre.*?lang="(.*?)".*?>': '\n{% codeblock \1 lang:\1 %}\n',
+  # '<pre.*?>': '\n{% codeblock %}\n',
+  # '</pre>': '\n{% endcodeblock %}\n',
+
 #    '[python]': '{% codeblock lang:python %}',
 #    '[/python]': '{% endcodeblock %}',
 }
diff --git a/exitwp.py b/exitwp.py
@@ -107,7 +107,8 @@ def gi(q, unicode_wrap=True):
 
             body = gi('content:encoded')
             for key in body_replace:
-                body = body.replace(key, body_replace[key])
+                # body = body.replace(key, body_replace[key])
+                body = re.sub(key, body_replace[key], body)
 
             img_srcs = []
             if body is not None:
@@ -292,10 +293,10 @@ def get_attachment_path(src, dir, dir_prefix='a'):
             for img in i['img_srcs']:
                 try:
                     urlretrieve(urljoin(data['header']['link'],
-                                        img.decode('utf-8')),
+                                        img.encode('utf-8')),
                                 get_attachment_path(img, i['uid']))
                 except:
-                    print "\n unable to download " + urljoin(data['header']['link'], img.decode('utf-8'))
+                    print "\n unable to download " + urljoin(data['header']['link'], img.encode('utf-8'))
 
         if out is not None:
             def toyaml(data):

diff --git a/html2text.py b/html2text.py
@@ -339,7 +339,11 @@ def previousIndex(self, attrs):
     def drop_last(self, nLetters):
         if not self.quiet:
             self.outtext = self.outtext[:-nLetters]
-
+
+    def handle_comment(self, data):
+        if data == "more":
+            self.o("<!-- more -->")
+
     def handle_emphasis(self, start, tag_style, parent_style):
         """handles various text emphases"""
         tag_emphasis = google_text_emphasis(tag_style)