Minor clean up of formatting and comments

some-programs · Apr 9, 2012 · 5f951c0 · 5f951c0
1 parent 58362ee
commit 5f951c0
Show file tree

Hide file tree

Showing 2 changed files with 14 additions and 23 deletions.
diff --git a/config.yaml b/config.yaml
@@ -5,7 +5,6 @@ wp_exports: wordpress-xml
 build_dir: build
 
 # Output format: primary choices are html or markdown.
-#target_format: markdown
 target_format: markdown
 
 # The date format of the Wordpress export file.

diff --git a/exitwp.py b/exitwp.py
@@ -17,8 +17,7 @@
 '''
 exitwp - Wordpress xml exports to Jekyll blog format conversion
 
-Tested with Wordpress 3.3.1 and jekyll master branch from 2011-03-26
-pandoc is required to be installed if conversion from html will be done.
+Tested with Wordpress 3.3.1 and jekyll 0.11.2
 
 '''
 ######################################################
@@ -41,33 +40,30 @@ def __init__(self):
         XMLTreeBuilder.__init__(self)
         self._parser.StartNamespaceDeclHandler=self._start_ns
         self.namespaces={}
- 
+
     def _start_ns(self, prefix, ns):
         self.namespaces[prefix]='{' + ns + '}'
 
-
 def html2fmt(html, target_format):
-#    html = html.replace("\n\n", '<br/><br/>')
- #   html = html.replace('<pre lang="xml">', '<pre lang="xml"><![CDATA[')
- #   html = html.replace('</pre>', ']]></pre>')
+    #   html = html.replace("\n\n", '<br/><br/>')
+    #   html = html.replace('<pre lang="xml">', '<pre lang="xml"><![CDATA[')
+    #   html = html.replace('</pre>', ']]></pre>')
     if target_format=='html':
         return html
     else:
-        # This is like very stupid but I was having troubles with unicode encodings and process.POpen
+        # This is probably a stupid solution,
+        # but I was having trouble with character encodings
+        # and subprocess.Popen.
         return html2text_file(html, None)
 
 def parse_wp_xml(file):
-
     parser=ns_tracker_tree_builder()
     tree=ElementTree()
-
     print "reading: " + wpe
-
     root=tree.parse(file, parser)
-
     ns=parser.namespaces
     ns['']=''
-    
+
     c=root.find('channel')
 
     def parse_header():
@@ -87,7 +83,9 @@ def parse_items():
                 if not "domain" in tax.attrib: continue
                 t_domain=unicode(tax.attrib['domain'])
                 t_entry=unicode(tax.text)
-                if not (t_domain in taxonomy_filter) and not (t_domain in taxonomy_entry_filter and taxonomy_entry_filter[t_domain]==t_entry):
+                if (not (t_domain in taxonomy_filter) and
+                       not (t_domain in taxonomy_entry_filter and
+                       taxonomy_entry_filter[t_domain]==t_entry)):
                     if not t_domain in export_taxanomies:
                             export_taxanomies[t_domain]=[]
                     export_taxanomies[t_domain].append(t_entry)
@@ -136,7 +134,6 @@ def gi(q, unicode_wrap=True):
         'items': parse_items(),
     }
 
-
 def write_jekyll(data, target_format):
 
     sys.stdout.write("writing")
@@ -233,7 +230,6 @@ def get_attachment_path(src, dir, dir_prefix='a'):
     #data['items']=[]
 
     for i in data['items']:
-
         skip_item = False
 
         for field, value in item_field_filter.iteritems():
@@ -279,15 +275,12 @@ def get_attachment_path(src, dir, dir_prefix='a'):
         else:
             print "Unknown item type :: " +  i['type']
 
-
         if download_images:
             for img in i['img_srcs']:
                 try:
-                    urlretrieve(urljoin(data['header']['link'],img.decode('utf-8')), get_attachment_path(img, i['uid']))
+                    urlretrieve(urljoin(data['header']['link'], img.decode('utf-8')), get_attachment_path(img, i['uid']))
                 except:
-                    print "\n unable to download "+urljoin(data['header']['link'],img.decode('utf-8'))
-
-
+                    print "\n unable to download " + urljoin(data['header']['link'], img.decode('utf-8'))
 
         if out is not None:
             def toyaml(data):
@@ -314,7 +307,6 @@ def toyaml(data):
             out.close()
     print "\n"
 
-
 wp_exports=glob(wp_exports+'/*.xml')
 for wpe in wp_exports:
     data=parse_wp_xml(wpe)