Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Merge pull request #23 from amithgeorge/fix_issue_no_5

Better fix for issue no 5
  • Loading branch information...
commit 58362ee9d725e155b66c776b5b26265bb4f179f6 2 parents bd608ad + c69e8e9
@thomasf authored
Showing with 18 additions and 12 deletions.
  1. +18 −12 exitwp.py
View
30 exitwp.py
@@ -1,6 +1,6 @@
#!/usr/bin/env python
-from xml.etree.ElementTree import ElementTree
+from xml.etree.ElementTree import ElementTree, XMLTreeBuilder
from subprocess import call, PIPE, Popen
import os, codecs
from datetime import datetime
@@ -17,7 +17,7 @@
'''
exitwp - Wordpress xml exports to Jekyll blog format conversion
-Tested with Wordpress 3.1 and jekyll master branch from 2011-03-26
+Tested with Wordpress 3.3.1 and jekyll master branch from 2011-03-26
pandoc is required to be installed if conversion from html will be done.
'''
@@ -36,6 +36,16 @@
item_field_filter = config['item_field_filter']
date_fmt=config['date_format']
+class ns_tracker_tree_builder(XMLTreeBuilder):
+ def __init__(self):
+ XMLTreeBuilder.__init__(self)
+ self._parser.StartNamespaceDeclHandler=self._start_ns
+ self.namespaces={}
+
+ def _start_ns(self, prefix, ns):
+ self.namespaces[prefix]='{' + ns + '}'
+
+
def html2fmt(html, target_format):
# html = html.replace("\n\n", '<br/><br/>')
# html = html.replace('<pre lang="xml">', '<pre lang="xml"><![CDATA[')
@@ -47,21 +57,17 @@ def html2fmt(html, target_format):
return html2text_file(html, None)
def parse_wp_xml(file):
- ns = {
- '':'', #this is the default namespace
- 'excerpt':"{http://wordpress.org/export/1.1/excerpt/}",
- 'content':"{http://purl.org/rss/1.0/modules/content/}",
- 'wfw':"{http://wellformedweb.org/CommentAPI/}",
- 'dc':"{http://purl.org/dc/elements/1.1/}",
- 'wp':"{http://wordpress.org/export/1.2/}",
- 'atom':"{http://www.w3.org/2005/Atom}"
- }
+ parser=ns_tracker_tree_builder()
tree=ElementTree()
print "reading: " + wpe
- root=tree.parse(file)
+ root=tree.parse(file, parser)
+
+ ns=parser.namespaces
+ ns['']=''
+
c=root.find('channel')
def parse_header():
Please sign in to comment.
Something went wrong with that request. Please try again.