diff --git a/config.yaml b/config.yaml
index e362bc6..56f53e9 100644
--- a/config.yaml
+++ b/config.yaml
@@ -5,7 +5,6 @@ wp_exports: wordpress-xml
build_dir: build
# Output format: primary choices are html or markdown.
-#target_format: markdown
target_format: markdown
# The date format of the wikipedia export file.
diff --git a/exitwp.py b/exitwp.py
index 4184c8f..7f65738 100755
--- a/exitwp.py
+++ b/exitwp.py
@@ -17,8 +17,7 @@
'''
exitwp - Wordpress xml exports to Jekykll blog format conversion
-Tested with Wordpress 3.3.1 and jekyll master branch from 2011-03-26
-pandoc is required to be installed if conversion from html will be done.
+Tested with WordPress 3.3.1 and Jekyll 0.11.2
'''
######################################################
@@ -41,33 +40,30 @@ def __init__(self):
XMLTreeBuilder.__init__(self)
self._parser.StartNamespaceDeclHandler=self._start_ns
self.namespaces={}
-
+
def _start_ns(self, prefix, ns):
self.namespaces[prefix]='{' + ns + '}'
-
def html2fmt(html, target_format):
-# html = html.replace("\n\n", '
')
- # html = html.replace('
', '', ']]>
')
+ # html = html.replace("\n\n", '
')
+ # html = html.replace('', '', ']]>
')
if target_format=='html':
return html
else:
- # This is like very stupid but I was having troubles with unicode encodings and process.POpen
+ # This is probably a stupid solution,
+ # but I was having trouble with character encodings
+ # and subprocess.Popen.
return html2text_file(html, None)
def parse_wp_xml(file):
-
parser=ns_tracker_tree_builder()
tree=ElementTree()
-
print "reading: " + wpe
-
root=tree.parse(file, parser)
-
ns=parser.namespaces
ns['']=''
-
+
c=root.find('channel')
def parse_header():
@@ -87,7 +83,9 @@ def parse_items():
if not "domain" in tax.attrib: continue
t_domain=unicode(tax.attrib['domain'])
t_entry=unicode(tax.text)
- if not (t_domain in taxonomy_filter) and not (t_domain in taxonomy_entry_filter and taxonomy_entry_filter[t_domain]==t_entry):
+ if (not (t_domain in taxonomy_filter) and
+ not (t_domain in taxonomy_entry_filter and
+ taxonomy_entry_filter[t_domain]==t_entry)):
if not t_domain in export_taxanomies:
export_taxanomies[t_domain]=[]
export_taxanomies[t_domain].append(t_entry)
@@ -136,7 +134,6 @@ def gi(q, unicode_wrap=True):
'items': parse_items(),
}
-
def write_jekyll(data, target_format):
sys.stdout.write("writing")
@@ -233,7 +230,6 @@ def get_attachment_path(src, dir, dir_prefix='a'):
#data['items']=[]
for i in data['items']:
-
skip_item = False
for field, value in item_field_filter.iteritems():
@@ -279,15 +275,12 @@ def get_attachment_path(src, dir, dir_prefix='a'):
else:
print "Unknown item type :: " + i['type']
-
if download_images:
for img in i['img_srcs']:
try:
- urlretrieve(urljoin(data['header']['link'],img.decode('utf-8')), get_attachment_path(img, i['uid']))
+ urlretrieve(urljoin(data['header']['link'], img.decode('utf-8')), get_attachment_path(img, i['uid']))
except:
- print "\n unable to download "+urljoin(data['header']['link'],img.decode('utf-8'))
-
-
+ print "\n unable to download " + urljoin(data['header']['link'], img.decode('utf-8'))
if out is not None:
def toyaml(data):
@@ -314,7 +307,6 @@ def toyaml(data):
out.close()
print "\n"
-
wp_exports=glob(wp_exports+'/*.xml')
for wpe in wp_exports:
data=parse_wp_xml(wpe)