Navigation Menu

Skip to content
This repository has been archived by the owner on Jun 3, 2020. It is now read-only.

Commit

Permalink
Minor clean up of formatting and comments
Browse files Browse the repository at this point in the history
  • Loading branch information
thomasf committed Apr 9, 2012
1 parent 58362ee commit 5f951c0
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 23 deletions.
1 change: 0 additions & 1 deletion config.yaml
Expand Up @@ -5,7 +5,6 @@ wp_exports: wordpress-xml
build_dir: build

# Output format: primary choices are html or markdown.
#target_format: markdown
target_format: markdown

# The date format of the WordPress export file.
Expand Down
36 changes: 14 additions & 22 deletions exitwp.py
Expand Up @@ -17,8 +17,7 @@
'''
exitwp - WordPress XML exports to Jekyll blog format conversion
Tested with Wordpress 3.3.1 and jekyll master branch from 2011-03-26
pandoc is required to be installed if conversion from html will be done.
Tested with Wordpress 3.3.1 and jekyll 0.11.2
'''
######################################################
Expand All @@ -41,33 +40,30 @@ def __init__(self):
XMLTreeBuilder.__init__(self)
self._parser.StartNamespaceDeclHandler=self._start_ns
self.namespaces={}

def _start_ns(self, prefix, ns):
self.namespaces[prefix]='{' + ns + '}'


def html2fmt(html, target_format):
# html = html.replace("\n\n", '<br/><br/>')
# html = html.replace('<pre lang="xml">', '<pre lang="xml"><![CDATA[')
# html = html.replace('</pre>', ']]></pre>')
# html = html.replace("\n\n", '<br/><br/>')
# html = html.replace('<pre lang="xml">', '<pre lang="xml"><![CDATA[')
# html = html.replace('</pre>', ']]></pre>')
if target_format=='html':
return html
else:
# This is like very stupid but I was having troubles with unicode encodings and process.POpen
# This is probably a stupid solution,
# but I was having trouble with character encodings
# and subprocess.Popen.
return html2text_file(html, None)

def parse_wp_xml(file):

parser=ns_tracker_tree_builder()
tree=ElementTree()

print "reading: " + wpe

root=tree.parse(file, parser)

ns=parser.namespaces
ns['']=''

c=root.find('channel')

def parse_header():
Expand All @@ -87,7 +83,9 @@ def parse_items():
if not "domain" in tax.attrib: continue
t_domain=unicode(tax.attrib['domain'])
t_entry=unicode(tax.text)
if not (t_domain in taxonomy_filter) and not (t_domain in taxonomy_entry_filter and taxonomy_entry_filter[t_domain]==t_entry):
if (not (t_domain in taxonomy_filter) and
not (t_domain in taxonomy_entry_filter and
taxonomy_entry_filter[t_domain]==t_entry)):
if not t_domain in export_taxanomies:
export_taxanomies[t_domain]=[]
export_taxanomies[t_domain].append(t_entry)
Expand Down Expand Up @@ -136,7 +134,6 @@ def gi(q, unicode_wrap=True):
'items': parse_items(),
}


def write_jekyll(data, target_format):

sys.stdout.write("writing")
Expand Down Expand Up @@ -233,7 +230,6 @@ def get_attachment_path(src, dir, dir_prefix='a'):
#data['items']=[]

for i in data['items']:

skip_item = False

for field, value in item_field_filter.iteritems():
Expand Down Expand Up @@ -279,15 +275,12 @@ def get_attachment_path(src, dir, dir_prefix='a'):
else:
print "Unknown item type :: " + i['type']


if download_images:
for img in i['img_srcs']:
try:
urlretrieve(urljoin(data['header']['link'],img.decode('utf-8')), get_attachment_path(img, i['uid']))
urlretrieve(urljoin(data['header']['link'], img.decode('utf-8')), get_attachment_path(img, i['uid']))
except:
print "\n unable to download "+urljoin(data['header']['link'],img.decode('utf-8'))


print "\n unable to download " + urljoin(data['header']['link'], img.decode('utf-8'))

if out is not None:
def toyaml(data):
Expand All @@ -314,7 +307,6 @@ def toyaml(data):
out.close()
print "\n"


wp_exports=glob(wp_exports+'/*.xml')
for wpe in wp_exports:
data=parse_wp_xml(wpe)
Expand Down

0 comments on commit 5f951c0

Please sign in to comment.