Permalink
Browse files

Add a Zenphoto to WordPress migration script.

  • Loading branch information...
1 parent ba66a5f commit d1094b77b04b185b0bff727a8f44c0db6a915bb6 @kdeldycke committed Aug 30, 2012
Showing with 198 additions and 0 deletions.
  1. +198 −0 zenphoto-to-wordpress.py
View
198 zenphoto-to-wordpress.py
@@ -0,0 +1,198 @@
+#!/usr/bin/python
+# -*- coding: UTF-8 -*-
+
+"""
+This script transfers ZenPhoto content to WordPress.
+
+It browses the MySQL database of a ZenPhoto instance and generates an XML file. The XML produced is a WXR file (WordPress eXtended RSS), which means it can be imported into a WordPress site.
+
+A ZenPhoto album is imported as a post with a [gallery] tag in it.
+All images of an album are imported as attachments and tied to the post they belong to.
+
+The script currently doesn't take care of sub-albums, as I didn't have any to migrate.
+
+The script requires the following python modules:
+ * lxml
+ * PyMySQL
+
+These can easily be installed on Debian with the following commands:
+ $ aptitude install python-pip python-lxml
+ $ pip install PyMySQL
+"""
+
+import lxml
+import pymysql
+import email.utils
+import time
+from lxml import etree
+
+
# --- Configuration: adjust these values to match your own setup ---

# MySQL server that hosts the ZenPhoto database.
MYSQL_HOST = "127.0.0.1"
MYSQL_PORT = 3306
MYSQL_USER = "root"
MYSQL_PASSWORD = ""

# ZenPhoto database name, table prefix and base URL of the albums folder.
ZENPHOTO_DB = "mysite"
ZENPHOTO_TABLE_PREFIX = "zenphoto_"
ZENPHOTO_ALBUM_ROOT_URL = "http://mysite.example.com/zenphoto/albums"

# Root URL of the destination WordPress site.
WORDPRESS_ROOT_URL = "http://mysite.example.com"

# Path of the generated export file.
XML_FILEPATH = "./zenphoto-export.xml"

# --- End of configuration ---


# XML namespace URIs used in the export document.
NS_EXCERPT = "http://wordpress.org/export/1.2/excerpt/"
NS_CONTENT = "http://purl.org/rss/1.0/modules/content/"
NS_WFW = "http://wellformedweb.org/CommentAPI/"
NS_DC = "http://purl.org/dc/elements/1.1/"
NS_WP = "http://wordpress.org/export/1.2/"

# "{uri}" prefixes, prepended to a local tag name to build a namespaced tag.
EXCERPT = "{" + NS_EXCERPT + "}"
CONTENT = "{" + NS_CONTENT + "}"
WFW = "{" + NS_WFW + "}"
DC = "{" + NS_DC + "}"
WP = "{" + NS_WP + "}"

# Prefix -> namespace URI mapping.
NSMAP = dict(
    excerpt=NS_EXCERPT,
    content=NS_CONTENT,
    wfw=NS_WFW,
    dc=NS_DC,
    wp=NS_WP,
)
+
# Open one connection and one cursor, shared by every query in this script.
conn = pymysql.connect(
    host=MYSQL_HOST,
    port=MYSQL_PORT,
    user=MYSQL_USER,
    passwd=MYSQL_PASSWORD,
    db=ZENPHOTO_DB,
)
cr = conn.cursor()
+
def query(table_name, columns, extra=''):
    """
    Run a SELECT against one ZenPhoto table and return its rows as dicts.

    :param table_name: table name without the ``ZENPHOTO_TABLE_PREFIX``.
    :param columns: list of column names to fetch; the same names are used
        as the keys of every returned dict.
    :param extra: raw SQL appended after the table name (e.g. a WHERE
        clause).
    :return: list of dicts, one per row, mapping column name to value.
        Byte-string values are decoded as UTF-8; other values are returned
        untouched.

    The statement is assembled by string interpolation, so every argument
    must come from trusted code, never from user input.
    """
    q = "SELECT %s FROM %s%s %s" % (
        ', '.join("`%s`" % c for c in columns),
        ZENPHOTO_TABLE_PREFIX,
        table_name,
        extra,
    )
    cr.execute(q)
    results = []
    for row in cr.fetchall():
        # Test for ``bytes`` rather than ``str``: on Python 2 they are the
        # same type, and on Python 3 only bytes objects have .decode().
        cleaned_row_values = [
            value.decode('UTF-8') if isinstance(value, bytes) else value
            for value in row
        ]
        results.append(dict(zip(columns, cleaned_row_values)))
    return results
+
def clean_text(s):
    """
    Clean up multi-line text for embedding in HTML.

    CRLF line endings are normalized to LF, surrounding whitespace is
    stripped and every remaining line break becomes a ``<br />`` tag.

    :param s: text to clean; ``None`` (e.g. a NULL database column) and the
        empty string both yield ``''``.
    :return: the cleaned single-line string.
    """
    if not s:
        return ''
    return s.replace('\r\n', '\n').strip().replace('\n', "<br />")


def rfc822_date(d):
    """
    Format a date/datetime as an RFC 2822 string (used for ``pubDate``).

    :param d: object exposing ``timetuple()``; ``time.mktime`` interprets it
        as local time.
    :return: date string as produced by ``email.utils.formatdate``.
    """
    return email.utils.formatdate(time.mktime(d.timetuple()))
+
# Load every album, image and image comment row up front.
albums = query(
    'albums',
    ['id', 'folder', 'title', 'desc', 'date'],
)
photos = query(
    'images',
    ['id', 'albumid', 'filename', 'title', 'desc', 'sort_order', 'date'],
)
# Only comments attached to images are fetched.
photo_comments = query(
    'comments',
    ['id', 'ownerid', 'name', 'email', 'website', 'date', 'comment', 'IP'],
    extra="WHERE type='images'",
)
+
+# Shift all IDs to prevent WordPress collisions
+# MYSQL_LAST_INCREMENT = 50000
+# SELECT Auto_increment
+# FROM information_schema.tables
+# WHERE table_schema = DATABASE() AND table_name='coolcavepress_posts';
+#for row_list in [albums, photos, photo_comments]:
+# for row in row_list:
+# for column in ['id', 'albumid']:
+# if column in row:
+# row[column] = row[column] + MYSQL_LAST_INCREMENT
+
# WXR <item> elements, in the order they will be written to the channel.
items = []

# Index albums by id once instead of rescanning the album list for every
# photo. setdefault() keeps the first row seen for an id, exactly like the
# linear "first match" search it replaces.
albums_by_id = {}
for album_row in albums:
    albums_by_id.setdefault(album_row['id'], album_row)

# Group comments by the photo they belong to, preserving query order.
comments_by_photo = {}
for comment_row in photo_comments:
    comments_by_photo.setdefault(comment_row['ownerid'], []).append(comment_row)

# Create an attachment for each photo
for photo in photos:
    # Prepare content. A NULL title is treated like a blank one and falls
    # back to the file name.
    title = (photo['title'] or '').strip() or photo['filename']
    description = clean_text(photo['desc']) if photo.get('desc') else ''
    order = str(photo['sort_order']) if photo['sort_order'] else ''
    parent_album = albums_by_id.get(photo['albumid'])
    if parent_album is None:
        # Without its album the photo's URL cannot be built; fail with a
        # message that names the offending rows.
        raise LookupError(
            "Photo %s references unknown album %s"
            % (photo['id'], photo['albumid']))
    attachment_url = '/'.join([
        ZENPHOTO_ALBUM_ROOT_URL,
        parent_album['folder'],
        photo['filename'],
    ])
    photo_date = photo['date'].isoformat(' ')
    # Build the XML item
    attachment = etree.Element("item")
    etree.SubElement(attachment, "title").text = title
    etree.SubElement(attachment, "pubDate").text = rfc822_date(photo['date'])
    etree.SubElement(attachment, DC + "creator").text = 'admin'
    etree.SubElement(attachment, CONTENT + "encoded").text = etree.CDATA(description)
    etree.SubElement(attachment, WP + "post_id").text = str(photo['id'])
    etree.SubElement(attachment, WP + "post_date").text = photo_date
    etree.SubElement(attachment, WP + "post_date_gmt").text = photo_date
    etree.SubElement(attachment, WP + "status").text = "inherit"
    # The parent post is the album the photo belongs to.
    etree.SubElement(attachment, WP + "post_parent").text = str(photo['albumid'])
    etree.SubElement(attachment, WP + "menu_order").text = order
    etree.SubElement(attachment, WP + "post_type").text = "attachment"
    etree.SubElement(attachment, WP + "attachment_url").text = attachment_url
    # Add photo's comments
    for comment_data in comments_by_photo.get(photo['id'], []):
        comment_date = comment_data['date'].isoformat(' ')
        comment = etree.Element(WP + "comment")
        etree.SubElement(comment, WP + "comment_id").text = str(comment_data['id'])
        # CDATA() needs a string, so NULL columns become empty strings.
        etree.SubElement(comment, WP + "comment_author").text = etree.CDATA(comment_data['name'] or '')
        etree.SubElement(comment, WP + "comment_author_email").text = comment_data['email']
        etree.SubElement(comment, WP + "comment_author_url").text = comment_data['website']
        etree.SubElement(comment, WP + "comment_author_IP").text = comment_data['IP']
        etree.SubElement(comment, WP + "comment_date").text = comment_date
        etree.SubElement(comment, WP + "comment_date_gmt").text = comment_date
        etree.SubElement(comment, WP + "comment_content").text = etree.CDATA(clean_text(comment_data['comment'] or ''))
        attachment.append(comment)
    items.append(attachment)
+
# Create one post for each album
for album in albums:
    # Prepare content. A NULL or blank title falls back to the album folder
    # name, mirroring the file-name fallback used for photos, so that no
    # post is created without a title.
    title = (album['title'] or '').strip() or album['folder']
    # The [gallery] tag goes after the optional album description.
    body = "[gallery]"
    if album.get('desc'):
        body = "%s\n\n%s" % (album['desc'], body)
    body = clean_text(body)
    url = "%s/?p=%s" % (WORDPRESS_ROOT_URL, album['id'])
    album_date = album['date'].isoformat(' ')
    # Build the XML item
    post = etree.Element("item")
    etree.SubElement(post, "title").text = title
    etree.SubElement(post, "link").text = url
    etree.SubElement(post, "pubDate").text = rfc822_date(album['date'])
    etree.SubElement(post, DC + "creator").text = 'admin'
    etree.SubElement(post, "guid", attrib={"isPermaLink": "false"}).text = url
    etree.SubElement(post, CONTENT + "encoded").text = etree.CDATA(body)
    etree.SubElement(post, WP + "post_id").text = str(album['id'])
    etree.SubElement(post, WP + "post_date").text = album_date
    etree.SubElement(post, WP + "post_date_gmt").text = album_date
    etree.SubElement(post, WP + "status").text = "publish"
    etree.SubElement(post, WP + "post_type").text = "post"
    # Every album post is filed under a single "Photos" category.
    etree.SubElement(post, "category", attrib={"domain": "category", "nicename": "photos"}).text = etree.CDATA("Photos")
    items.append(post)
+
try:
    # Generate the final XML document
    channel = etree.Element("channel")
    etree.SubElement(channel, WP + "wxr_version").text = "1.2"
    for item in items:
        channel.append(item)
    root = etree.Element("rss", attrib={"version": "2.0"}, nsmap=NSMAP)
    root.append(channel)

    # etree.tostring() returns encoded bytes when an encoding is given, so
    # the file is opened in binary mode: text mode raises TypeError on
    # Python 3 and would translate line endings on Windows.
    with open(XML_FILEPATH, 'wb') as f:
        f.write(etree.tostring(root, xml_declaration=True, pretty_print=True, encoding='UTF-8'))
finally:
    # Release the database resources even if the export failed.
    cr.close()
    conn.close()

0 comments on commit d1094b7

Please sign in to comment.