-
Notifications
You must be signed in to change notification settings - Fork 0
/
import.rb
84 lines (71 loc) · 2.43 KB
/
import.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# coding: utf-8
# This script converts a WordPress export file into Jekyll posts formatted as Markdown.
# How to run this script:
# 1. Put this script into your Jekyll root folder.
# 2. Export your posts to a .xml file using the WordPress export tool.
# 3. Put the exported .xml file in the Jekyll root folder as well.
# 4. Run `ruby import.rb [your-export-file-name.xml]`
require 'fileutils'
require 'time'
require 'hpricot'
require 'safe_yaml'
require 'html2markdown'
module JekyllImport
  # This importer takes a *.xml file, which can be exported from your
  # wordpress.com blog (/wp-admin/export.php).
  module WordpressDotCom
    # Imports every channel item of the export file as a Jekyll document.
    #
    # Each item is written to "_<post_type>s/<YYYY-MM-DD>-<slug>.md",
    # relative to the current working directory. The file starts with YAML
    # front matter (layout, title, tags) followed by the item's content
    # converted to Markdown. An item whose conversion or write fails is
    # reported on stdout and skipped; the remaining items are still imported.
    #
    # @param filename [Hash] options; +:source+ is the path of the export
    #   file (defaults to the first command-line argument).
    # @raise [ArgumentError] if no +:source+ path was given.
    # @return [Hash] number of imported items, keyed by post type.
    def self.process(filename = {:source => ARGV[0]})
      source = filename[:source]
      if source.nil?
        raise ArgumentError, 'no export file given (usage: ruby import.rb your-export-file-name.xml)'
      end

      import_count = Hash.new(0)
      doc = Hpricot::XML(File.read(source))

      (doc/:channel/:item).each do |item|
        title = item.at(:title).inner_text.strip
        slug = item.at('wp:post_name').inner_text.tr('/', '-')
        # Fallback to "prettified" title if post_name is empty (can happen)
        slug = sluggify(title) if slug.empty?

        name = "#{post_date(item).strftime('%Y-%m-%d')}-#{slug}.md"
        type = item.at('wp:post_type').inner_text
        tags = item.search('category[@domain="post_tag"]').map { |t| t.inner_text }.uniq
        header = {
          'layout' => type,
          'title' => title,
          'tags' => tags
        }

        begin
          write_post(type, name, header, item.at('content:encoded').inner_text)
        rescue => e
          puts "Couldn't import post!"
          puts "Title: #{title}"
          puts "Name/Slug: #{name}\n"
          puts "Error: #{e.message}"
          next
        end

        import_count[type] += 1
      end

      # Hash#each returns the hash itself, so the counts are also the
      # method's return value.
      import_count.each do |key, value|
        puts "Imported #{value} #{key}s"
      end
    end

    # Turns a post title into a file-name-friendly slug: every run of
    # non-alphanumeric characters becomes a single hyphen, hyphens at either
    # end are dropped, and the result is lower-cased.
    #
    # @param title [String]
    # @return [String]
    def self.sluggify(title)
      title.gsub(/[^[:alnum:]]+/, '-').gsub(/\A-|-\z/, '').downcase
    end

    # Returns the item's wp:post_date as a Time. Falls back to the current
    # time when the element is missing or its text cannot be parsed, so a
    # bad date never aborts the import.
    def self.post_date(item)
      node = item.at('wp:post_date')
      return Time.now unless node

      Time.parse(node.inner_text)
    rescue StandardError
      Time.now
    end
    private_class_method :post_date

    # Writes one document to "_<type>s/<name>".
    #
    # Only the post body is passed to the HTML-to-Markdown converter; the
    # YAML front matter is written verbatim. The conversion runs before the
    # file is opened, so a failing conversion leaves no partial file behind.
    def self.write_post(type, name, header, html)
      markdown = HTMLPage.new(contents: html).markdown
      dir = "_#{type}s"
      FileUtils.mkdir_p dir
      File.open("#{dir}/#{name}", 'w') do |f|
        f.puts header.to_yaml
        f.puts '---'
        f.puts markdown
      end
    end
    private_class_method :write_post
  end
end
# Script entry point: import the export file named by the first
# command-line argument.
JekyllImport::WordpressDotCom.process(:source => ARGV[0])