forked from bmaland/happyblogger
/
pre-process.rb
executable file
·70 lines (57 loc) · 2.1 KB
/
pre-process.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
#!/usr/bin/env ruby
# This script processes files generated by org-mode and does some
# transformations to make them acceptable for Jekyll.
require "rubygems"
require "hpricot"
require "yaml"
require "time"
# Converts one org-mode HTML export into a Jekyll-ready document.
#
# post   - String holding the exported HTML (or an already converted file).
# layout - value stored under the "layout" key of the YAML front matter.
#
# Returns the YAML front matter followed by the contents of
# div#outline-container-1 and div#footnotes, or nil when the input already
# starts with a YAML header ("---") and must be left untouched.
def process_post(post, layout)
  # A leading "---" means an earlier run already added the front matter.
  return nil if post[0..2] == '---'

  doc = Hpricot(post)

  # Pull the tag spans out of the document so they do not leak into the
  # title; their text becomes the (lower-cased) category list.
  tags = (doc/'span.tag').remove
  cats = (tags/'span').map { |tag| tag.inner_html.downcase }

  # The top-level heading carries the post title.
  h2 = (doc/'h2#sec-1').remove

  # `remove` returns the matched collection, which is truthy even when
  # nothing matched, so emptiness is what tells us a timestamp was present.
  # Only then is the <br /> that follows the timestamp wrapper dropped.
  timestamp = (doc/'span.timestamp-wrapper').remove
  date = nil
  if timestamp && !timestamp.empty?
    br = (doc/'div#text-1 p br:first')
    br.remove unless br.empty?
    date = parse_org_timestamp(timestamp.search('span.timestamp').inner_html)
  end

  # The top outline is the post body.
  body = doc.search('div#outline-container-1').inner_html

  # Footnotes, if any, with their heading downgraded from h2 to h3. Only the
  # opening/closing tags are rewritten so footnote text that merely mentions
  # "h2" is preserved.
  footnotes = doc.search('div#footnotes').inner_html.
    gsub('<h2', '<h3').gsub('</h2', '</h3')

  meta = {}
  meta['layout'] = layout
  # org-mode pads the heading with &nbsp; entities before the tag spans;
  # drop those entities but keep the ordinary spaces between words.
  meta['title'] = h2.inner_html.gsub('&nbsp;', '').strip
  meta['categories'] = cats unless cats.empty?
  meta['date'] = date if date

  meta.to_yaml + "---\n\n" + body + footnotes
end

# Turns the text of an org-mode timestamp such as "03/06/09 Wed 15:00"
# (dd/mm/yy, weekday, optional clock time) into a Time.
#
# Returns nil when the text is nil, empty or whitespace only.
def parse_org_timestamp(text)
  fields = text.to_s.split
  return nil if fields.empty?

  # European dd/mm/yy is reordered to yy/mm/dd so that Time.parse accepts it.
  day, month, year = fields[0].split('/')
  reordered = [year, month, day].join('/')

  # fields[2] is the clock time and is absent for date-only timestamps.
  # "+2" is the author's local time zone offset.
  Time.parse([reordered, fields[2], '+2'].compact.join(' '))
end
# Runs process_post over every file matching +glob+ and rewrites in place
# each file for which it returns converted content.
#
# glob   - file pattern handed to Dir.glob.
# layout - layout name passed through to process_post.
#
# Files for which process_post returns nil are not rewritten.
def process(glob, layout)
  Dir.glob(glob).each do |path|
    # Read the whole file up front; File.read closes the handle for us.
    converted = process_post(File.read(path), layout)
    next unless converted

    # The block form closes (and therefore flushes) the file before moving
    # on, instead of leaving an open handle for the garbage collector.
    File.open(path, "w") { |out| out.write(converted) }
  end
end
# Convert the exported files that live next to this script: blog posts get
# the "post" layout, standalone pages the "page" layout.
script_dir = File.dirname(__FILE__)
[["_posts", "post"], ["pages", "page"]].each do |subdir, layout|
  process("#{script_dir}/#{subdir}/*.html", layout)
end