/
Rakefile
236 lines (199 loc) · 6.03 KB
/
Rakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
#
# Rakefile for self-publishing a Kindle ebook from a scanned paper book.
#
# Modify these parameters to suit your environment:
# SRC (required): source PDF file name.
# TOP, BOTTOM, LEFT, RIGHT: default trimming margins (in pixels).
# SIZE: output image size; adjust it for the destination format.
# LEVEL (optional): value for ImageMagick's -level option.
#
# Debian and Ubuntu users need the following packages:
# poppler-utils poppler-data imagemagick pdftk sam2p
#
# Source PDF to convert (required). The output file names (DST, MOBI, OPF,
# HTML, ZIP) are derived from it by replacing the trailing ".pdf".
SRC = 'sample.pdf'
# Default margins, in pixels, that ppm2png chops off each page:
# LEFT x TOP from the top-left corner, RIGHT x BOTTOM from the bottom-right.
TOP = 250
BOTTOM = 100
LEFT = 50
RIGHT = 50
# Geometry handed to ImageMagick's -resize by ppm2png. Enable exactly one
# SIZE line for the target device. When SIZE contains no 'x', ppm2png also
# appends two `-splice 1x15` operations to the convert command.
# for Kindle Voyage (display size 1080x1440)
SIZE = '1016x1364' # for small books reading portrait style
# for Kindle Paperwhite (display size 768x1024)
#SIZE = '658x905' # for small books reading portrait style
#SIZE = ### UNKNOWN ### # for large books reading landscape style
#SIZE = ### UNKNOWN ### # for generating mobi, portrait style only
#SIZE = ### NOT SUPPORTED BY PW ### # for generating zip archived png files
# for Kindle, Kindle Keyboard (display size 600x800)
#SIZE = '560x735' # for small books reading portrait style
#SIZE = '720' # for large books reading landscape style
#SIZE = 'x693' # for generating mobi, portrait style only
#SIZE = '600x800' # for generating zip archived png files
# Value passed to ImageMagick's -level option by ppm2png.
LEVEL = '0%,100%'
#LEVEL = '0%,80%,0.2' # for grayscale or fullcolor origin.
#---------------------------------------------------------
# Working directories for the intermediate files. Each one is registered as
# a Rake `directory` task so it can be used as a prerequisite.
PPM_DIR = './ppm'
PNG_DIR = './png'
PDF_DIR = './pdf'
[PPM_DIR, PNG_DIR, PDF_DIR].each { |dir| directory dir }

# Output file names, derived from SRC by replacing its trailing ".pdf".
DST, MOBI, OPF, HTML, ZIP =
  %w[.out.pdf .mobi .opf .html .zip].map { |ext| SRC.sub( /\.pdf$/, ext ) }
# Return the number of pages of SRC as reported by `pdfinfo`.
#
# pdfinfo is started directly through the argument-array form of IO.popen
# instead of a "|command" string, so SRC reaches it as a single argument even
# when the file name contains spaces or shell metacharacters.
#
# @return [Integer] the page count, or 0 when the output has no "Pages:" line.
# @raise [Errno::ENOENT] if the pdfinfo executable cannot be found.
def count_pages
  info = IO.popen( ['pdfinfo', SRC], 'r:utf-8', &:read )
  info[/^Pages:\s*(\d+)/, 1].to_i
end
# Return the title of SRC as reported by `pdfinfo`.
#
# pdfinfo is started directly through the argument-array form of IO.popen
# instead of a "|command" string, so SRC reaches it as a single argument even
# when the file name contains spaces or shell metacharacters.
#
# @return [String, nil] the text after "Title:", or nil when pdfinfo prints
#   no non-empty "Title:" line.
# @raise [Errno::ENOENT] if the pdfinfo executable cannot be found.
def book_title
  info = IO.popen( ['pdfinfo', SRC], 'r:utf-8', &:read )
  info[/^Title:\s*(.+)$/, 1]
end
# Return the author of SRC as reported by `pdfinfo`.
#
# pdfinfo is started directly through the argument-array form of IO.popen
# instead of a "|command" string, so SRC reaches it as a single argument even
# when the file name contains spaces or shell metacharacters.
#
# @return [String, nil] the text after "Author:", or nil when pdfinfo prints
#   no non-empty "Author:" line.
# @raise [Errno::ENOENT] if the pdfinfo executable cannot be found.
def book_author
  info = IO.popen( ['pdfinfo', SRC], 'r:utf-8', &:read )
  info[/^Author:\s*(.+)$/, 1]
end
# Build the ordered list of per-page work file names.
#
# @param dir [String] directory part of every generated path.
# @param ext [String] file extension, without the leading dot.
# @param count [Integer] number of names to generate.
# @return [Array<String>] "dir/tmp-NNN.ext" for NNN = 000 .. count - 1,
#   zero-padded to three digits; empty when count is 0.
def image_list( dir, ext, count )
  ( 0..count - 1 ).map do |page|
    "#{dir}/tmp-#{format( '%03d', page )}.#{ext}"
  end
end
# Tell whether the extracted image for a page is on disk, under either the
# given name or the same name with a trailing "ppm" replaced by "pbm".
#
# @param ppm [String] expected path of the image.
# @return [Boolean] true if either variant exists.
def ppm_exist?( ppm )
  return true if File.exist?( ppm )

  File.exist?( ppm.sub( /ppm$/, 'pbm' ) )
end
# Resolve the on-disk name of the extracted image for a page.
#
# @param ppm [String] expected path of the image.
# @return [String, nil] ppm itself when that file exists; otherwise the same
#   path with a trailing "ppm" replaced by "pbm" when that file exists;
#   otherwise nil.
def ppm_file( ppm )
  return ppm if File.exist?( ppm )

  pbm = ppm.sub( /ppm$/, 'pbm' )
  pbm if File.exist?( pbm )
end
# Convert one extracted page image to a trimmed, resized grayscale PNG with
# ImageMagick's `convert`.
#
# The command is assembled from a list of fragments joined with single
# spaces, so argument separation no longer depends on the whitespace that
# happens to surround backslash line continuations in the source.
#
# Extra options can be injected through the KINDLIZER_PHASE2_OPT environment
# variable; they are placed right after the input file name.
#
# @param ppm [String] expected path of the source image (ppm or pbm variant,
#   resolved with ppm_file).
# @param png [String] path of the PNG to write.
# @raise [RuntimeError] if neither image variant exists, or (from sh) if the
#   command fails.
def ppm2png( ppm, png )
  source = ppm_file( ppm )
  raise "no ppm/pbm image found for #{ppm}" unless source

  command = [
    'convert', source,
    ENV['KINDLIZER_PHASE2_OPT'],
    "-level '#{LEVEL}'", '-type Grayscale', '-background white',
    "-chop #{LEFT}x#{TOP}",
    "-gravity SouthEast -chop #{RIGHT}x#{BOTTOM}",
    "-gravity NorthWest -fuzz 50% -trim -resize #{SIZE}"
  ]
  # Only applied when SIZE has no 'x' (a width-only geometry).
  unless SIZE.include?( 'x' )
    command << '-gravity SouthWest -splice 1x15 -gravity NorthEast -splice 1x15'
  end
  command << png

  # compact drops the optional environment fragment when it is unset.
  sh command.compact.join( ' ' )
end
# Wrap one PNG page into a single-page PDF with `sam2p`.
#
# Extra options can be injected through the KINDLIZER_PHASE3_OPT environment
# variable; they are placed before the file names.
#
# @param png [String] path of the PNG to read.
# @param pdf [String] path of the PDF to write.
def png2pdf( png, pdf )
  extra_opts = ENV['KINDLIZER_PHASE3_OPT']
  sh "sam2p -j:quiet #{extra_opts} #{png} #{pdf}"
end
# Per-page work file names, one entry per page reported for SRC.
pages = count_pages
PPMS = image_list( PPM_DIR, 'ppm', pages )
PNGS = image_list( PNG_DIR, 'png', pages )
PDFS = image_list( PDF_DIR, 'pdf', pages )

# Define one ppm -> png -> pdf chain of file tasks for every page.
PPMS.zip( PNGS, PDFS ).each do |ppm, png, pdf|
  file pdf => [PDF_DIR, png] do |t|
    png2pdf( t.prerequisites[1], t.name )
  end

  file png => [PNG_DIR, ppm] do |t|
    ppm2png( t.prerequisites[1], t.name )
  end

  file ppm => [PPM_DIR, SRC] do
    # A single pdfimages run writes the images for the whole document, so it
    # is skipped once the last expected image is already on disk.
    unless ppm_exist?( PPMS.last )
      # Alternative extractor:
      #   sh 'pdftoppm', '-r', '300', '-gray', SRC, "#{PPM_DIR}/tmp"
      # Multi-argument sh runs the command without a shell, so SRC may
      # contain spaces or shell metacharacters.
      sh 'pdfimages', SRC, "#{PPM_DIR}/tmp"
    end
  end
end
task :default => :pdf

desc 'generate pdf file by concat all png files.'
task :pdf => DST

# Concatenate the per-page PDFs into an intermediate file inside PDF_DIR,
# then copy it to DST with the metadata dumped from the source PDF applied.
# The pdftk commands use multi-argument sh (no shell), so file names derived
# from SRC may contain spaces or shell metacharacters.
file DST => [PDF_DIR, 'metadata.txt'] + PDFS do
  merged = "#{PDF_DIR}/#{DST}"
  sh 'pdftk', *PDFS, 'cat', 'output', merged
  sh 'pdftk', merged, 'update_info', 'metadata.txt', 'output', DST
end

desc 'generate metadata file from source pdf.'
task :metadata => 'metadata.txt'

# Dump the metadata of the source PDF (the task's only prerequisite).
file 'metadata.txt' => SRC do |t|
  sh 'pdftk', *t.prerequisites, 'dump_data', 'output', "./#{t.name}"
end
desc 'crop ppm files to png files.'
task :png => [PNG_DIR, *PNGS]

# Generic conversion rule for a .png target; the source is whatever
# prerequisite Rake resolves first.
# NOTE(review): the source here is a Regexp rather than an extension string
# or a proc -- confirm that the Rake version in use accepts this form.
rule '.png' => /\.p[bp]m$/ do |t|
  ppm2png( t.prerequisites.first, t.name )
end

desc 'extract image files from source pdf.'
task :ppm => [PPM_DIR, SRC, *PPMS]
desc 'clean up ppm images.'
task 'clean-ppm' do
  # ppm_file returns nil for pages whose image is not on disk; those entries
  # are dropped so the remaining images are still removed. rm_f ignores
  # files that are already gone instead of stopping at the first one.
  rm_f PPMS.map { |f| ppm_file( f ) }.compact
end

desc 'clean up png images.'
task 'clean-png' do
  rm_f PNGS
end

desc 'clean up temporary pdf files.'
task 'clean-pdf' do
  rm FileList["#{PDF_DIR}/*.pdf"]
end

desc 'clean up all tmp files.'
task :clean => ['clean-png', 'clean-ppm', 'clean-pdf'] do
  # A missing generated file must not prevent the rest of the cleanup.
  rm_f ['metadata.txt', HTML, OPF]
  # Only attempt to remove the work directories that actually exist.
  [PPM_DIR, PNG_DIR, PDF_DIR].each do |dir|
    rmdir dir if File.directory?( dir )
  end
end
# Both commands use multi-argument sh, which runs them without a shell, so
# the archive and book names derived from SRC may contain spaces or shell
# metacharacters.

desc 'generate zip file'
task :zip => PNGS do
  # -j stores the images without their directory part.
  sh 'zip', '-j', ZIP, *PNGS
end

desc 'generate MOBI file.'
task :mobi => [OPF, HTML] + PNGS do
  sh 'kindlegen', OPF, '-o', MOBI
end
require 'cgi/escape'

# Generate the OPF package file for kindlegen from the source PDF.
#
# Title and author come from the PDF metadata and are XML-escaped before
# being inserted, so characters such as "&" or "<" cannot produce a malformed
# package file. A missing title or author becomes an empty element.
# The heredoc body is kept flush-left so the generated file content is not
# affected by code indentation; gsub still strips one leading tab per line.
rule '.opf' => '.pdf' do |t|
  title = CGI.escapeHTML( book_title.to_s )
  author = CGI.escapeHTML( book_author.to_s )
  opf = <<-OPF.gsub( /^\t/, '' )
<?xml version="1.0" encoding="utf-8"?>
<package unique-identifier="uid">
<metadata>
<dc-metadata xmlns:dc="http://purl.org/metadata/dublin_core"
xmlns:oebpackage="http://openebook.org/namespaces/oeb-package/1.0/">
<dc:Title>#{title}</dc:Title>
<dc:Language>en-US</dc:Language>
<dc:Creator>#{author}</dc:Creator>
<dc:Date>#{Time.now.strftime '%m/%d/%Y'}</dc:Date>
</dc-metadata>
<x-metadata>
<output encoding="utf-8" content-type="text/x-oeb1-document"></output>
<EmbeddedCover>#{PNGS[0]}</EmbeddedCover>
</x-metadata>
</metadata>
<manifest>
<item id="contents" media-type="text/html" href="#{HTML}"></item>
</manifest>
<spine>
<itemref idref="contents" />
</spine>
<tours></tours>
<guide>
<reference type="start" title="contents" href="#{HTML}"></reference>
</guide>
</package>
  OPF
  File.open( t.name, 'w:utf-8' ) { |f| f.write opf }
end
require 'cgi/escape'

# Generate the HTML page that kindlegen turns into the book body: one <img>
# per page image, separated by Mobipocket page breaks.
#
# The title comes from the PDF metadata and is HTML-escaped before being
# inserted, so characters such as "&" or "<" cannot break the markup. A
# missing title becomes an empty <title> element.
# The heredoc body is kept flush-left so the generated file content is not
# affected by code indentation; gsub still strips one leading tab per line.
rule '.html' => '.pdf' do |t|
  title = CGI.escapeHTML( book_title.to_s )
  page_images = PNGS.map { |png| %Q|<img style="height: 100%;" src="#{png}" />| }
  html = <<-HTML.gsub( /^\t/, '' )
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
<html lang="ja-JP">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>#{title}</title>
</head>
<body style="text-align: right;">
#{page_images.join "<mbp:pagebreak />\n\t\t"}
</body>
</html>
  HTML
  File.open( t.name, 'w:utf-8' ) { |f| f.write html }
end