Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Fetching contributors…

Cannot retrieve contributors at this time

466 lines (404 sloc) 17.041 kb
#!/usr/bin/env ruby
# -*- coding: utf-8 -*-
require 'optparse'
require 'fileutils'
require 'erb'
require 'yaml'
require 'pathname'
include FileUtils
# Command-line entry point.
#
# Options are layered, later layers overriding earlier ones:
#   1. the built-in defaults below,
#   2. the optional `.mkbok.yml` in the current directory,
#   3. the command-line switches.
#
# Afterwards it either scaffolds a new project (-g/--generate) or builds the
# requested output formats: PDF through generate_pdf, everything else through
# generate_ebook.
#
# Side effects: consumes ARGV (OptionParser#parse!), sets the globals $here,
# $root and $outDir from the current working directory, and may call `exit`.
# Invalid switch values raise ArgumentError from inside the option callbacks.
def main()
  options = {
    "build"          => "pdf",
    "lang"           => "zh",
    "config"         => "latex/config.yml",
    "template"       => "latex/template.tex",
    "chapter-files"  => "*/*.markdown",
    "appendix-files" => "*appendix/*.markdown",
    # The key is spelled "jeykll" throughout the script and in existing
    # project configs, so the spelling is kept.
    "jeykll"         => false
  }

  config_file = File.join('.mkbok.yml')
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  if File.exist? config_file
    config_options = YAML.load_file(config_file)
    # An empty YAML file does not load as a Hash; merging it would raise.
    options.merge!(config_options) if config_options.is_a?(Hash)
  end

  option_parser = OptionParser.new do |opts|
    executable_name = File.basename($PROGRAM_NAME)
    opts.banner = "make ebooks from markdown plain text\nUsage: #{executable_name} [options]\n"

    # Create a switch
    opts.on("-b","--build FORMAT","build format:epub,pdf,html. seperated by ','") do |format|
      format.split(",").each do |fmt|
        unless ["pdf","epub","html"].include?(fmt)
          raise ArgumentError,"FORMAT must be one of 'pdf,epub,html' format"
        end
      end
      options["build"] = format
    end

    opts.on("-d","--debug","debug") do
      options["debug"] = true
    end

    # Create a flag
    opts.on("-l","--lang LANG","language selection") do |lang|
      unless ["zh", "en"].include?(lang)
        raise ArgumentError,"LANG must be in zh or en"
      end
      options["lang"] = lang
    end

    opts.on("-c","--config CONFIG","config file") do |config|
      unless File.exist? config
        raise ArgumentError,"config file \"#{config}\" doesn't exist"
      end
      options["config"] = config
    end

    opts.on("-t","--template template.tex","latex template file") do |template|
      unless File.exist? template
        raise ArgumentError,"template file \"#{template}\" doesn't exist"
      end
      options["template"] = template
    end

    opts.on("-n","--name book name","book name") do |name|
      # \A...\z anchor the whole string; ^...$ would accept "ok\n../evil".
      unless name =~ /\A[a-zA-Z0-9]+\z/
        raise ArgumentError,"name should be [a-zA-Z0-9]"
      end
      options["name"] = name
    end

    opts.on("-g","--generate project","project name") do |name|
      # The name becomes a directory that generate_project deletes and
      # recreates, so the validation must cover the whole string.
      unless name =~ /\A[a-zA-Z0-9]+\z/
        raise ArgumentError,"name should be [a-zA-Z0-9]"
      end
      options["command"] = "generate"
      options["name"] = name
    end
  end
  option_parser.parse!

  #$here = File.expand_path(File.dirname(__FILE__))
  $here = Dir.getwd
  $root = File.join($here)
  $outDir = File.join($root, 'pdf')

  # The book name defaults to the name of the working directory.
  options["name"] = File.basename(Dir.getwd) unless options["name"]
  options["outputformat"] = options["build"].split(',')
  puts options.inspect if options["debug"]

  if options["command"] == "generate"
    generate_project(options["name"])
    exit
  end

  generate_pdf(options) if options["outputformat"].include?("pdf")

  # Whatever is left after removing "pdf" is handled by pandoc directly.
  options["outputformat"].delete("pdf")
  generate_ebook(options) unless options["outputformat"].empty?
end
# Runs the given block while short-named copies of the book figures exist.
#
# Every "#$root/figures/18333*.png" whose name contains
# "18333fig0<C>[0]<N>-tn" is copied to "<C>.<N>.png" in the same directory.
# The copies are removed again when the block finishes, whether it returns
# or raises.
#
# Files that match the glob but not the rename pattern are left alone.
# Without that guard the computed target equals the source, so the copy
# fails and the cleanup would delete the original figure.
#
# Returns the value of the block.
def figures(&block)
  copies = []
  begin
    Dir["#$root/figures/18333*.png"].each do |file|
      target = file.sub(/18333fig0(\d)0?(\d+)\-tn/, '\1.\2')
      next if target == file
      FileUtils.cp(file, target)
      copies << target
    end
    block.call
  ensure
    # rm_f: a copy may already be gone; cleanup must not mask the real error.
    copies.each { |copy| FileUtils.rm_f(copy) }
  end
end
# Tells whether +command+ can be executed.
#
# Returns +command+ itself when it is directly executable as given.
# Otherwise returns true if any PATH directory holds an executable named
# +command+, "+command+.exe" or "+command+.cmd", and false if none does.
# An unset or empty PATH yields false.
def command_exists?(command)
  return command if File.executable?(command)

  ENV.fetch('PATH', '').split(File::PATH_SEPARATOR).any? do |path|
    cmd = "#{path}/#{command}"
    File.executable?(cmd) || File.executable?("#{cmd}.exe") || File.executable?("#{cmd}.cmd")
  end
end
# Tiny substitution DSL: evaluates the block with +string+ as self, after
# giving that one string object a method `s` that is an alias of gsub!.
# Each `s pattern, replacement` line in the block therefore edits +string+
# in place.
#
# Returns the same (mutated) string object.
def replace(string, &block)
  # `alias` inside instance_eval defines `s` for this string object only.
  string.instance_eval { alias :s :gsub! }
  string.instance_eval(&block)
  string
end
# Escapes the text of an inline verbatim span so it can be placed inside
# \texttt{...}.
#
#   \            -> {\textbackslash}
#   ~            -> {\textasciitilde}
#   $ # _ ^ %    -> backslash + the character + {}
#   & { }        -> backslash + the character + {}
#
# The last row fixes unescaped "&" and braces, which are active in LaTeX.
# Everything is done in one pass so the braces and backslashes inserted by
# one rule are never re-escaped by another.
#
# Returns a new string; the argument is not modified.
def verbatim_sanitize(string)
  string.gsub(/[\\~\$\#_\^%&{}]/) do |char|
    case char
    when '\\' then '{\textbackslash}'
    when '~'  then '{\textasciitilde}'
    else "\\#{char}{}"
    end
  end
end
# Normalises the book's markdown before it is piped to pandoc.
#
# string - markdown source; modified in place (each `s` is gsub! through the
#          `replace` helper) and also returned.
# config - accepted for symmetry with post_pandoc; not read here.
#
# Returns the rewritten string.
def pre_pandoc(string, config)
  replace(string) do
    # "##### title #####" and "#### title ####" are turned into marker lines
    # that post_pandoc converts to \paragraph / \subsubsection.
    s /\#\#\#\#\# (.*?) \#\#\#\#\#/, 'PARASECTION: \1'
    # Pandoc discards #### subsubsections #### - this hack recovers them
    s /\#\#\#\# (.*?) \#\#\#\#/, 'SUBSUBSECTION: \1'
    # convert div figures to normal markdown format
    # http://johnmacfarlane.net/pandoc/README.html
    # The path is captured with [^"]* so it stops at the closing quote of
    # src; a greedy .* would swallow further attributes (alt="...", ...)
    # into the path. The dots of "../" are matched literally.
    s /^<div class=\"figures\"> <img src=\"\.\.\/figures\/([^\"]*)\".*<\/div>/, '![](figures/\1)\ '
    # ![alt](../figures/LASO-tiny210-433.jpg) => ![alt](figures/LASO-tiny210-433.jpg)
    s /^!\[(.*)\]\([\.\/]*\/(figures\/.*)\)/, '![\1](\2)'
    # Turns URLs into clickable links: both `http://...` in backticks and a
    # tab-indented URL standing alone after a blank line.
    s /\`(http:\/\/[A-Za-z0-9\/\%\&\=\-\_\\\.]+)\`/, '<\1>'
    s /(\n\n)\t(http:\/\/[A-Za-z0-9\/\%\&\=\-\_\\\.]+)\n([^\t]|\t\n)/, '\1<\2>\1'
    # Process figures: "Insert 18333figNNNN.png" followed by a caption line
    # numbered "N-N. caption" becomes a "FIG: caption" marker line.
    s /Insert\s18333fig\d+\.png\s*\n.*?\d{1,2}-\d{1,2}\. (.*)/, 'FIG: \1'
  end
end
# Rewrites the LaTeX produced by pandoc so it fits the book template.
#
# string  - pandoc's LaTeX output; modified in place (each `s` below is
#           gsub! via the `replace` helper) and returned.
# config  - hash; the keys read here are 'fig', 'tab', 'prechap', 'postchap',
#           'dql' and 'dqr'. The first four are interpolated into regexes.
# lang    - language code; "zh" enables the list-spacing, \item[*] and
#           \includegraphics width rewrites.
# chapter - when anything other than true, top-level headings are emitted as
#           starred (unnumbered) commands and \chap also gets a TOC entry.
#
# The substitutions depend on their order; do not reorder them.
def post_pandoc(string, config, lang, chapter=true)
replace(string) do
# Used to turn whitespace in the config phrases into the regex token \s.
space = /\s/
# Reformat for the book documentclass as opposed to article
s '\section', '\chap'
# Strip one "sub" level: \subsection -> \section, \subsubsection -> \subsection.
# NOTE(review): this hits every occurrence of the text "\sub", not only
# sectioning commands.
s '\sub', '\\'
# Markers planted by pre_pandoc.
s /SUBSUBSECTION: (.*)/, '\subsubsection{\1}'
s /PARASECTION: (.*)/, '\paragraph{\1}'
# Enable proper cross-reference
s /#{config['fig'].gsub(space, '\s')}\s*(\d+)\-\-(\d+)/, '\imgref{\1.\2}'
s /#{config['tab'].gsub(space, '\s')}\s*(\d+)\-\-(\d+)/, '\tabref{\1.\2}'
s /#{config['prechap'].gsub(space, '\s')}\s*(\d+)(\s*)#{config['postchap'].gsub(space, '\s')}/, '\chapref{\1}\2'
# Miscellaneous fixes
s /FIG: (.*)/, '\img{\1}'
s '\begin{enumerate}[1.]', '\begin{enumerate}'
s /(\w)--(\w)/, '\1-\2'
# ``quoted'' text gets the configured opening/closing quote marks.
s /``(.*?)''/, "#{config['dql']}\\1#{config['dqr']}"
# Typeset the maths in the book with TeX
s '\verb!p = (n(n-1)/2) * (1/2^160))!', '$p = \frac{n(n-1)}{2} \times \frac{1}{2^{160}}$)'
s '2\^{}80', '$2^{80}$'
s /\sx\s10\\\^\{\}(\d+)/, '\e{\1}'
# Convert inline-verbatims into \texttt (which is able to wrap)
s /\\verb(\W)(.*?)\1/ do
"{\\texttt{#{verbatim_sanitize($2)}}}"
end
# Ensure monospaced stuff is in a smaller font
# NOTE(review): this looks like a no-op, because the substitution just above
# already consumed every \verb span this pattern can match - confirm.
s /(\\verb(\W).*?\2)/, '{\footnotesize\1}'
#s /(\\begin\{verbatim\}.*?\\end\{verbatim\})/m, '{\footnotesize\1}'
# Shaded verbatim block
#s /(\\begin\{verbatim\}.*?\\end\{verbatim\})/m, '\begin{shaded}\1\end{shaded}'
#s /\t/m, ' '
# is using fancyvrb
s /\\begin\{verbatim\}/,'\\begin{Verbatim}[tabsize=4,formatcom=\color{colorchapter},frame=lines]'
s /\\end\{verbatim\}/,'\\end{Verbatim}'
# try moreverb
#s /\\begin\{verbatim\}/,'\\begin{verbatimtab}[4]'
#s /\\end\{verbatim\}/,'\\end{verbatimtab}'
if lang=="zh"
# http://www.devdaily.com/blog/post/latex/control-line-spacing-in-itemize-enumerate-tags
# http://wiki.ctex.org/index.php/LaTeX/%E5%88%97%E8%A1%A8
# set the space of itemsize
s /(\\begin\{itemize\})/m,'\begin{itemize}\setlength{\itemsep}{1pt}\setlength{\parskip}{0pt}\setlength{\parsep}{0pt}'
s /(\\begin\{enumerate\})/m,'\begin{enumerate}\setlength{\itemsep}{1pt}\setlength{\parskip}{0pt}\setlength{\parsep}{0pt}'
# hardcode for itemize to use * instead of dot, which is missed in some chinese fonts
# and keep \item inside \enumerate env is not changed
# \item -> \item[*]
# solution is provided by Alexis, and it works under ruby 1.9+ only due to bug in 1.8.7
# http://stackoverflow.com/questions/9115018/regular-expression-using-ruby-string-gsub-method-to-replace-multi-matches
if RUBY_VERSION >= "1.9"
# The lookahead accepts an \item only if an \end{itemize} follows with no
# \begin{itemize} in between.
s /^\\item(?=((?!\\begin\{itemize\}).)*\\end\{itemize\})/m, '\\item[*]'
else
# Fallback for older Ruby: rewrites every \item at the start of a line.
s /(^\\item)/m,'\item[*]'
end
# change the width to standard .6 width
s /\\includegraphics/m, '\\includegraphics[width=\\imgwidth]'
end
if chapter != true
# Unnumbered headings: starred commands, plus a TOC line for \chap.
s /^\\chap\{(.*)\}/,'\chapter*{\1}'"\n"'\addcontentsline{toc}{chapter}{\1}'
s /^\\section\{(.*)\}/,'\section*{\1}'
s /^\\subsection\{(.*)\}/,'\subsection*{\1}'
end
end
end
# Returns +str+ without its first four lines and its last two lines
# (presumably the Jekyll front matter and a fixed footer - confirm against
# the project's markdown files).
#
# Input too short to contain a body yields "" instead of raising: the slice
# is nil when there are fewer than four lines.
def check_jekyll(str)
  body = str.lines.to_a[4..-3]
  body ? body.join : ""
end
# Reads the given markdown files and joins them with blank lines.
# Each path is echoed; non-files are skipped (a glob built from an unset
# option can resolve to a directory). With options["jeykll"] set, every file
# goes through check_jekyll first.
def read_markdown_sources(files, options)
  files.select { |file| File.file?(file) }.map do |file|
    puts "\t =>"+file
    raw = File.read(file)
    options["jeykll"] ? check_jekyll(raw) : raw
  end.join("\n\n")
end

# Pipes markdown through pandoc (markdown -> latex), applying pre_pandoc to
# the input and post_pandoc to the output. Returns the LaTeX string.
def markdown_to_latex(markdown, config, lang, chapter = true)
  IO.popen('pandoc -p --no-wrap -f markdown -t latex', 'w+') do |pipe|
    pipe.write(pre_pandoc(markdown, config))
    pipe.close_write
    post_pandoc(pipe.read, config, lang, chapter)
  end
end

# Builds "<name>.<lang>.pdf" in $root from the markdown sources.
#
# options - hash prepared by main; the keys read are "config", "template",
#           "lang", "preface-files", "chapter-files", "appendix-files",
#           "jeykll", "name" and "debug".
#
# Steps: load the YAML config into $config, convert preface, chapters and
# appendix to LaTeX, render the ERB template into latex/<lang>/main.tex, run
# xelatex three times, then move the resulting PDF.
#
# The template is rendered with this method's binding, so it can refer to
# the locals defined before that point (config, lang, preface, latex,
# appendix, options, ...). Keep those names stable.
#
# Exits with status 1 if pandoc/xelatex are missing or a xelatex pass fails.
def generate_pdf(options)
  $config = YAML.load_file(options["config"])
  template = ERB.new(File.read(options["template"]))
  languages = [options["lang"]]

  missing = ['pandoc', 'xelatex'].reject { |command| command_exists?(command) }
  unless missing.empty?
    puts "Missing dependencies: #{missing.join(', ')}."
    puts "Install these and try again."
    exit 1
  end

  puts "Will generate pdf for the following languages using template #{options["template"]}:"
  puts " #{languages}"
  puts

  # --debug only sets options["debug"]; $DEBUG is only set by `ruby -d`.
  # Honour both, since the error hint below tells users to pass --debug.
  debug = options["debug"] || $DEBUG

  figures do
    languages.each do |lang|
      # Language-specific settings override the defaults when present.
      defaults = $config['default'] || {}
      config = $config[lang].is_a?(Hash) ? defaults.merge($config[lang]) : defaults
      puts "#{lang}:"

      prefacefiles = "#$root/#{lang}/#{options['preface-files']}"
      puts "\tParsing preface markdown... #{prefacefiles} "
      prefacemarkdown = read_markdown_sources(Dir["#{prefacefiles}"].sort, options)
      preface = markdown_to_latex(prefacemarkdown, config, lang, false)

      chapterfiles = "#$root/#{lang}/#{options['chapter-files']}"
      puts "\n\tParsing main chapters markdown... #{chapterfiles} "
      # Order chapters by the first number in the path below the language
      # directory, with the full path as tie-breaker. to_i parses "08" as 8
      # (Integer("08") raises, reading it as octal) and gives 0 when there
      # is no number at all.
      langroot = "#$root/#{lang}/"
      orderedchapters = Dir["#{chapterfiles}"].sort_by do |file|
        below = file.start_with?(langroot) ? file[langroot.length..-1] : file
        [below[/\d+/].to_i, file]
      end
      chaptersmarkdown = read_markdown_sources(orderedchapters, options)
      latex = markdown_to_latex(chaptersmarkdown, config, lang)

      appendixfiles = "#$root/#{lang}/#{options['appendix-files']}"
      puts "\n\tParsing appendix markdown... #{appendixfiles} "
      appendixmarkdown = read_markdown_sources(Dir["#{appendixfiles}"].sort, options)
      appendix = markdown_to_latex(appendixmarkdown, config, lang)

      print "\n\tCreating main.tex for #{lang}... "
      dir = "latex/#{lang}"
      mkdir_p(dir)
      File.open("#{dir}/main.tex", 'w') do |file|
        file.write(template.result(binding))
      end
      puts "done"

      failed = false
      puts "\tRunning XeTeX:"
      cd($root)
      # Three passes so references and the table of contents settle.
      3.times do |i|
        print "\t\tPass #{i + 1}... "
        IO.popen("xelatex -output-directory=\"#{dir}\" \"#{dir}/main.tex\" 2>&1") do |pipe|
          if debug
            # Debug mode: stream the whole xelatex log to STDERR.
            begin
              while (line = pipe.gets)
                STDERR.print line
              end
            rescue StandardError
              failed = true
            end
          else
            # Quiet mode: scan for the first TeX error line ("! ...").
            while !failed && (line = pipe.gets)
              # Lines with invalid encoding cannot be matched on Ruby 1.9+;
              # on older Rubies strings carry no encoding to check.
              readable = !line.respond_to?(:valid_encoding?) || line.valid_encoding?
              if readable && line =~ /^!\s/
                puts "failed with:\n\t\t\t#{line.strip}"
                puts "\tConsider running this again with --debug."
                failed = true
              end
            end
          end
        end
        break if failed
        puts "done"
      end

      if failed
        print "\tConvert error, exit !\n"
        exit 1
      else
        print "\tMoving output to #{options["name"]}.#{lang}.pdf... "
        mv("#{dir}/main.pdf", "#$root/#{options["name"]}.#{lang}.pdf")
        puts "done"
      end
    end
  end
end
# Builds the non-PDF formats (epub, html, ...) with pandoc.
#
# options - hash prepared by main; the keys read are "name", "lang"
#           (comma-separated language codes) and "outputformat" (array of
#           pandoc output extensions).
#
# For each language, all "<lang>/**/*.markdown" files are concatenated in
# sorted order into "<name>.<lang>.markdown", rewriting "Insert xxx.png" +
# caption pairs into markdown images. pandoc then runs once per format.
#
# The output name follows options["name"], the same as the PDF, and falls
# back to the working directory's name. Exits with status 1 if pandoc is not
# installed.
def generate_ebook(options)
  name = options["name"] || File.basename(Dir.getwd)

  missing = ['pandoc'].reject { |command| command_exists?(command) }
  unless missing.empty?
    puts "Missing dependencies: #{missing.join(', ')}."
    puts "Install these and try again."
    exit 1
  end

  puts " "
  puts "Will generate ebooks [#{options["outputformat"].join(',')}] for the following languages #{options["lang"]}"
  puts " "

  options["lang"].split(',').each do |lang|
    puts "convert content for '#{lang}' language"
    # Chinese captions carry no "Figure" keyword before the caption text.
    figure_title = lang == 'zh' ? '' : 'Figure'
    dir = File.expand_path(File.join(Dir.getwd, lang))

    sections = Dir[File.join(dir, '**', '*.markdown')].sort.map do |input|
      puts "\tProcessing #{input}"
      content = File.read(input)
      content.gsub!(/Insert\s+(.*)(\.png)\s*\n?\s*#{figure_title}\s+(.*)/, '![\3](figures/\1-tn\2 "\3")')
      content
    end
    # Separate files with a blank line so a file lacking a trailing newline
    # cannot swallow the first heading of the next one.
    book_content = sections.join("\n\n")

    File.open("#{name}.#{lang}.markdown", 'w') do |output|
      output.write(book_content)
    end

    # pandoc -S -s --epub-metadata=metadata.xml -o sdcamp.zh.html --epub-stylesheet=epub/ProGit.css epub/title.txt sdcamp.zh.markdown
    options["outputformat"].each do |format|
      target = "#{name}.#{lang}.#{format}"
      succeeded = system('pandoc',
                         '--standalone',
                         '--toc',
                         '--template=template.html',
                         '--epub-metadata', 'epub/metadata.xml',
                         '--epub-stylesheet', 'epub/book.css', # this doesn't work
                         '--output', target,
                         "title.#{lang}.txt", # little strange, if this put under epub, pandoc reports error
                         "#{name}.#{lang}.markdown")
      # system returns false/nil when pandoc fails or cannot be started.
      if succeeded
        puts("#{target} is generated")
      else
        STDERR.puts("pandoc failed to generate #{target}")
      end
    end
  end
end
# http://stackoverflow.com/questions/5074327/most-appropriate-way-to-generate-directory-of-files-from-directory-of-template-f
# Scaffolds a new book project by copying the template tree.
#
# project - destination directory. WARNING: an existing directory of that
#           name is deleted first (rmtree).
# source  - template directory; defaults to "../templates" relative to this
#           script.
#
# Every entry under +source+ is mirrored at the same relative path below
# +project+:
#   * directories are created, including empty ones;
#   * "*.erb" files are rendered with ERB using this method's binding, so
#     templates can use `project`, and written without the ".erb" suffix;
#   * all other files are copied verbatim.
def generate_project(project, source = File.dirname(__FILE__)+"/../templates")
  destination = project
  FileUtils.rmtree(destination)
  FileUtils.mkdir_p(destination)

  sourceroot = Pathname.new(source)
  sourcerealpath = sourceroot.cleanpath
  puts "generate project \"#{destination}\" from source \"#{sourcerealpath}\""

  Dir.glob(File.join(source, '**/*')).each do |path|
    pathname = Pathname.new(path)
    relative = pathname.relative_path_from(sourceroot)

    if File.directory?(pathname)
      # Create the directory itself, not just its parent, so empty template
      # directories are kept.
      FileUtils.mkdir_p(File.join(destination, relative.to_s))
      next
    end

    destdir = File.join(destination, relative.dirname.to_s)
    FileUtils.mkdir_p(destdir)

    if pathname.extname == '.erb'
      # Render next to where the template lives in the tree.
      rendered = File.join(destdir, pathname.basename.sub(/\.erb$/, '').to_s)
      File.open(rendered, 'w') do |file|
        file.puts(ERB.new(File.read(path)).result(binding))
      end
    else
      print pathname.cleanpath, " => ", destdir, "\n"
      FileUtils.cp(pathname, destdir)
    end
  end
end
# Run the command-line entry point as soon as the script is loaded.
main
Jump to Line
Something went wrong with that request. Please try again.