-
Notifications
You must be signed in to change notification settings - Fork 2
/
htmlmaker.rb
307 lines (249 loc) · 11.7 KB
/
htmlmaker.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
require 'fileutils'
require_relative '../header.rb'
require_relative '../metadata.rb'
# ---------------------- VARIABLES
# log hashes for this script, as handed back by Bkmkr::Paths.setLocalLoghash
local_log_hash, @log_hash = Bkmkr::Paths.setLocalLoghash

# text after the last "." of the split filename
filetype = Bkmkr::Project.filename_split.split(".").last

# builds "<project_tmp_dir>/<filename>.<ext>"
tmp_file_for = ->(ext) { File.join(Bkmkr::Paths.project_tmp_dir, "#{Bkmkr::Project.filename}.#{ext}") }
# builds "<core_dir>/htmlmaker/<name>"
core_htmlmaker_file = ->(name) { File.join(Bkmkr::Paths.core_dir, "htmlmaker", name) }

project_html_file = tmp_file_for.call("html")
docxtoxml_py = core_htmlmaker_file.call("docxtoxml.py")
saxonpath = File.join(Bkmkr::Paths.resource_dir, "saxon", "#{Bkmkr::Tools.xslprocessor}.jar")
source_xml = tmp_file_for.call("xml")
word_to_html_xsl = core_htmlmaker_file.call("wordtohtml.xsl")
headings_js = core_htmlmaker_file.call("headings.js")
inlines_js = core_htmlmaker_file.call("inlines.js")
evaluate_pis = core_htmlmaker_file.call("evaluate_pis.js")
title_js = core_htmlmaker_file.call("title.js")
xslonly_js = core_htmlmaker_file.call("xsl_only.js")
version_metatag_js = core_htmlmaker_file.call("version_metatag.js")
preformatted_js = core_htmlmaker_file.call("preformatted.js")
# ---------------------- METHODS
# Reads the json file at Metadata.configfile via Mcmlln::Tools.readjson.
#
# logkey - key under which the outcome is written to @log_hash
#
# Returns whatever readjson returns, or an empty hash if reading raised.
# The rescued exception (nil when nothing was raised) is always passed to
# Mcmlln::Tools.logtoJson.
def readConfigJson(logkey='')
  failure = nil
  begin
    Mcmlln::Tools.readjson(Metadata.configfile)
  rescue => failure
    {}
  ensure
    Mcmlln::Tools.logtoJson(@log_hash, logkey, failure)
  end
end
## Script-local wrapper around Bkmkr::Tools.runnode so the outcome lands in the json logfile.
#
# jsfile - passed through to runnode as its first argument
# args   - passed through to runnode as its second argument
# logkey - key under which the outcome is written to @log_hash
#
# Returns runnode's result, or nil if runnode raised. The rescued exception
# (nil when nothing was raised) is always passed to Mcmlln::Tools.logtoJson.
def htmlmakerRunNode(jsfile, args, logkey='')
  outcome = nil
  begin
    Bkmkr::Tools.runnode(jsfile, args)
  rescue => outcome
    nil
  ensure
    Mcmlln::Tools.logtoJson(@log_hash, logkey, outcome)
  end
end
## Script-local wrapper around Mcmlln::Tools.copyFile so the outcome lands in the json logfile.
#
# srcFile  - passed through as the first argument of Mcmlln::Tools.copyFile
# destFile - passed through as the second argument of Mcmlln::Tools.copyFile
# logkey   - key under which the outcome is written to @log_hash
#
# Returns the wrapped call's result, or nil if it raised. The rescued exception
# (nil when nothing was raised) is always passed to Mcmlln::Tools.logtoJson.
def copyFile(srcFile, destFile, logkey='')
  copy_error = nil
  begin
    Mcmlln::Tools.copyFile(srcFile, destFile)
  rescue StandardError => copy_error
    nil
  ensure
    Mcmlln::Tools.logtoJson(@log_hash, logkey, copy_error)
  end
end
# Reads the file at Bkmkr::Paths.outputtmp_html into a string.
#
# logkey - key under which the outcome is written to @log_hash
#
# Returns the file contents, or an empty string if the read raised.
# The rescued exception (nil when nothing was raised) is always passed to
# Mcmlln::Tools.logtoJson.
def readOutputHtml(logkey='')
  read_error = nil
  begin
    File.read(Bkmkr::Paths.outputtmp_html)
  rescue => read_error
    ''
  ensure
    Mcmlln::Tools.logtoJson(@log_hash, logkey, read_error)
  end
end
# for xsl conversion
## Script-local wrapper around Bkmkr::Tools.runpython so the outcome lands in the json logfile.
#
# filetype     - when equal to "html" the python step is skipped and a note is logged instead
# docxtoxml_py - passed to runpython as its first argument
# logkey       - key under which the outcome is written to @log_hash
#
# runpython receives Bkmkr::Paths.project_docx_file as its second argument.
# Returns runpython's result, the skip note for html input, or nil if anything raised.
def convertdocxtoxml(filetype, docxtoxml_py, logkey='')
  note = nil
  begin
    if filetype == "html"
      note = 'input file is html, skipping'
    else
      Bkmkr::Tools.runpython(docxtoxml_py, Bkmkr::Paths.project_docx_file)
    end
  rescue => note
    nil
  ensure
    Mcmlln::Tools.logtoJson(@log_hash, logkey, note)
  end
end
# for xsl conversion
# Produces the file at Bkmkr::Paths.outputtmp_html from either the xml or an html input.
#
# filetype         - "html" means the input is already html: it is copied, not transformed
# saxonpath        - jar handed to `java -jar`
# source_xml       - value of the -s: option on the java command line
# word_to_html_xsl - value of the -xsl: option on the java command line
# logkey           - key under which the outcome is written to @log_hash
#
# Returns the java command's captured stdout, the skip note for html input,
# or nil if anything raised. Only raised errors reach the log; a non-zero exit
# status from the java command does not raise.
def convertxmltohtml(filetype, saxonpath, source_xml, word_to_html_xsl, logkey='')
  note = nil
  begin
    if filetype == "html"
      Mcmlln::Tools.copyFile(Bkmkr::Paths.project_tmp_file, Bkmkr::Paths.outputtmp_html)
      note = 'input file is html, skipping (copied input file to project_tmp)'
    else
      `java -jar "#{saxonpath}" -s:"#{source_xml}" -xsl:"#{word_to_html_xsl}" -o:"#{Bkmkr::Paths.outputtmp_html}"`
    end
  rescue => note
    nil
  ensure
    Mcmlln::Tools.logtoJson(@log_hash, logkey, note)
  end
end
# Places footnote text inline at its reference point, per htmlbook.
#
# content  - html string to transform
# super_cs - class name of the superscript spans that mark footnote references;
#            interpolated into the regexes as-is (not escaped)
# logkey   - key under which the outcome is written to @log_hash
#
# Returns the transformed html, or the untouched content if any step raised.
# The rescued exception (nil when nothing was raised) is always passed to
# Mcmlln::Tools.logtoJson.
def fixFootnotes(content, super_cs, logkey='')
  # retag id-carrying superscript spans as FootnoteReference, then drop an
  # id-less superscript span that merely wraps such a reference
  filecontents = content.gsub(/(<span class=")(#{super_cs})(" id="\d+")/, "\\1FootnoteReference\\3")
                        .gsub(/(<span class="#{super_cs}">)(<span class="FootnoteReference" id="\d+"><\/span>)(<\/span>)/, "\\2")
  # footnote bodies are looked up in the original content; the divs themselves stay in place
  footnotes = content.scan(/(<div class="footnotetext" id=")(\d+)(">)(\s?)(.*?)(<\/div>)/)
  footnotes.each do |f|
    noteref = f[1]
    # paragraphs cannot live inside the inline span, so <p ...>/</p> become <span ...>/</span>
    notetext = f[4].gsub(/<p/, "<span").gsub(/<\/p/, "</span")
    inline_note = "<span data-type=\"footnote\" id=\"footnote_#{noteref}\">#{notetext}</span>"
    # Block form on purpose: with a replacement *string*, backslash sequences in the
    # footnote text (\0, \1, \\ ...) would be expanded as backreferences and corrupt the note.
    filecontents = filecontents.gsub(/<span class="FootnoteReference" id="#{noteref}"><\/span>/) { inline_note }
                               .gsub(/<span class="FootnoteReference" id="#{noteref}"\/>/) { inline_note }
  end
  filecontents
rescue => logstring
  content
ensure
  Mcmlln::Tools.logtoJson(@log_hash, logkey, logstring)
end
# Writes each endnote reference's id into the html as static content and
# prefixes the ids of references and endnote text blocks.
#
# content - html string to transform
# logkey  - key under which the outcome is written to @log_hash
#
# Returns the transformed html, or the untouched content if a step raised.
# The rescued exception (nil when nothing was raised) is always passed to
# Mcmlln::Tools.logtoJson.
def fixEndnotes(content, logkey='')
  problem = nil
  begin
    # empty reference span -> class "endnotereference", id "endnoteref_N", text N
    with_refs = content.gsub(/(<span class=")(.ndnote.eference)(" id=")(\d+)(">)(<\/span>)/,"\\1endnotereference\\3endnoteref_\\4\\5\\4\\6")
    # endnote text div ids gain the "endnotetext_" prefix
    with_refs.gsub(/(div class="endnotetext" id=")/,"\\1endnotetext_")
  rescue => problem
    content
  ensure
    Mcmlln::Tools.logtoJson(@log_hash, logkey, problem)
  end
end
# Makes the html safer for xml tooling: numeric entity for non-breaking spaces,
# self-closed <img> and <br> tags.
#
# content - html string to transform
# logkey  - key under which the outcome is written to @log_hash
#
# Returns the transformed html, or the untouched content if a step raised.
# The rescued exception (nil when nothing was raised) is always passed to
# Mcmlln::Tools.logtoJson.
#
# NOTE(review): the first substitution previously replaced a whitespace character
# with itself (a no-op as written). It is presumed to be a mangled
# "&nbsp;" -> "&#160;" conversion and is restored as such here - confirm against
# version history.
def fixEntities(content, logkey='')
  filecontents = content.gsub(/&nbsp;/, "&#160;")
                        # close <img ...> tags, including ones that span lines, but leave
                        # tags that already end in "/>" alone (they used to become "//>")
                        .gsub(/(<img[^>]*?)(?<!\/)>/, "\\1/>")
                        .gsub(/(<br)(>)/, "\\1/\\2")
  filecontents
rescue => logstring
  content
ensure
  Mcmlln::Tools.logtoJson(@log_hash, logkey, logstring)
end
# WDV-314: replaces the single ellipsis character with spaced periods (". . .").
# Several passes are needed to supply the leading/trailing space that should surround
# the ellipsis but may be missing because of html tags and/or sentences that begin or
# end with an ellipsis. Best case, the user runs the cleanup macro first; this is a fallback.
#
# content - html string to transform
# logkey  - key under which the outcome is written to @log_hash
#
# Returns the transformed html, or the untouched content if a pass raised.
# The rescued exception (nil when nothing was raised) is always passed to
# Mcmlln::Tools.logtoJson.
def fixEllipseCharacter(content, logkey='')
  # applied top to bottom; each pass sees the previous pass's output
  passes = [
    [/(\w)(\s?…\s?)(\w)/, '\1 . . . \3'], # between two word characters
    [/(\w)(\s?…)/, '\1 . . .'],           # directly after a word character
    [/(…\s?)(\w)/, '. . . \2'],           # directly before a word character
    [/…/, '. . .']                        # anything left over
  ]
  passes.reduce(content) { |text, (pattern, replacement)| text.gsub(pattern, replacement) }
rescue => problem
  content
ensure
  Mcmlln::Tools.logtoJson(@log_hash, logkey, problem)
end
## Script-local wrapper around Mcmlln::Tools.overwriteFile so the outcome lands in the json logfile.
#
# path         - passed through as the first argument of Mcmlln::Tools.overwriteFile
# filecontents - passed through as the second argument of Mcmlln::Tools.overwriteFile
# logkey       - key under which the outcome is written to @log_hash
#
# Returns the wrapped call's result, or nil if it raised. The rescued exception
# (nil when nothing was raised) is always passed to Mcmlln::Tools.logtoJson.
def overwriteFile(path,filecontents, logkey='')
  write_error = nil
  begin
    Mcmlln::Tools.overwriteFile(path, filecontents)
  rescue StandardError => write_error
    nil
  ensure
    Mcmlln::Tools.logtoJson(@log_hash, logkey, write_error)
  end
end
# Removes the endnotes section when it holds no content.
# "No content" means the first matching section contains no "<p " substring; in that
# case every matching section is removed.
#
# content - html string to inspect
# logkey  - key under which the outcome is written to @log_hash
#
# Returns content itself when the section has paragraphs, the stripped html otherwise,
# or the untouched content if anything raised. The rescued exception (nil when nothing
# was raised) is always passed to Mcmlln::Tools.logtoJson.
def stripEndnotes(content, logkey='')
  endnotes_section = /(<section data-type=\"appendix\" class=\"endnotes\".*?\">)((.|\n)*?)(<\/section>)/
  # an absent section yields "" here, which then falls through to a no-op gsub
  first_section = content.match(endnotes_section).to_s
  if first_section.include?("<p ")
    content
  else
    content.gsub(endnotes_section,"")
  end
rescue => problem
  content
ensure
  Mcmlln::Tools.logtoJson(@log_hash, logkey, problem)
end
# ---------------------- PROCESSES
data_hash = readConfigJson('read_config_json')
# template details taken from config.json
doctemplate_version = data_hash['doctemplate_version']
doctemplatetype = data_hash['doctemplatetype']
# doctemplatetype decides which htmlmaker_js directory is used and which
# hardcoded superscript style name applies
htmlmakerjs_dirname, super_cs =
  if doctemplatetype == 'rsuite'
    ["htmlmaker_js_rsuite", 'supersup']
  else
    ["htmlmaker_js", 'spansuperscriptcharacterssup']
  end
htmlmakerjs_path = File.join(Bkmkr::Paths.scripts_dir, htmlmakerjs_dirname)
# every htmlmaker_js file is addressed relative to the directory chosen above
htmlmaker_bin = File.join(htmlmakerjs_path, 'bin', 'htmlmaker')
styles_json = File.join(htmlmakerjs_path, 'styles.json')
stylefunctions_js = File.join(htmlmakerjs_path, 'style-functions.js')
htmltohtmlbook_js = File.join(htmlmakerjs_path, 'lib', 'htmltohtmlbook.js')
generateTOC_js = File.join(htmlmakerjs_path, 'lib', 'generateTOC.js')
# When the docx file exists it is converted to html; doctemplatetype picks the method.
# Later, when we want to discontinue a method, just skip the conversion, and create errfile in filearchive_postprocessing?
# If we want to make sure no wrongly constructed files are picked up, we can rm them in cleanup/cleanup_preprocessing too.
if File.file?(Bkmkr::Paths.project_docx_file)
  case doctemplatetype
  when 'rsuite', 'sectionstart'
    # both template types go through the same htmlmaker_js steps:
    # 1. docx -> html
    htmlmakerRunNode(htmlmaker_bin, "#{Bkmkr::Paths.project_docx_file} #{Bkmkr::Paths.project_tmp_dir} #{styles_json} #{stylefunctions_js}", 'convertdocx_to_html')
    # 2. copy the result so it carries the 'outputtmp_html' name
    copyFile(project_html_file, Bkmkr::Paths.outputtmp_html, 'copy_and_rename_html_to_outputtmphtml')
    # 3. html -> htmlbook
    htmlmakerRunNode(htmltohtmlbook_js, Bkmkr::Paths.outputtmp_html, 'convert_to_htmlbook')
    # 4. table of contents
    htmlmakerRunNode(generateTOC_js, Bkmkr::Paths.outputtmp_html, 'generateTOC_js')
  when 'pre-sectionstart'
    # xsl route: docx -> xml -> html
    convertdocxtoxml(filetype, docxtoxml_py, 'convert_docx_to_xml')
    convertxmltohtml(filetype, saxonpath, source_xml, word_to_html_xsl, 'convert_xml_to_html')
  end
elsif File.file?(project_html_file)
  # the infile was already html: rename a copy of it to 'outputtmp.html'
  copyFile(project_html_file, Bkmkr::Paths.outputtmp_html, 'copy_and_rename_html_to_outputtmphtml')
end
# first pass over the html: read it, run the regex fixes in order
# (footnotes, endnotes, entities, ellipses), then write it back
filecontents = readOutputHtml('read_output_html_a')
filecontents = fixFootnotes(filecontents, super_cs, 'fix_footnotes')
filecontents = fixEndnotes(filecontents, 'fix_endnotes')
filecontents = fixEntities(filecontents, 'fix_entities')
filecontents = fixEllipseCharacter(filecontents, 'fix_ellipse_character')
overwriteFile(Bkmkr::Paths.outputtmp_html, filecontents, 'overwrite_output_html_a')

# add correct markup for inlines (em, strong, sup, sub)
htmlmakerRunNode(inlines_js, "#{Bkmkr::Paths.outputtmp_html} #{doctemplatetype}", 'inlines_js')

# change p children of pre tags to spans - skipped for rsuite, which uses <pre> blocks
# as containers, so this makes a mess of the enclosed <p>s
if doctemplatetype != 'rsuite'
  htmlmakerRunNode(preformatted_js, Bkmkr::Paths.outputtmp_html, 'preformatted_js')
end

# for xsl-only: I think this includes stuff from formerly included:
# footnotes.js, lists.js, parts.js, strip-toc.js, headings.js(for xsl) and some from band-aid.js
# more items from bandaid were moved to htmlpostprocessing
if doctemplatetype == 'pre-sectionstart'
  htmlmakerRunNode(xslonly_js, Bkmkr::Paths.outputtmp_html, 'xslonly_js')
else
  # add headings to all sections for sectionstart
  htmlmakerRunNode(headings_js, Bkmkr::Paths.outputtmp_html, 'headings_js')
end

# second pass over the html: drop the endnotes section if it has no content
filecontents = readOutputHtml('read_output_html_b')
filecontents = stripEndnotes(filecontents, 'strip_endnotes')
overwriteFile(Bkmkr::Paths.outputtmp_html, filecontents, 'overwrite_output_html_b')

# set html title to match JSON
htmlmakerRunNode(title_js, "#{Bkmkr::Paths.outputtmp_html} \"#{Metadata.booktitle}\"", 'title_js')

# add meta tag to html with template_version, when config.json supplied a non-empty one
if !doctemplate_version.nil? && !doctemplate_version.empty?
  htmlmakerRunNode(version_metatag_js, "#{Bkmkr::Paths.outputtmp_html} \"#{doctemplate_version}\"", 'add_doctemplate-version_meta_tag')
end

# evaluate processing instructions
htmlmakerRunNode(evaluate_pis, "#{Bkmkr::Paths.outputtmp_html} #{doctemplatetype}", 'evaluate_pis')
# Final check: the html file should exist by now. Record the result in the log hash.
# The failure text used to read "FAIL: html file was created successfully", which
# contradicted itself; it now says the file was not created.
test_html_status =
  if File.file?("#{Bkmkr::Paths.outputtmp_html}")
    "pass: html file was created successfully"
  else
    "FAIL: html file was not created"
  end
@log_hash['html_status']=test_html_status
# ---------------------- LOGGING
# Stamp the completion time, then write the json log:
Mcmlln::Tools.logtoJson(@log_hash, 'completed', Time.now)
Mcmlln::Tools.write_json(local_log_hash, Bkmkr::Paths.json_log)