/
default.rb
90 lines (64 loc) · 1.82 KB
/
default.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
module ODFReport
module Parser
# Default HTML parser
#
# sample HTML
#
# <p> first paragraph </p>
# <p> second <strong>paragraph</strong> </p>
# <blockquote>
# <p> first <em>quote paragraph</em> </p>
# <p> first quote paragraph </p>
# <p> first quote paragraph </p>
# </blockquote>
# <p> third <strong>paragraph</strong> </p>
#
# <p style="margin: 100px"> fourth <em>paragraph</em> </p>
# <p style="margin: 120px"> fifth paragraph </p>
# <p> sixth <strong>paragraph</strong> </p>
#
class Default
  # Inline HTML tags recognised inside a paragraph, paired with the ODF text
  # style name each one is rewritten to. The pairs are applied in this order,
  # one full pass per tag, so nested tags (e.g. <strong><em>..</em></strong>)
  # are all converted.
  INLINE_STYLES = [
    [%r{<strong>(.+?)</strong>}, 'bold'],
    [%r{<em>(.+?)</em>}, 'italic'],
    [%r{<u>(.+?)</u>}, 'underline']
  ].freeze

  # Paragraph nodes produced by #parse, in document order.
  attr_accessor :paragraphs

  # Stores the inputs and parses immediately, filling #paragraphs.
  #
  # @param text the HTML fragment to convert
  # @param template_node the node used both to parse the fragment and as the
  #   prototype duplicated for every paragraph (presumably a Nokogiri XML
  #   node from the ODF template -- confirm against the caller)
  def initialize(text, template_node)
    @text = text
    @paragraphs = []
    @template_node = template_node
    parse
  end

  # Walks every <p>, <h1> and <h2> element of the parsed text and appends one
  # paragraph node per element to #paragraphs.
  def parse
    fragment = @template_node.parse(@text)

    fragment.css("p", "h1", "h2").each do |element|
      add_paragraph(parse_formatting(element.inner_html), check_style(element))
    end
  end

  # Duplicates the template node, fills it with +text+ and appends it to
  # #paragraphs.
  #
  # @param text [String] markup to place inside the new paragraph node
  # @param style [String, nil] value for the text:style-name attribute; the
  #   attribute is left as it is on the template when nil
  def add_paragraph(text, style)
    paragraph = @template_node.dup
    paragraph['text:style-name'] = style if style
    paragraph.children = text
    @paragraphs << paragraph
  end

  private

  # Rewrites inline HTML formatting tags as ODF text:span elements.
  #
  # Newlines are deleted *before* the tags are substituted: `.` does not match
  # a line break, so a tag whose content wrapped onto a second line would
  # otherwise be left behind as raw HTML. The argument is not modified, which
  # also makes frozen strings acceptable.
  #
  # @param text [String] inner HTML of a single paragraph element
  # @return [String] a new string, stripped, newline-free, with inline tags
  #   converted
  def parse_formatting(text)
    flattened = text.strip.delete("\n")

    INLINE_STYLES.reduce(flattened) do |markup, (pattern, style)|
      markup.gsub(pattern, "<text:span text:style-name=\"#{style}\">\\1</text:span>")
    end
  end

  # Chooses the paragraph style for an element.
  #
  # @param node the element being converted; must respond to #name, #parent
  #   and #[]
  # @return [String, nil] "title" for heading elements, "quote" for elements
  #   directly inside a <blockquote> or whose style attribute mentions a
  #   margin, nil otherwise
  def check_style(node)
    return "title" if node.name =~ /h\d/i

    inside_blockquote = node.parent && node.parent.name == "blockquote"
    return "quote" if inside_blockquote || node['style'] =~ /margin/

    nil
  end
end
end
end