/
table2rst.go
125 lines (112 loc) · 2.33 KB
/
table2rst.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
package table2rst
import (
"golang.org/x/net/html"
"strings"
)
// ElementType is the classification that getElementType assigns to an
// *html.Node while a parsed HTML table is being converted.
type ElementType int

// Node kinds produced by getElementType. The values are consecutive and
// start at zero with TextNode.
const (
	// TextNode is reported for text nodes.
	TextNode = ElementType(iota)
	// tableElementNode is reported for <table> elements.
	tableElementNode
	// tbodyElementNode is reported for <tbody> elements.
	tbodyElementNode
	// trElementNode is reported for <tr> elements.
	trElementNode
	// tdElementNode is reported for <td> elements.
	tdElementNode
	// NoNeedToKnow is reported for every node that is none of the above.
	NoNeedToKnow
)
// getElementType maps n onto one of the ElementType constants.
//
// Text nodes yield TextNode. Element nodes whose tag name (n.Data) is
// "td", "tr", "tbody" or "table" yield the matching *ElementNode constant.
// Anything else, including element nodes with other tag names, yields
// NoNeedToKnow.
func getElementType(n *html.Node) ElementType {
	switch n.Type {
	case html.TextNode:
		return TextNode
	case html.ElementNode:
		switch n.Data {
		case "td":
			return tdElementNode
		case "tr":
			return trElementNode
		case "tbody":
			return tbodyElementNode
		case "table":
			return tableElementNode
		}
	}
	return NoNeedToKnow
}
// getTextNodeRst returns the data of the given text node with leading and
// trailing white space removed.
func getTextNodeRst(text *html.Node) string {
	trimmed := strings.TrimSpace(text.Data)
	return trimmed
}
// getTdRst renders the content of a <td> element as the text of a single
// list-table cell.
//
// Every text child is trimmed with getTextNodeRst and followed by a
// newline. The result always ends with a newline: a cell without any
// children yields "\n" rather than "", so the list-item marker the caller
// writes in front of the cell is terminated and the next cell's marker does
// not end up on the same line.
//
// It panics with "cannot convert this td" when td has a child that is not a
// text node.
func getTdRst(td *html.Node) string {
	var b strings.Builder
	for c := td.FirstChild; c != nil; c = c.NextSibling {
		if getElementType(c) != TextNode {
			panic("cannot convert this td")
		}
		b.WriteString(getTextNodeRst(c))
		b.WriteByte('\n')
	}
	if b.Len() == 0 {
		// Empty cell: still close the line the caller started.
		return "\n"
	}
	return b.String()
}
// getTrRst renders a <tr> element as one row of an RST list-table, i.e. one
// item of the outer bullet list whose content is a nested bullet list with
// one item per <td>.
//
// The first cell is prefixed with " * - ". Each following cell is prefixed
// with "   - " so that its "-" sits in the same column as the first cell's
// "-"; a marker placed further left would be read as belonging to the outer
// list instead of the row. The text of each cell comes from getTdRst. Text
// nodes that are direct children of the row are appended after trimming
// with getTextNodeRst.
//
// It panics with "cannot convert this tr" when tr has a child that is
// neither a <td> element nor a text node.
func getTrRst(tr *html.Node) string {
	const (
		firstCellMarker = " * - "
		nextCellMarker  = "   - "
	)

	var b strings.Builder
	marker := firstCellMarker
	for c := tr.FirstChild; c != nil; c = c.NextSibling {
		switch getElementType(c) {
		case tdElementNode:
			b.WriteString(marker)
			b.WriteString(getTdRst(c))
			// Only the first cell opens the row.
			marker = nextCellMarker
		case TextNode:
			b.WriteString(getTextNodeRst(c))
		default:
			panic("cannot convert this tr")
		}
	}
	return b.String()
}
// getTbodyRst renders a <tbody> element by concatenating the output of
// getTrRst for each <tr> child and the trimmed data of each text child, in
// document order.
//
// It panics with "cannot convert this tbody" when tbody has a child that is
// neither a <tr> element nor a text node.
func getTbodyRst(tbody *html.Node) string {
	var rows string
	for child := tbody.FirstChild; child != nil; child = child.NextSibling {
		switch getElementType(child) {
		case trElementNode:
			rows += getTrRst(child)
		case TextNode:
			rows += getTextNodeRst(child)
		default:
			panic("cannot convert this tbody")
		}
	}
	return rows
}
// getTableRst renders a <table> element as an RST list-table directive.
//
// The result starts with the directive line followed by a blank line, then
// the output of getTbodyRst for each <tbody> child and the trimmed data of
// each text child, in document order.
//
// It panics with "cannot convert this table" when table has a child that is
// neither a <tbody> element nor a text node.
func getTableRst(table *html.Node) string {
	rst := ".. list-table::\n\n"
	for child := table.FirstChild; child != nil; child = child.NextSibling {
		switch getElementType(child) {
		case tbodyElementNode:
			rst += getTbodyRst(child)
		case TextNode:
			rst += getTextNodeRst(child)
		default:
			panic("cannot convert this table")
		}
	}
	return rst
}
// traverse walks the children of n in document order and returns the
// concatenated RST of every <table> element found beneath it.
//
// A <table> child is converted with getTableRst and not descended into any
// further by this function; every other child is searched recursively.
func traverse(n *html.Node) string {
	var out string
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		if getElementType(child) != tableElementNode {
			out += traverse(child)
			continue
		}
		out += getTableRst(child)
	}
	return out
}
// HtmlTableToRstListTable searches the node tree below doc for <table>
// elements and returns their RST list-table renderings concatenated in
// document order. Content outside of tables contributes nothing to the
// result.
//
// The conversion panics when a table contains a node that the conversion
// helpers do not handle.
func HtmlTableToRstListTable(doc *html.Node) string {
	rst := traverse(doc)
	return rst
}