-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
112 lines (104 loc) · 2.54 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
package main
import (
"bytes"
"fmt"
"io"
"os"
"regexp"
"strings"
"github.com/alecthomas/kong"
"github.com/andybalholm/cascadia"
"github.com/yosssi/gohtml"
"golang.org/x/net/html"
)
// pseudoSelectorText is the suffix that, when it ends the selector argument,
// is stripped from the selector and switches the output to text mode.
const pseudoSelectorText = "::text"
// pseudoSelectorAttr matches a trailing "::attr(name)" pseudo-selector
// (optionally followed by whitespace) and captures name, which may not contain
// ')', quotes, '>', '/', '=' or whitespace.
var pseudoSelectorAttr = regexp.MustCompile(`::attr\(([^)"'>/=\s]+)\)\s*$`)
// main evaluates a CSS selector against an HTML document by delegating to run.
// If run fails, the error is written to stderr and the process exits with
// status 1.
func main() {
	err := run()
	if err == nil {
		return
	}
	fmt.Fprintln(os.Stderr, err)
	os.Exit(1)
}
func run() error {
cli := struct {
Selector string `arg:"" help:"the CSS selector to run"`
In string `arg:"" help:"file to read input from. If unset, reads from stdin" optional:""`
Number int `help:"Print only the n-th match, starting at 1" short:"n"`
Text bool `help:"Recursively print the inner text instead of HTML"`
}{}
kong.Parse(&cli)
var in io.Reader
if cli.In != "" {
f, err := os.Open(cli.In)
if err != nil {
return err
}
defer f.Close()
in = f
} else {
in = os.Stdin
}
doc, err := html.Parse(in)
if err != nil {
return err
}
rawSelector := cli.Selector
if strings.HasSuffix(rawSelector, pseudoSelectorText) {
rawSelector = strings.TrimSuffix(rawSelector, pseudoSelectorText)
cli.Text = true
}
selectAttr := ""
if matches := pseudoSelectorAttr.FindStringSubmatch(rawSelector); matches != nil {
selectAttr = strings.ToLower(matches[1])
idx := pseudoSelectorAttr.FindStringIndex(rawSelector)[0]
rawSelector = rawSelector[0:idx]
}
selector, err := cascadia.Compile(rawSelector)
if err != nil {
return err
}
nodes := cascadia.QueryAll(doc, selector)
buf := bytes.NewBuffer(nil)
if cli.Number != 0 && cli.Number > len(nodes) {
return fmt.Errorf("wanted match %d, but only got %d matches", cli.Number, len(nodes))
}
if cli.Number > 0 {
nodes = []*html.Node{nodes[cli.Number-1]}
}
if cli.Text {
for _, n := range nodes {
buf := &bytes.Buffer{}
walk(n, func(node *html.Node) {
if node.Type == html.TextNode {
buf.WriteString(node.Data)
}
})
fmt.Println(buf.String())
}
} else {
for _, n := range nodes {
if selectAttr != "" {
for _, attr := range n.Attr {
if selectAttr == strings.ToLower(attr.Key) {
buf.WriteString(attr.Val)
buf.WriteRune('\n')
}
}
} else if err := html.Render(buf, n); err != nil {
return err
}
}
fmt.Println(string(gohtml.FormatBytes(buf.Bytes())))
}
return nil
}
// walk calls visit on n and then, recursively, on each of n's children in
// sibling order (a pre-order, depth-first traversal of the subtree rooted at n).
func walk(n *html.Node, visit func(node *html.Node)) {
	visit(n)
	for child := n.FirstChild; child != nil; child = child.NextSibling {
		walk(child, visit)
	}
}