forked from andybalholm/redwood
/
convert.go
57 lines (48 loc) · 1.06 KB
/
convert.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
// The dg-convert command takes a Dansguardian weighted phrase list on standard
// input, and prints it in Redwood format on standard output. Any rules with
// phrases joined by commas are dropped.
package main
import (
"bufio"
"flag"
"fmt"
"io"
"log"
"os"
"strings"
"golang.org/x/net/html/charset"
"golang.org/x/text/transform"
)
// cs holds the value of the -charset flag: the name of the input's character
// encoding. It defaults to "utf-8"; main decodes the input through
// charset.Lookup only when a different value is given.
var cs = flag.String("charset", "utf-8", "input encoding")
// main reads a weighted phrase list from standard input line by line and
// writes the converted lines to standard output.
//
// For each input line:
//   - lines containing ">,<" (phrases joined by commas) are dropped;
//   - lines of the form "<phrase><score>rest" are printed as
//     "<phrase> score rest", with rest stripped of surrounding whitespace;
//   - every other line is printed unchanged.
//
// If the -charset flag is not "utf-8", the input is decoded from that
// encoding first. An unrecognized charset name or a read error terminates the
// program with a non-zero exit status.
func main() {
	flag.Parse()

	var in io.Reader = os.Stdin
	if *cs != "utf-8" {
		// charset.Lookup returns a nil encoding for labels it does not
		// recognize; calling NewDecoder on it would panic, so report the
		// problem instead. The canonical name it also returns is not needed.
		e, _ := charset.Lookup(*cs)
		if e == nil {
			log.Fatalf("unknown charset %q", *cs)
		}
		in = transform.NewReader(in, e.NewDecoder())
	}

	s := bufio.NewScanner(in)
	for s.Scan() {
		line := s.Text()

		// Rules that combine several phrases are not converted.
		if strings.Contains(line, ">,<") {
			continue
		}

		// "><" marks the boundary between the phrase and its score.
		endPhrase := strings.Index(line, "><")
		if endPhrase != -1 {
			phrase := line[:endPhrase+1] // keep the phrase's closing '>'
			rest := line[endPhrase+2:]   // skip the score's opening '<'
			endScore := strings.Index(rest, ">")
			if endScore != -1 {
				score := rest[:endScore]
				rest = strings.TrimSpace(rest[endScore+1:])
				fmt.Println(phrase, score, rest)
				continue
			}
		}

		// Anything that is not a weighted phrase passes through unchanged.
		fmt.Println(line)
	}

	// A scanner error means the output is incomplete; exit non-zero so that
	// callers do not mistake a truncated list for a successful conversion.
	if err := s.Err(); err != nil {
		log.Fatal(err)
	}
}