-
Notifications
You must be signed in to change notification settings - Fork 7
/
custom.go
123 lines (112 loc) · 2.12 KB
/
custom.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
package wubi
import (
"bufio"
"bytes"
"strconv"
"strings"
"slices"
"golang.org/x/net/html/charset"
)
// Custom is a line-oriented text format whose column layout is described by
// a rule string (see newCustom). It embeds Template.
type Custom struct {
Template
// sep is the separator placed between the columns of one line.
sep string
// rule lists the column kinds in order: "w" (word), "c" (code), "r" (rank).
rule []string
// encoding is the charset label handed to charset.Lookup by Marshal.
encoding string
}
// newCustom builds a Custom format from a rule string and a charset label.
//
// The rule is split on "|". Its first item is the column separator, where
// "t" stands for a tab, "s" for a space, and anything else is used verbatim.
// The remaining items name the columns in order: w = word, c = code,
// r = rank.
//
// Examples: Duoduo uses "t|w|c", Bingling uses "t|c|w".
func newCustom(rule string, e string) *Custom {
	parts := strings.Split(rule, "|")

	// Expand the shorthand separators; any other value is taken literally.
	sep := parts[0]
	switch sep {
	case "t":
		sep = "\t"
	case "s":
		sep = " "
	}

	c := &Custom{
		sep:      sep,
		rule:     parts[1:],
		encoding: e,
	}
	c.CanMarshal = true
	return c
}
// init registers the two built-in Custom formats in FormatList,
// Bingling first and Duoduo second.
func init() {
	FormatList = append(FormatList, NewBingling())
	FormatList = append(FormatList, NewDuoduo())
}
// NewDuoduo returns the "多多" format: tab-separated lines with the word
// column followed by the code column, using the "UTF-16LE" charset label.
func NewDuoduo() *Custom {
	duoduo := newCustom("t|w|c", "UTF-16LE")
	duoduo.Name, duoduo.ID = "多多", "duoduo,dd"
	return duoduo
}
// NewBingling returns the "冰凌" format: tab-separated lines with the code
// column followed by the word column, using the "UTF-8" charset label.
func NewBingling() *Custom {
	bingling := newCustom("t|c|w", "UTF-8")
	bingling.Name, bingling.ID = "冰凌", "bingling,bl"
	return bingling
}
// Unmarshal reads r line by line and converts every non-empty line into an
// Entry.
//
// A line is split on f.sep and its columns are mapped positionally through
// f.rule: "w" fills the word, "c" the code and "r" the rank. Columns missing
// from a short line leave the corresponding value at its zero value. Empty
// lines are skipped so they do not yield entries with no word and no code.
//
// When the rule contains an "r" column, f.HasRank is set to true.
func (f *Custom) Unmarshal(r *bytes.Reader) []*Entry {
	// Capacity guess: one entry per 256 bytes of input.
	di := make([]*Entry, 0, r.Size()>>8)
	scan := bufio.NewScanner(r)
	for scan.Scan() {
		line := scan.Text()
		if line == "" {
			// Nothing to map; emitting an all-empty Entry would only
			// pollute the result.
			continue
		}
		cols := strings.Split(line, f.sep)
		var word, code string
		var rank int
		for i, kind := range f.rule {
			if i >= len(cols) {
				// The line has fewer columns than the rule describes.
				break
			}
			switch kind {
			case "w":
				word = cols[i]
			case "c":
				code = cols[i]
			case "r":
				// Best effort: the conversion error is deliberately
				// discarded and the value Atoi returns is used as is.
				rank, _ = strconv.Atoi(cols[i])
			}
		}
		di = append(di, &Entry{word, code, rank})
	}
	if slices.Contains(f.rule, "r") {
		f.HasRank = true
	}
	return di
}
// Marshal serialises di into the text layout described by f.rule.
//
// Every entry becomes one line terminated by "\r\n"; its columns are written
// in rule order ("w" word, "c" code, "r" rank) and joined with f.sep. If the
// rule contains an "r" column and hasRank is false, the entries are first
// passed through GenRank.
//
// The text is transcoded with the encoding that charset.Lookup resolves from
// f.encoding. For the names "utf-16le" and "utf-16be" the matching byte order
// mark is written first. If the label is not recognised, the text is written
// as is instead of dereferencing a nil encoding.
func (f *Custom) Marshal(di []*Entry, hasRank bool) []byte {
	var buf bytes.Buffer
	buf.Grow(len(di))

	// Generate ranks when the layout needs them but the caller has none.
	if slices.Contains(f.rule, "r") && !hasRank {
		di = GenRank(di)
	}

	enc, name := charset.Lookup(f.encoding)

	// The BOM goes straight into the buffer, ahead of the encoded text.
	switch name {
	case "utf-16le":
		buf.Write([]byte{0xff, 0xfe})
	case "utf-16be":
		buf.Write([]byte{0xfe, 0xff})
	}

	// charset.Lookup yields a nil Encoding for an unknown label; in that
	// case write the bytes untranscoded rather than panic.
	write := buf.Write
	if enc != nil {
		write = enc.NewEncoder().Writer(&buf).Write
	}

	last := len(f.rule) - 1
	for _, v := range di {
		for i, kind := range f.rule {
			switch kind {
			case "w":
				write([]byte(v.Word))
			case "c":
				write([]byte(v.Code))
			case "r":
				write([]byte(strconv.Itoa(v.Rank)))
			}
			// Separator between columns, but not after the last one.
			if i != last {
				write([]byte(f.sep))
			}
		}
		write([]byte{'\r', '\n'})
	}
	return buf.Bytes()
}