/
keyvalue.go
132 lines (116 loc) · 3.15 KB
/
keyvalue.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
package mutate
import (
"errors"
"fmt"
"strconv"
"strings"
"unicode"
"github.com/qiniu/logkit/utils/models"
)
// errMsg is the common suffix appended to every error produced in this file.
const errMsg = "will keep origin data in pandora_stash if disable_record_errdata field is false"
// Parser turns a single line of text into key/value pairs.
type Parser struct {
// KeepString, when true, makes Parse store every value as a string. When
// false, Parse tries strconv.ParseFloat and then strconv.ParseBool on each
// value before falling back to the string itself.
KeepString bool
// Splitter is the separator between a key and its value; Parse hands it to
// splitKV unchanged.
Splitter string
}
// Parse splits line into key/value pairs on p.Splitter and returns them as a
// single models.Data record wrapped in a one-element slice.
//
// Surrounding double quotes are removed from keys and values. Unless
// p.KeepString is set, each value is stored as a float64 if
// strconv.ParseFloat accepts it, otherwise as a bool if strconv.ParseBool
// accepts it, otherwise as the raw string. When a key occurs more than once
// the last occurrence wins.
//
// An error is returned when the line cannot be split, when keys and values do
// not pair up, or when a key or value is empty after quote removal.
func (p *Parser) Parse(line string) ([]models.Data, error) {
	pairs, err := splitKV(line, p.Splitter)
	if err != nil {
		return nil, err
	}
	// splitKV returns a flat key, value, key, value, ... list; an odd length
	// means the final key has no value.
	if len(pairs)%2 == 1 {
		return nil, fmt.Errorf("key value not match, %s", errMsg)
	}

	field := make(models.Data, len(pairs)/2)
	for i := 0; i < len(pairs); i += 2 {
		key, value := pairs[i], pairs[i+1]

		// Strip double quotes. This targets inputs such as foo="" and
		// "foo=bar"; tokens like a"b"c=d"e"f, whose quotes are not at the
		// very start and end, are considered valid and left untouched.
		kNum := strings.Count(key, `"`)
		vNum := strings.Count(value, `"`)
		// One quote pair wraps the whole `"key=value"` token: the opening
		// quote is on the key and the closing quote is on the value.
		if kNum%2 == 1 && vNum%2 == 1 &&
			strings.HasPrefix(key, `"`) && strings.HasSuffix(value, `"`) {
			key = key[1:]
			value = value[:len(value)-1]
		}
		// The key (respectively the value) is quoted on its own. The counts
		// were taken before any stripping above, so these branches never
		// fire after the wrapping-pair case.
		if kNum%2 == 0 && len(key) > 1 &&
			strings.HasPrefix(key, `"`) && strings.HasSuffix(key, `"`) {
			key = key[1 : len(key)-1]
		}
		if vNum%2 == 0 && len(value) > 1 &&
			strings.HasPrefix(value, `"`) && strings.HasSuffix(value, `"`) {
			value = value[1 : len(value)-1]
		}

		if len(key) == 0 || len(value) == 0 {
			return nil, fmt.Errorf("no value or key was parsed after logfmt, %s", errMsg)
		}

		// Adjust the data type: prefer a number, then a boolean, and fall
		// back to the string when neither parses or KeepString is set.
		if !p.KeepString {
			if fValue, err := strconv.ParseFloat(value, 64); err == nil {
				field[key] = fValue
				continue
			}
			if bValue, err := strconv.ParseBool(value); err == nil {
				field[key] = bValue
				continue
			}
		}
		field[key] = value
	}

	if len(field) == 0 {
		return nil, fmt.Errorf("data is empty after parse, %s", errMsg)
	}
	return []models.Data{field}, nil
}
func splitKV(line string, sep string) ([]string, error) {
data := make([]string, 0, 100)
if !strings.Contains(line, sep) {
return nil, errors.New(fmt.Sprintf("no splitter exist, %s", errMsg))
}
kvArr := make([]string, 0, 100)
isKey := true
vhead := 0
lastSpace := 0
pos := 0
sepLen := len(sep)
// key或value值中包含sep的情况;默认key中不包含sep;导致algorithm = 1+1=2会变成合法
for pos+sepLen <= len(line) {
if unicode.IsSpace(rune(line[pos : pos+1][0])) {
nextSep := strings.Index(line[pos+1:], sep)
if nextSep == -1 {
break
}
if strings.TrimSpace(line[pos+1:pos+1+nextSep]) != "" {
lastSpace = pos
pos++
continue
}
}
if line[pos:pos+sepLen] == sep {
if isKey {
kvArr = append(kvArr, strings.TrimSpace(line[vhead:pos]))
isKey = false
} else {
if lastSpace <= vhead {
pos++
continue
}
kvArr = append(kvArr, strings.TrimSpace(line[vhead:lastSpace]))
kvArr = append(kvArr, strings.TrimSpace(line[lastSpace:pos]))
}
vhead = pos + sepLen
pos = pos + sepLen - 1
}
pos++
}
if vhead < len(line) {
kvArr = append(kvArr, strings.TrimSpace(line[vhead:]))
}
data = append(data, kvArr...)
return data, nil
}