/
parser.go
157 lines (145 loc) · 3.58 KB
/
parser.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
package parser
import (
"bufio"
"errors"
"fmt"
"io"
"strings"
)
// FileHeader represents the header of a W3C Extended Log Format file.
// It holds the values of the '#'-prefixed directive lines collected by
// parseFileHeader.
type FileHeader struct {
// fieldNames lists the column names taken from the "Fields" directive
// (lowercased by parseFileHeader); it can also be set directly through
// FileParser.SetFieldNames.
fieldNames []string
// Software is the value of the "Software" directive.
Software string
// Remark is the value of the "Remark" directive.
Remark string
// Meta holds every other "key: value" directive, keyed by the lowercased key.
Meta map[string]string
}
// HasField reports whether name is one of the header's field names.
// The comparison is an exact string match.
func (h *FileHeader) HasField(name string) bool {
	for idx := 0; idx < len(h.fieldNames); idx++ {
		if h.fieldNames[idx] == name {
			return true
		}
	}
	return false
}
// HasGmtTime reports whether the header lists a field named "gmttime".
func (h *FileHeader) HasGmtTime() bool {
	const gmtTimeField = "gmttime"
	return h.HasField(gmtTimeField)
}
// FieldNames returns a copy of the field names, so the caller may modify the
// result without affecting the header. It returns nil when the header has no
// field names.
func (h *FileHeader) FieldNames() (ret []string) {
	if n := len(h.fieldNames); n > 0 {
		ret = make([]string, n)
		copy(ret, h.fieldNames)
	}
	return ret
}
// parseFileHeader reads the leading directive lines (lines starting with '#')
// from reader and collects them into a FileHeader. Reading stops at the first
// line that does not start with '#'; that line is left unread in reader.
//
// Each directive of the form "#Key: value" is handled by its lowercased key:
// "software" and "remark" fill the matching struct fields, "fields" is split
// on whitespace into lowercased field names, and any other key is stored in
// Meta. Directive lines without a ':' are ignored.
//
// Input that ends after at least one directive line (with or without a final
// newline) yields the parsed header. If the very first read fails, including
// io.EOF on empty input, that error is returned together with a nil header.
func parseFileHeader(reader *bufio.Reader) (*FileHeader, error) {
	h := new(FileHeader)
	h.Meta = make(map[string]string)
	sawDirective := false
	for {
		c, err := reader.Peek(1)
		if err != nil {
			// A file holding only directives is still a valid header; EOF is
			// an error only when nothing has been read at all.
			if err == io.EOF && sawDirective {
				break
			}
			return nil, err
		}
		if c[0] != '#' {
			break
		}
		metaline, err := reader.ReadString('\n')
		// ReadString returns the data read so far together with io.EOF when
		// the last line has no trailing newline; keep that final directive.
		atEOF := err == io.EOF
		if err != nil && !atEOF {
			return nil, err
		}
		sawDirective = true

		// Drop the leading '#' (guaranteed by the Peek above) and padding.
		metaline = strings.TrimSpace(metaline[1:])
		if kv := strings.SplitN(metaline, ":", 2); len(kv) == 2 {
			key := strings.ToLower(strings.TrimSpace(kv[0]))
			value := strings.TrimSpace(kv[1])
			switch key {
			case "software":
				h.Software = value
			case "remark":
				h.Remark = value
			case "fields":
				// strings.Fields splits on any run of whitespace, so both
				// space- and tab-separated names work and empties are dropped.
				h.fieldNames = strings.Fields(strings.ToLower(value))
			default:
				h.Meta[key] = value
			}
		}
		if atEOF {
			break
		}
	}
	return h, nil
}
// FileParser is used to parse a W3C Extended Log Format file.
// It embeds FileHeader, so the header's fields and methods are available
// directly on the parser.
type FileParser struct {
FileHeader
// reader is the buffered input that ParseHeader reads directive lines from.
reader *bufio.Reader
// scanner is created over the same buffered reader in NewFileParser and is
// what NextTo pulls records from.
scanner *Scanner
}
// NewFileParser constructs a FileParser that reads from reader.
// A reader that already is a *bufio.Reader is used as-is; anything else is
// wrapped in a bufio.Reader with a 16 MiB buffer.
func NewFileParser(reader io.Reader) *FileParser {
	buffered, alreadyBuffered := reader.(*bufio.Reader)
	if !alreadyBuffered {
		// use a big buffer to minimize disk reads
		buffered = bufio.NewReaderSize(reader, 16*1024*1024)
	}
	return &FileParser{
		reader:  buffered,
		scanner: NewScanner(buffered),
	}
}
// ParseHeader parses the header part of a W3C Extended Log Format file and
// stores the result in the parser's embedded FileHeader.
// The io.Reader should be at the start of the file.
// On error the existing FileHeader is left unchanged.
func (p *FileParser) ParseHeader() error {
	parsed, err := parseFileHeader(p.reader)
	if err == nil {
		p.FileHeader = *parsed
	}
	return err
}
// SetFieldNames sets the field names manually, as an alternative to reading
// them from the file header with ParseHeader.
//
// The names are copied, so later changes to the caller's slice do not alter
// the parser's columns. Names are stored exactly as given.
// It returns p so that calls can be chained.
func (p *FileParser) SetFieldNames(fieldNames []string) *FileParser {
	p.FileHeader.fieldNames = append([]string(nil), fieldNames...)
	return p
}
// Next returns the next parsed log line.
// It is NextTo called with no line to reuse.
func (p *FileParser) Next() (*Line, error) {
	var reuse *Line // nil: NextTo gets no existing line
	return p.NextTo(reuse)
}
// NextTo returns the next parsed log line, reusing the given line.
// When l is nil a new Line is allocated; otherwise l is reset and refilled.
//
// It returns an error when no field names are known or when the scanned
// record does not have exactly one value per field name. When the scanner
// has nothing more to deliver, it returns a nil line and the scanner's error.
func (p *FileParser) NextTo(l *Line) (*Line, error) {
	names := p.FileHeader.fieldNames
	if len(names) == 0 {
		return nil, errors.New("No field names")
	}
	if !p.scanner.Scan() {
		return nil, p.scanner.Err()
	}

	if l != nil {
		// reuse the given line, but make sure to clean it before usage
		l.Reset(names)
	} else {
		// allocate a new line
		l = NewLine(names)
	}

	values := p.scanner.Strings()
	if len(values) != len(names) {
		return nil, fmt.Errorf("Wrong number of fields: expected = %d, actual = %d", len(names), len(values))
	}
	for idx := range names {
		l.add(names[idx], values[idx])
	}
	return l, nil
}