-
Notifications
You must be signed in to change notification settings - Fork 0
/
read.go
166 lines (145 loc) · 3.65 KB
/
read.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
package mbox
import (
"bufio"
"bytes"
"errors"
"fmt"
"io"
"net/mail"
"os"
)
// MessageListener is the callback ReadFile invokes once per mbox entry.
//
// The fromLine argument is the entry's From_ line, including its trailing
// carriage return and line feed. The raw argument holds the unparsed bytes of
// the message that follows the From_ line, and msg is that same message as
// parsed by net/mail.
//
// Implementations must retain neither raw nor msg.Body after returning:
// ReadFile resets and reuses the backing buffer for the next entry.
type MessageListener func(fromLine string, raw []byte, msg *mail.Message)
// ErrNotMbox signals file rejection. ReadFile wraps it when the first line of
// the file is not an acceptable From_ line; test for it with errors.Is.
var ErrNotMbox = errors.New("not an mbox")
// ReadFile calls the listener for each entry read from file. The return is
// io.EOF if, and only if the file has no content.
//
// The file must start with a From_ line as accepted by IsFromLine; otherwise
// the error wraps ErrNotMbox. Each subsequent From_ line terminates the
// current entry and starts the next one. Parse errors are prefixed with the
// file name and the line number, or the line range of the entry concerned.
// Reading stops at the first error, which may be after onMessage was already
// invoked for earlier entries.
func ReadFile(file string, onMessage MessageListener) error {
	// input stream
	f, err := os.Open(file)
	if err != nil {
		return err
	}
	defer f.Close()
	r := bufio.NewReader(f)

	// first From_ line
	line, err := r.ReadSlice('\n')
	switch {
	case err == nil:
		if !IsFromLine(line) {
			return fmt.Errorf("%s:1: %w", file, ErrNotMbox)
		}

	case errors.Is(err, bufio.ErrBufferFull):
		// The line fills the entire read buffer, so line[:5] is in range.
		if string(line[:5]) != "From " {
			return fmt.Errorf("%s:1: %w", file, ErrNotMbox)
		}
		return fmt.Errorf("%s:1: From_ line exceeds %d bytes: %w", file, r.Size(), ErrNotMbox)

	case errors.Is(err, io.EOF):
		// The file ended before the first line feed. Distinguish an empty
		// file, content that cannot be the start of a From_ line, and a
		// From_ line that was cut short.
		switch {
		case len(line) == 0:
			return io.EOF
		case line[0] != 'F',
			len(line) > 1 && line[1] != 'r',
			len(line) > 2 && line[2] != 'o',
			len(line) > 3 && line[3] != 'm',
			len(line) > 4 && line[4] != ' ':
			return fmt.Errorf("%s: %w", file, ErrNotMbox)
		default:
			return fmt.Errorf("%s:1: From_ line got %w", file, io.ErrUnexpectedEOF)
		}

	default:
		return err
	}

	// ReadSlice output is only valid until the next read, so copy it.
	fromLine := string(line)
	// line number of the From_ line which started the current entry
	fromLineN := 1

	// accumulates the message lines of the current entry
	var buf bytes.Buffer
	for lineN := 2; ; lineN++ {
		line, err := r.ReadSlice('\n')
		switch {
		case err == nil:
			if !IsFromLine(line) {
				buf.Write(line)
				continue // hot-path
			}

			// call listener
			raw := buf.Bytes()
			msg, err := mail.ReadMessage(&buf)
			if err != nil {
				return fmt.Errorf("%s:%d–%d: %w", file, fromLineN, lineN-1, err)
			}
			onMessage(fromLine, raw, msg)

			// next; remember where the new entry starts such that
			// errors report the correct line range
			fromLine = string(line)
			fromLineN = lineN
			buf.Reset()

		case errors.Is(err, io.EOF):
			// A final line without line feed is considered truncated.
			if len(line) != 0 {
				return fmt.Errorf("%s:%d: %w", file, lineN, io.ErrUnexpectedEOF)
			}

			// call listener (last time)
			raw := buf.Bytes()
			msg, err := mail.ReadMessage(&buf)
			if err != nil {
				return fmt.Errorf("%s:%d–%d: %w", file, fromLineN, lineN-1, err)
			}
			onMessage(fromLine, raw, msg)
			return nil // done

		case errors.Is(err, bufio.ErrBufferFull):
			// The line exceeds the read buffer. Keep what we have and
			// append the remainder up to and including the line feed.
			buf.Write(line)
			err = copyLine(&buf, r)
			if err != nil {
				return fmt.Errorf("%s:%d: %w", file, lineN, err)
			}

		default:
			return fmt.Errorf("%s:%d: %w", file, lineN, err)
		}
	}
}
// copyLine appends the remainder of the current line from r to buf, up to and
// including the line feed. It keeps reading while the line overflows the read
// buffer of r. Reaching the end of input before a line feed is reported as
// io.ErrUnexpectedEOF; any other read error is wrapped as is. Bytes read are
// written to buf even when an error follows.
func copyLine(buf *bytes.Buffer, r *bufio.Reader) error {
	for {
		chunk, err := r.ReadSlice('\n')
		buf.Write(chunk)

		if err == nil {
			// line feed reached
			return nil
		}
		if errors.Is(err, bufio.ErrBufferFull) {
			// more of the same line pending
			continue
		}
		if errors.Is(err, io.EOF) {
			return fmt.Errorf("excessive line got %w", io.ErrUnexpectedEOF)
		}
		return fmt.Errorf("excessive line stranded: %w", err)
	}
}
// IsFromLine returns whether line matches the mbox header pattern. A match
// starts with "From ", followed by an address acceptable to mail.ParseAddress
// and a space, and it ends with a space, four ASCII decimals (the year), a
// carriage return and a line feed.
func IsFromLine(line []byte) bool {
	const prefix = "From "

	n := len(line)
	if n < 12 || string(line[:len(prefix)]) != prefix {
		return false
	}
	if line[n-7] != ' ' || line[n-2] != '\r' || line[n-1] != '\n' {
		return false
	}

	// year should be ASCII decimal
	for _, digit := range line[n-6 : n-2] {
		if digit < '0' || digit > '9' {
			return false
		}
	}

	// The address spans from the prefix until the first space. A space is
	// always found because the one at n-7 was verified above.
	rest := line[len(prefix):]
	end := bytes.IndexByte(rest, ' ')
	if _, err := mail.ParseAddress(string(rest[:end])); err != nil {
		return false
	}
	return true
}