forked from yunabe/lgo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
utf8_reader.go
63 lines (57 loc) · 1.29 KB
/
utf8_reader.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
package main
import (
"errors"
"io"
"unicode/utf8"
)
// errBufTooSmall is returned by utf8AwareReader.Read when the caller's buffer
// is shorter than utf8.UTFMax*2 bytes.
var errBufTooSmall = errors.New("buf is too small")

// utf8AwareReader wraps an io.Reader and avoids ending a successful Read in the
// middle of a UTF-8 encoded rune. Trailing bytes that do not yet form a
// complete rune are held back and prepended to the data of the next Read.
type utf8AwareReader struct {
	// reader is the wrapped data source.
	reader io.Reader
	// residual holds the bytes held back by the previous Read
	// (at most utf8.UTFMax-1 of them).
	residual []byte
	// pendingErr is an error from the wrapped reader whose delivery was
	// postponed so that the held-back bytes could be returned first.
	pendingErr error
}

// newUTF8AwareReader returns a utf8AwareReader that reads from r.
func newUTF8AwareReader(r io.Reader) *utf8AwareReader {
	return &utf8AwareReader{
		reader:   r,
		residual: make([]byte, 0, utf8.UTFMax-1),
	}
}

// Read implements io.Reader.
//
// p must be at least utf8.UTFMax*2 bytes long; otherwise Read returns
// errBufTooSmall without touching the wrapped reader. On a successful read the
// returned bytes end on a rune boundary whenever one exists within the last
// utf8.UTFMax bytes; the bytes after that boundary are kept for the next call.
// If no such boundary exists the input is not valid UTF-8 and everything is
// returned unchanged.
//
// When the wrapped reader fails (for example with io.EOF), any held-back bytes
// are flushed to the caller. If the failing read produced no data, the
// held-back bytes are returned with a nil error first and the error is
// reported by the following call.
func (r *utf8AwareReader) Read(p []byte) (int, error) {
	if r.pendingErr != nil {
		err := r.pendingErr
		r.pendingErr = nil
		return 0, err
	}
	if len(p) < utf8.UTFMax*2 {
		return 0, errBufTooSmall
	}

	// residual never exceeds utf8.UTFMax-1 bytes, so it always fits in p and
	// leaves room for fresh data.
	held := copy(p, r.residual)
	n, err := r.reader.Read(p[held:])
	if n == 0 {
		switch {
		case err == nil:
			// Nothing happened. Keep the held-back bytes so the rune they
			// belong to can still be completed by a later read.
			return 0, nil
		case held > 0:
			// Flush the held-back bytes now and report the error next time.
			r.pendingErr = err
			r.residual = r.residual[:0]
			return held, nil
		}
	}
	n += held
	if err != nil {
		// e.g. io.EOF: no more data will arrive, so nothing is held back.
		r.residual = r.residual[:0]
		return n, err
	}

	// Find the longest prefix of p[:n] that ends with a complete rune, giving
	// up at most utf8.UTFMax-1 trailing bytes.
	for i := 0; i < utf8.UTFMax && i < n; i++ {
		ru, size := utf8.DecodeLastRune(p[:n-i])
		// DecodeLastRune reports an invalid encoding as (RuneError, 1). A
		// correctly encoded U+FFFD has size 3 and is a valid boundary.
		if ru != utf8.RuneError || size > 1 {
			r.residual = append(r.residual[:0], p[n-i:n]...)
			return n - i, nil
		}
	}
	// No rune boundary near the end: the data is not valid UTF-8. Return
	// everything and drop the old residual, which is already part of p[:n].
	r.residual = r.residual[:0]
	return n, nil
}