/
encoding.go
86 lines (72 loc) · 1.85 KB
/
encoding.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
package main
import (
"bufio"
"bytes"
"fmt"
"github.com/saintfish/chardet"
"golang.org/x/net/html/charset"
"golang.org/x/text/transform"
"io"
"regexp"
)
// Package-level values shared by the UTF-8 helpers in this file.
var (
	// utf8BOM is the three-byte UTF-8 byte order mark (EF BB BF).
	utf8BOM = []byte{0xEF, 0xBB, 0xBF}

	// utf8CharsetPattern matches, case-insensitively, any charset name that
	// ends in "utf8" or "utf-8". Only the end of the name is anchored, so an
	// arbitrary prefix is accepted.
	utf8CharsetPattern = regexp.MustCompile("(?i)utf-?8$")
)
// isUTF8Charset reports whether charsetName is matched by
// utf8CharsetPattern, i.e. whether the name denotes a UTF-8 charset.
func isUTF8Charset(charsetName string) bool {
	matched := utf8CharsetPattern.MatchString(charsetName)
	return matched
}
// addBOM returns a newly allocated slice holding the UTF-8 byte order mark
// followed by data.
//
// The result never shares storage with the package-level utf8BOM slice, even
// when data is empty, so callers may modify it freely. (A plain
// append(utf8BOM, data...) hands back utf8BOM itself when there is nothing to
// append.)
func addBOM(data []byte) []byte {
	out := make([]byte, 0, len(utf8BOM)+len(data))
	out = append(out, utf8BOM...)
	return append(out, data...)
}
// stripBOM returns data without a leading UTF-8 byte order mark. When data
// does not start with the mark it is returned unchanged. The result is a
// sub-slice of data, not a copy.
func stripBOM(data []byte) []byte {
	if !bytes.HasPrefix(data, utf8BOM) {
		return data
	}
	return data[len(utf8BOM):]
}
// encode converts data into the charset named by charsetName and returns the
// encoded bytes.
//
// The charset is resolved with charset.Lookup; an unknown name yields an
// "Unsupported charset" error. When charsetName is a UTF-8 name (see
// isUTF8Charset) the result is prefixed with a byte order mark via addBOM.
// On any error the returned slice is nil.
func encode(data []byte, charsetName string) ([]byte, error) {
	encoding, _ := charset.Lookup(charsetName)
	if encoding == nil {
		return nil, fmt.Errorf("Unsupported charset: %v", charsetName)
	}

	var b bytes.Buffer
	writer := bufio.NewWriter(&b)
	encodeWriter := transform.NewWriter(writer, encoding.NewEncoder())
	if _, err := io.Copy(encodeWriter, bytes.NewReader(data)); err != nil {
		return nil, err
	}
	// Close runs the final transform step with atEOF set. Without it, input
	// the transformer held back (for example an incomplete trailing
	// multi-byte sequence) would be dropped silently. Close does not close
	// the underlying writer, so it must still be flushed afterwards.
	if err := encodeWriter.Close(); err != nil {
		return nil, err
	}
	if err := writer.Flush(); err != nil {
		return nil, err
	}

	if isUTF8Charset(charsetName) {
		return addBOM(b.Bytes()), nil
	}
	return b.Bytes(), nil
}
// decode converts data from the charset named by charsetName and returns the
// bytes produced by that charset's decoder.
//
// The charset is resolved with charset.Lookup; an unknown name yields an
// "Unsupported charset" error. When charsetName is a UTF-8 name (see
// isUTF8Charset) a leading byte order mark is removed via stripBOM.
// On any error the returned slice is nil.
func decode(data []byte, charsetName string) ([]byte, error) {
	enc, _ := charset.Lookup(charsetName)
	if enc == nil {
		return nil, fmt.Errorf("Unsupported charset: %v", charsetName)
	}

	var out bytes.Buffer
	sink := bufio.NewWriter(&out)
	src := transform.NewReader(bytes.NewReader(data), enc.NewDecoder())

	// Stream the decoded bytes into the buffer, then flush whatever the
	// buffered writer is still holding.
	_, err := io.Copy(sink, src)
	if err == nil {
		err = sink.Flush()
	}
	if err != nil {
		return nil, err
	}

	decoded := out.Bytes()
	if !isUTF8Charset(charsetName) {
		return decoded, nil
	}
	return stripBOM(decoded), nil
}
// detectEncoding runs chardet's text detector over data and returns the
// charset name of the best match. If detection fails, the detector's error
// is returned together with an empty name.
func detectEncoding(data []byte) (string, error) {
	best, err := chardet.NewTextDetector().DetectBest(data)
	if err != nil {
		return "", err
	}
	return best.Charset, nil
}