/
encode.go
114 lines (98 loc) · 3.39 KB
/
encode.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
package util
import (
"github.com/loggie-io/loggie/pkg/core/log"
"golang.org/x/text/encoding"
"golang.org/x/text/encoding/charmap"
"golang.org/x/text/encoding/japanese"
"golang.org/x/text/encoding/korean"
"golang.org/x/text/encoding/simplifiedchinese"
"golang.org/x/text/encoding/traditionalchinese"
"golang.org/x/text/encoding/unicode"
)
// AllEncodings maps charset names to their golang.org/x/text encoding
// implementations. Encode looks names up in this table with an exact,
// case-sensitive match.
var AllEncodings = map[string]encoding.Encoding{
	// default
	"nop":   encoding.Nop,
	"plain": encoding.Nop,
	"utf-8": encoding.Nop,

	// simplified chinese
	"gbk":      simplifiedchinese.GBK, // shadow htmlindex using 'GB18030' for GBK
	"gb18030":  simplifiedchinese.GB18030,
	"hzgb2312": simplifiedchinese.HZGB2312,

	// traditional chinese
	"big5": traditionalchinese.Big5,

	// japanese
	"euc-jp":     japanese.EUCJP,
	"iso2022-jp": japanese.ISO2022JP,
	"shift-jis":  japanese.ShiftJIS,

	// korean
	"euc-kr": korean.EUCKR,

	// 8bit charmap encodings
	"iso8859-6e": charmap.ISO8859_6E,
	"iso8859-6i": charmap.ISO8859_6I,
	"iso8859-8e": charmap.ISO8859_8E,
	"iso8859-8i": charmap.ISO8859_8I,
	"iso8859-1":  charmap.ISO8859_1,  // latin-1
	"iso8859-2":  charmap.ISO8859_2,  // latin-2
	"iso8859-3":  charmap.ISO8859_3,  // latin-3
	"iso8859-4":  charmap.ISO8859_4,  // latin-4
	"iso8859-5":  charmap.ISO8859_5,  // latin/cyrillic
	"iso8859-6":  charmap.ISO8859_6,  // latin/arabic
	"iso8859-7":  charmap.ISO8859_7,  // latin/greek
	"iso8859-8":  charmap.ISO8859_8,  // latin/hebrew
	"iso8859-9":  charmap.ISO8859_9,  // latin-5
	"iso8859-10": charmap.ISO8859_10, // latin-6
	"iso8859-13": charmap.ISO8859_13, // latin-7
	"iso8859-14": charmap.ISO8859_14, // latin-8
	"iso8859-15": charmap.ISO8859_15, // latin-9
	"iso8859-16": charmap.ISO8859_16, // latin-10

	// ibm codepages
	"cp437":       charmap.CodePage437,
	"cp850":       charmap.CodePage850,
	"cp852":       charmap.CodePage852,
	"cp855":       charmap.CodePage855,
	"cp858":       charmap.CodePage858,
	"cp860":       charmap.CodePage860,
	"cp862":       charmap.CodePage862,
	"cp863":       charmap.CodePage863,
	"cp865":       charmap.CodePage865,
	"cp866":       charmap.CodePage866,
	"ebcdic-037":  charmap.CodePage037,
	"ebcdic-1040": charmap.CodePage1140, // legacy key, kept for backward compatibility
	"ebcdic-1140": charmap.CodePage1140, // key matching the code page number
	"ebcdic-1047": charmap.CodePage1047,

	// cyrillic
	"koi8r": charmap.KOI8R,
	"koi8u": charmap.KOI8U,

	// macintosh
	"macintosh":          charmap.Macintosh,
	"macintosh-cyrillic": charmap.MacintoshCyrillic,

	// windows
	"windows1250": charmap.Windows1250, // central and eastern european
	"windows1251": charmap.Windows1251, // russian, serbian cyrillic
	"windows1252": charmap.Windows1252, // legacy
	"windows1253": charmap.Windows1253, // modern greek
	"windows1254": charmap.Windows1254, // turkish
	"windows1255": charmap.Windows1255, // hebrew
	"windows1256": charmap.Windows1256, // arabic
	"windows1257": charmap.Windows1257, // estonian, latvian, lithuanian
	"windows1258": charmap.Windows1258, // vietnamese
	"windows874":  charmap.Windows874,

	// utf16 bom codecs
	"utf-16be-bom": unicode.UTF16(unicode.BigEndian, unicode.UseBOM),
	"utf-16le-bom": unicode.UTF16(unicode.LittleEndian, unicode.UseBOM),
}
// Encode runs context through the encoder of the codec registered under
// charset in AllEncodings and returns the encoder's output.
//
// The charset "utf-8" is a fast path: context is returned as-is, without
// copying. A charset that is not registered (or is registered with a nil
// codec) is reported with a warning and handled with encoding.Nop, the same
// codec AllEncodings registers for "utf-8".
//
// If the encoder fails, the original context is returned together with the
// error.
func Encode(charset string, context []byte) ([]byte, error) {
	if charset == "utf-8" {
		return context, nil
	}

	codec, ok := AllEncodings[charset]
	if !ok || codec == nil {
		log.Warn("unknown Charset('%s')", charset)
		// Fall back to the no-op codec directly instead of re-reading the
		// exported, mutable map: a missing or nil "utf-8" entry would
		// otherwise cause a nil-interface panic on NewEncoder below.
		codec = encoding.Nop
	}

	encoded, err := codec.NewEncoder().Bytes(context)
	if err != nil {
		return context, err
	}
	return encoded, nil
}