/
utf16_to_utf8.go
42 lines (39 loc) · 1.08 KB
/
utf16_to_utf8.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
// Copyright (c) 2024 Michael D Henderson. All rights reserved.
package adapters
import (
"bytes"
"encoding/binary"
"unicode/utf16"
"unicode/utf8"
)
func UTF16ToUTF8(data []byte) ([]byte, error) {
if len(data)%2 != 0 {
// UTF-16 data must contain an even number of bytes
return nil, ErrMissingFinalByte
}
// verify the BOM
if bytes.HasPrefix(data, []byte{0xfe, 0xff}) {
// as expected
} else if bytes.HasPrefix(data, []byte{0xff, 0xfe}) {
return nil, ErrNotBigEndianUTF16Encoded
} else {
return nil, ErrMissingBOM
}
// consume the BOM
data = data[2:]
// convert the slice of byte to a slice of uint16
chars := make([]uint16, len(data)/2)
if err := binary.Read(bytes.NewReader(data), binary.BigEndian, &chars); err != nil {
return nil, err
}
// create a buffer for the results
dst := bytes.Buffer{}
// convert the UTF-16 to runes, then to UTF-8 bytes
var utfBuffer [utf8.UTFMax]byte
for _, r := range utf16.Decode(chars) {
utf8Size := utf8.EncodeRune(utfBuffer[:], r)
dst.Write(utfBuffer[:utf8Size])
}
// finally, return the results
return dst.Bytes(), nil
}