forked from asifjalil/cli
/
strutil.go
71 lines (64 loc) · 1.69 KB
/
strutil.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
package cli
import (
"unicode/utf16"
"unicode/utf8"
)
// replacementChar is the Unicode replacement character (U+FFFD).
const replacementChar = '\uFFFD'

// UTF-16 surrogate boundaries. Code units in [surr1, surr2) encode the high
// 10 bits of a surrogate pair and code units in [surr2, surr3) encode the low
// 10 bits; the resulting value is those 20 bits plus 0x10000.
const (
	surr1 = 0xd800
	surr2 = 0xdc00
	surr3 = 0xe000
)
// stringToUTF16 encodes the UTF-8 string s as UTF-16 and appends a
// terminating NUL code unit to the result.
func stringToUTF16(s string) []uint16 {
	runes := append([]rune(s), 0)
	return utf16.Encode(runes)
}
// utf16ToString decodes the UTF-16 sequence s into a UTF-8 string.
// Decoding stops at the first NUL code unit, which is not included in the
// result; if s contains no NUL, the whole sequence is decoded.
func utf16ToString(s []uint16) string {
	end := len(s)
	for idx := 0; idx < len(s); idx++ {
		if s[idx] == 0 {
			end = idx
			break
		}
	}
	decoded := utf16.Decode(s[:end])
	return string(decoded)
}
// stringToUTF16Ptr returns a pointer to the first code unit of the
// NUL-terminated UTF-16 encoding of the UTF-8 string s.
func stringToUTF16Ptr(s string) *uint16 {
	encoded := stringToUTF16(s)
	return &encoded[0]
}
// utf16ToUTF8 returns the UTF-8 encoding of the UTF-16 sequence s.
// Conversion stops at the first NUL code unit, which is not included in the
// result; if s contains no NUL, the whole sequence is converted. A valid
// surrogate pair is decoded into a single code point, and any surrogate that
// is not part of a valid pair is replaced by U+FFFD.
func utf16ToUTF8(s []uint16) []byte {
	// Trim at the terminating NUL, if any.
	for i, v := range s {
		if v == 0 {
			s = s[:i]
			break
		}
	}

	// Two bytes per code unit is only an initial estimate; append grows the
	// buffer when the text needs more.
	buf := make([]byte, 0, len(s)*2)
	var scratch [utf8.UTFMax]byte
	for i := 0; i < len(s); i++ {
		r := rune(s[i])
		if utf16.IsSurrogate(r) {
			// DecodeRune yields U+FFFD unless (r, next) is a valid
			// high/low pair, so a lone or misordered surrogate ends up
			// as the replacement character.
			pair := utf8.RuneError
			if i+1 < len(s) {
				pair = utf16.DecodeRune(r, rune(s[i+1]))
			}
			if pair != utf8.RuneError {
				i++ // the low surrogate was consumed as part of the pair
			}
			r = pair
		}
		n := utf8.EncodeRune(scratch[:], r)
		buf = append(buf, scratch[:n]...)
	}
	return buf
}