/
encoder.go
101 lines (84 loc) · 3.07 KB
/
encoder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
package textencoding
import (
"encoding/binary"
"github.com/nareshrajaram/unipdf/common"
"github.com/nareshrajaram/unipdf/core"
)
// CharCode is a character code used in the specific encoding.
// It is an unsigned 16-bit value, so a code always fits in at most two bytes.
type CharCode uint16
// GlyphName is the name of a glyph, represented as a plain string.
type GlyphName string
// TextEncoder defines the common methods that a text encoder implementation must have in UniDoc.
// The methods fall into three groups: whole-string conversion (Encode, Decode),
// single-character conversion (RuneToCharcode, CharcodeToRune), and export of the
// encoding itself as a PDF object (ToPdfObject).
type TextEncoder interface {
// String returns a string that describes the TextEncoder instance.
String() string
// Encode converts the Go unicode string to a PDF encoded string.
Encode(str string) []byte
// Decode converts PDF encoded string to a Go unicode string.
Decode(raw []byte) string
// RuneToCharcode returns the PDF character code corresponding to rune `r`.
// The bool return flag is true if there was a match, and false otherwise.
// This is usually implemented as RuneToGlyph->GlyphToCharcode.
RuneToCharcode(r rune) (CharCode, bool)
// CharcodeToRune returns the rune corresponding to character code `code`.
// The bool return flag is true if there was a match, and false otherwise.
// This is usually implemented as CharcodeToGlyph->GlyphToRune.
CharcodeToRune(code CharCode) (rune, bool)
// ToPdfObject returns a PDF Object that represents the encoding.
ToPdfObject() core.PdfObject
}
// Convenience functions
// encodeString8bit encodes the Go unicode string `raw` into a PDF string using the
// encoder `enc`, emitting exactly one byte per successfully mapped rune.
// A rune is dropped from the output (and reported at debug level) when `enc` has no
// character code for it or when the code does not fit into a single byte.
func encodeString8bit(enc TextEncoder, raw string) []byte {
	// One output byte per rune at most, so len(raw) bytes is always enough room.
	out := make([]byte, 0, len(raw))
	for _, r := range raw {
		code, ok := enc.RuneToCharcode(r)
		if ok && code <= 0xff {
			out = append(out, byte(code))
			continue
		}
		// Unmapped rune, or a code too wide for an 8-bit encoding: skip it.
		common.Log.Debug("Failed to map rune to charcode for rune 0x%04x", r)
	}
	return out
}
// encodeString16bit encodes the Go unicode string `raw` into a PDF string using the
// encoder `enc`. Every successfully mapped rune becomes two bytes: its character code
// in big-endian order. Runes for which `enc` has no character code are dropped from
// the output and reported at debug level.
func encodeString16bit(enc TextEncoder, raw string) []byte {
	// Pipeline: runes -> character codes -> byte pairs.
	input := []rune(raw)
	out := make([]byte, 0, 2*len(input))

	// Scratch space for one serialized code, reused across iterations.
	var pair [2]byte
	for i := range input {
		code, found := enc.RuneToCharcode(input[i])
		if found {
			binary.BigEndian.PutUint16(pair[:], uint16(code))
			out = append(out, pair[0], pair[1])
			continue
		}
		common.Log.Debug("Failed to map rune to charcode. rune=%+q", input[i])
	}
	return out
}
// decodeString16bit decodes the PDF encoded string `raw` into a Go unicode string
// using the encoder `enc`. The input is consumed two bytes at a time, each pair being
// read as a big-endian character code. If `raw` has an odd length, the final byte is
// taken as the high byte of a code whose low byte is zero. Codes for which `enc` has
// no rune are dropped from the output and reported at debug level.
func decodeString16bit(enc TextEncoder, raw []byte) string {
	// Pipeline: byte pairs -> character codes -> runes.
	n := len(raw)
	out := make([]rune, 0, (n+1)/2)
	for i := 0; i < n; i += 2 {
		// Assemble the pair; a missing second byte stays at its zero value.
		var pair [2]byte
		pair[0] = raw[i]
		if i+1 < n {
			pair[1] = raw[i+1]
		}
		code := CharCode(binary.BigEndian.Uint16(pair[:]))

		r, found := enc.CharcodeToRune(code)
		if found {
			out = append(out, r)
			continue
		}
		common.Log.Debug("Failed to map charcode to rune. charcode=%#x", code)
	}
	return string(out)
}