/
page.go
75 lines (65 loc) · 1.9 KB
/
page.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
package pdf
import (
"bytes"
"io"
"strings"
)
// Page is a page object stored as its underlying Dictionary. Extract reads
// the "Resources" and "Contents" entries from it.
type Page Dictionary
// Extract writes the text shown by the page's content stream to output.Text,
// emitting one newline-terminated line per text-showing command.
//
// Fonts declared under the page's Resources/Font dictionary are loaded first
// so each string can be decoded with the font selected by the most recent
// font command. Extraction is best-effort: lookup failures on the page
// dictionary and write errors on output.Text are ignored, because the method
// has no error return to report them through.
//
// The selected font is tracked for the whole content stream rather than being
// reset at the start of every text block: in PDF the font is part of the text
// state, which may be set outside a text object and is retained across text
// objects of the same stream.
func (page Page) Extract(output *Output) {
	d := Dictionary(page)

	// Load every font declared in the page resources, keyed by resource name.
	fontMap := map[string]*Font{}
	resources, _ := d.GetDictionary("Resources")
	fonts, _ := resources.GetDictionary("Font")
	for name := range fonts {
		info, _ := fonts.GetDictionary(name)
		fontMap[name] = NewFont(info)
	}

	// Parse the page's content stream command by command.
	contents, _ := d.GetStream("Contents")
	parser := NewParser(bytes.NewReader(contents), nil)

	// currentFont persists across text blocks; inText records whether the
	// parser is currently between a text-block start and its matching end.
	// NOTE(review): graphics-state save/restore commands are not tracked, so
	// a font selected inside a saved state stays selected after the restore.
	currentFont := FontDefault
	inText := false

	// writeLine decodes raw with the font selected at call time and writes it
	// followed by a newline.
	writeLine := func(raw []byte) {
		io.WriteString(output.Text, currentFont.Decode(raw))
		io.WriteString(output.Text, "\n")
	}

	for {
		command, operands, err := parser.ReadCommand()
		// Only ReadError ends the stream; any other error is ignored, as before.
		if err == ReadError {
			break
		}

		switch command {
		case KEYWORD_TEXT:
			inText = true

		case KEYWORD_TEXT_END:
			inText = false

		case KEYWORD_TEXT_FONT:
			// The font name is the second-to-last operand (the last one is
			// presumably the font size). Unknown names fall back to the
			// default font.
			fontName, _ := operands.GetName(len(operands) - 2)
			if font, ok := fontMap[fontName]; ok {
				currentFont = font
			} else {
				currentFont = FontDefault
			}

		case KEYWORD_TEXT_SHOW_1, KEYWORD_TEXT_SHOW_2, KEYWORD_TEXT_SHOW_3:
			// Text-showing commands outside a text block are ignored.
			if !inText {
				continue
			}
			// The string to show is always the last operand.
			s, _ := operands.GetString(len(operands) - 1)
			writeLine([]byte(s))

		case KEYWORD_TEXT_POSITION:
			if !inText {
				continue
			}
			// The last operand is an array; its strings are concatenated
			// before decoding so the whole run becomes a single line.
			// NOTE(review): stepping by 2 assumes strings sit at even indices,
			// alternating with numeric adjustments. The PDF spec allows any
			// mix of strings and numbers, so strings at odd indices are
			// skipped here — confirm GetString's behavior on non-string
			// elements before visiting every index.
			var sb strings.Builder
			a, _ := operands.GetArray(len(operands) - 1)
			for i := 0; i < len(a); i += 2 {
				s, _ := a.GetString(i)
				sb.WriteString(string(s))
			}
			writeLine([]byte(sb.String()))
		}
	}
}