-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.go
106 lines (91 loc) · 2.5 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
// seehuhn.de/go/pdf - a library for reading and writing PDF files
// Copyright (C) 2023 Jochen Voss <voss@seehuhn.de>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <https://www.gnu.org/licenses/>.
package main
import (
"flag"
"fmt"
"log"
"os"
"runtime"
"runtime/pprof"
"seehuhn.de/go/pdf"
"seehuhn.de/go/pdf/graphics/matrix"
"seehuhn.de/go/pdf/pagetree"
"seehuhn.de/go/pdf/reader"
)
// Command-line flags selecting optional profiling output. Each flag holds a
// file name; the empty string (the default) disables that profile.
var (
	// cpuprofile names the file that receives the CPU profile.
	cpuprofile = flag.String("cpuprofile", "", "write cpu profile to `file`")

	// memprofile names the file that receives the heap profile.
	memprofile = flag.String("memprofile", "", "write memory profile to `file`")
)
// main parses the command line and prints the text content of every PDF
// file named as an argument, optionally writing CPU and heap profiles.
//
// The actual work happens in run: log.Fatal terminates the process via
// os.Exit, which skips deferred calls, so it must only be invoked after
// run has returned and its deferred profiler cleanup has completed.
func main() {
	flag.Parse()
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run performs the work of main and returns the first error encountered.
//
// If the cpuprofile flag is set, CPU profiling covers everything run does
// and is stopped (flushing the profile) when run returns, including on
// error. If the memprofile flag is set, a heap profile is written after all
// files were processed successfully.
func run() error {
	if *cpuprofile != "" {
		f, err := os.Create(*cpuprofile)
		if err != nil {
			return fmt.Errorf("could not create CPU profile: %w", err)
		}
		// Deferred calls run in reverse order, so the file is closed only
		// after StopCPUProfile below has finished writing to it.
		defer f.Close() // close error ignored: best effort for a diagnostic file
		if err := pprof.StartCPUProfile(f); err != nil {
			return fmt.Errorf("could not start CPU profile: %w", err)
		}
		defer pprof.StopCPUProfile()
	}

	for _, fname := range flag.Args() {
		if err := extractText(fname); err != nil {
			return err
		}
	}

	if *memprofile != "" {
		return writeHeapProfile(*memprofile)
	}
	return nil
}

// writeHeapProfile writes a heap profile to the file at path, creating or
// truncating it.
func writeHeapProfile(path string) error {
	f, err := os.Create(path)
	if err != nil {
		return fmt.Errorf("could not create memory profile: %w", err)
	}
	runtime.GC() // get up-to-date statistics
	if err := pprof.WriteHeapProfile(f); err != nil {
		f.Close() // the write error is the one worth reporting
		return fmt.Errorf("could not write memory profile: %w", err)
	}
	// A failed Close can mean buffered profile data never reached the disk.
	if err := f.Close(); err != nil {
		return fmt.Errorf("could not write memory profile: %w", err)
	}
	return nil
}
// extractText opens the PDF file fname and prints the text of each page to
// standard output. Every page is introduced by a "Page N" line followed by
// a blank line, where N counts from 0, and is followed by a newline.
//
// It returns an error if the file cannot be opened, if the PDF reader
// cannot be created, or if parsing a page fails. On a page failure the
// remaining pages are skipped and the returned error names the file and
// page number. Output already printed for earlier pages is not retracted.
func extractText(fname string) error {
	fd, err := os.Open(fname)
	if err != nil {
		return err
	}
	defer fd.Close()

	r, err := pdf.NewReader(fd, nil)
	if err != nil {
		return fmt.Errorf("%s: %w", fname, err)
	}

	contents := reader.New(r, nil)
	// Text is the hook through which extracted text is delivered; it is
	// presumably invoked by ParsePage for each piece of text on the page.
	contents.Text = func(text string) error {
		fmt.Print(text)
		return nil
	}

	pages := pagetree.NewIterator(r)
	pageNo := 0
	var parseErr error
	pages.All()(func(_ pdf.Reference, pageDict pdf.Dict) bool {
		fmt.Println("Page", pageNo)
		fmt.Println()
		if err := contents.ParsePage(pageDict, matrix.Identity); err != nil {
			// Hand the failure to the caller instead of exiting here, so
			// deferred cleanup in this function and its callers still runs.
			parseErr = fmt.Errorf("%s: page %d: %w", fname, pageNo, err)
			return false // stop iterating over the remaining pages
		}
		fmt.Println()
		pageNo++
		return true
	})
	// NOTE(review): errors raised by the page-tree traversal itself are not
	// visible through this callback; confirm whether the iterator reports
	// them separately and check that here if so.
	return parseErr
}