forked from pgaskin/BookBrowser
/
pdf.go
120 lines (97 loc) · 2.1 KB
/
pdf.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
package pdf
import (
"crypto/sha1"
"fmt"
"image"
"io"
"io/ioutil"
"os"
"path/filepath"
"runtime/debug"
"github.com/beevik/etree"
"github.com/geek1011/BookBrowser/booklist"
"github.com/geek1011/BookBrowser/formats"
"github.com/geek1011/BookBrowser/util"
"github.com/pkg/errors"
)
// pdf is the book wrapper produced by this package's loader. It carries only
// the metadata record; no cover image is ever extracted.
type pdf struct {
	book *booklist.Book
}

// Book returns the metadata record held by this pdf.
func (p *pdf) Book() *booklist.Book {
	return p.book
}

// HasCover always reports false: this type never provides a cover.
func (p *pdf) HasCover() bool {
	return false
}

// GetCover always fails with a "no cover" error and a nil image.
func (p *pdf) GetCover() (image.Image, error) {
	return nil, errors.New("no cover")
}
// load builds a BookInfo for the PDF at filename.
//
// It records the file's path, size, modification time and SHA-1 hash, then
// looks for an embedded XMP packet and, when one is present and declares the
// format "application/pdf", takes the title and author from it. The title
// falls back to the file's base name when the packet supplies none.
//
// Any panic raised while loading is recovered and reported as an error, in
// which case the returned BookInfo is nil.
func load(filename string) (bi formats.BookInfo, ferr error) {
	defer func() {
		if r := recover(); r != nil {
			bi = nil
			// %v rather than %s: a panic value can be of any type, and %s
			// renders non-string, non-error values as "%!s(...)".
			ferr = fmt.Errorf("unknown error: %v", r)
		}
	}()

	fi, hash, err := statAndHash(filename)
	if err != nil {
		return nil, err
	}

	p := &pdf{book: &booklist.Book{}}
	p.book.FilePath = filename
	p.book.FileSize = fi.Size()
	p.book.ModTime = fi.ModTime()
	p.book.Hash = hash
	p.book.Title = filepath.Base(filename)

	xmp, err := readXMPPacket(filename)
	if err != nil {
		return nil, err
	}
	// readXMPPacket held the entire file in memory; now that its buffers are
	// out of scope, force a collection and hand the memory back to the OS on
	// every remaining return path.
	defer debug.FreeOSMemory()

	// Make sure it is a pdf, not another piece of embedded RDF metadata.
	if e := xmp.FindElement("//format"); e != nil && e.Text() != "application/pdf" {
		return p, nil
	}
	if e := xmp.FindElement("//title/Alt/li"); e != nil {
		p.book.Title = e.Text()
	}
	if e := xmp.FindElement("//creator/Seq/li"); e != nil {
		p.book.Author = e.Text()
	}
	return p, nil
}

// statAndHash opens filename once and returns its FileInfo together with the
// hex-encoded SHA-1 of its contents. The file is closed on every path,
// including when a panic unwinds through this function.
func statAndHash(filename string) (os.FileInfo, string, error) {
	f, err := os.Open(filename)
	if err != nil {
		return nil, "", err
	}
	defer f.Close()

	fi, err := f.Stat()
	if err != nil {
		return nil, "", errors.Wrap(err, "could not stat book")
	}

	s := sha1.New()
	n, err := io.Copy(s, f)
	// A short copy without an error means the file changed under us or the
	// read was truncated; treat it as a hashing failure.
	if err == nil && n != fi.Size() {
		err = errors.New("could not read whole file")
	}
	if err != nil {
		return nil, "", errors.Wrap(err, "could not hash book")
	}
	return fi, fmt.Sprintf("%x", s.Sum(nil)), nil
}

// readXMPPacket reads filename into memory, narrows it to the text selected
// by util.StringBetween for the "<?xpacket begin" and "</x:xmpmeta>" markers
// (minus everything up to the first ">"), and parses that text as XML.
//
// The full file contents are confined to this function so that they become
// collectable as soon as it returns.
func readXMPPacket(filename string) (*etree.Document, error) {
	c, err := ioutil.ReadFile(filename)
	if err != nil {
		return nil, err
	}

	str := util.StringBetween(string(c), "<?xpacket begin", "</x:xmpmeta>")
	str = util.StringAfter(str, ">")

	xmp := etree.NewDocument()
	if err := xmp.ReadFromString(str); err != nil {
		return nil, err
	}
	return xmp, nil
}
// init registers load with the formats package under the name "pdf"
// (presumably the file extension it should be used for — confirm against
// formats.Register).
func init() {
formats.Register("pdf", load)
}