Skip to content

Commit

Permalink
Add support for reading page trees. Fixes #26
Browse files Browse the repository at this point in the history
  • Loading branch information
phpdave11 committed Feb 22, 2020
1 parent 63b1ffd commit 2772624
Showing 1 changed file with 73 additions and 30 deletions.
103 changes: 73 additions & 30 deletions reader.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ type PdfReader struct {
f io.ReadSeeker
nBytes int64
sourceFile string
curPage int
alreadyRead bool
}

func NewPdfReaderFromStream(rs io.ReadSeeker) (*PdfReader, error) {
Expand Down Expand Up @@ -1181,6 +1183,40 @@ func (this *PdfReader) readRoot() error {
return nil
}

// Read kids (pages inside a page tree)
func (this *PdfReader) readKids(kids *PdfValue, r int) error {
// Loop through pages and add to result
for i := 0; i < len(kids.Array); i++ {
page, err := this.resolveObject(kids.Array[i])
if err != nil {
return errors.Wrap(err, "Failed to resolve page/pages object")
}

objType := page.Value.Dictionary["/Type"].Token
if objType == "/Page" {
// Set page and increment curPage
this.pages[this.curPage] = page
this.curPage++
} else if objType == "/Pages" {
// Resolve kids
subKids, err := this.resolveObject(page.Value.Dictionary["/Kids"])
if err != nil {
return errors.Wrap(err, "Failed to resolve kids")
}

// Recurse into page tree
err = this.readKids(subKids, r+1)
if err != nil {
return errors.Wrap(err, "Failed to read kids")
}
} else {
return errors.Wrap(err, fmt.Sprintf("Unknown object type '%s'. Expected: /Pages or /Page", objType))
}
}

return nil
}

// Read all pages in PDF
func (this *PdfReader) readPages() error {
var err error
Expand All @@ -1197,18 +1233,19 @@ func (this *PdfReader) readPages() error {
return errors.Wrap(err, "Failed to resolve kids object")
}

// Allocate pages
this.pages = make([]*PdfValue, len(kids.Array))
// Get number of pages
pageCount, err := this.resolveObject(pagesDict.Value.Dictionary["/Count"])
if err != nil {
return errors.Wrap(err, "Failed to get page count")
}

// Loop through pages and add to result
for i := 0; i < len(kids.Array); i++ {
page, err := this.resolveObject(kids.Array[i])
if err != nil {
return errors.Wrap(err, "Failed to resolve kid object")
}
// Allocate pages
this.pages = make([]*PdfValue, pageCount.Int)

// Set page
this.pages[i] = page
// Read kids
err = this.readKids(kids, 0)
if err != nil {
return errors.Wrap(err, "Failed to read kids")
}

return nil
Expand Down Expand Up @@ -1547,30 +1584,36 @@ func (this *PdfReader) _getPageRotation(page *PdfValue) (*PdfValue, error) {
}

func (this *PdfReader) read() error {
var err error
// Only run once
if !this.alreadyRead {
var err error

// Find xref position
err = this.findXref()
if err != nil {
return errors.Wrap(err, "Failed to find xref position")
}
// Find xref position
err = this.findXref()
if err != nil {
return errors.Wrap(err, "Failed to find xref position")
}

// Parse xref table
err = this.readXref()
if err != nil {
return errors.Wrap(err, "Failed to read xref table")
}
// Parse xref table
err = this.readXref()
if err != nil {
return errors.Wrap(err, "Failed to read xref table")
}

// Read catalog
err = this.readRoot()
if err != nil {
return errors.Wrap(err, "Failed to read root")
}
// Read catalog
err = this.readRoot()
if err != nil {
return errors.Wrap(err, "Failed to read root")
}

// Read pages
err = this.readPages()
if err != nil {
return errors.Wrap(err, "Failed to to read pages")
// Read pages
err = this.readPages()
if err != nil {
return errors.Wrap(err, "Failed to to read pages")
}

// Now that this has been read, do not read again
this.alreadyRead = true
}

return nil
Expand Down

0 comments on commit 2772624

Please sign in to comment.