From 515e9dbdaa81934d6a4a85502cd76826027c234e Mon Sep 17 00:00:00 2001 From: Horst Rutter Date: Tue, 27 Nov 2018 02:26:16 +0100 Subject: [PATCH] name tree fixes --- README.md | 4 ++- lzw/README.md | 2 +- pkg/api/process_test.go | 5 ++++ pkg/pdfcpu/attach.go | 14 ++++----- pkg/pdfcpu/nameTree.go | 18 ++++++----- pkg/pdfcpu/validate/nameTree.go | 23 +++++++++----- pkg/pdfcpu/validate/structTree.go | 6 +++- pkg/pdfcpu/validate/xReftable.go | 20 +++++-------- pkg/pdfcpu/xreftable.go | 50 +++++++++++++++---------------- tiff/README.md | 2 +- 10 files changed, 78 insertions(+), 66 deletions(-) diff --git a/README.md b/README.md index b6208969..a414ef01 100644 --- a/README.md +++ b/README.md @@ -99,7 +99,6 @@ Required build version: go1.9 and up ## Contributing -* By participating in any form or contributing to `pdfcpu` you are are expected to uphold our [Code of Conduct](CODE_OF_CONDUCT.md). * Please open an issue if you find a bug or want to propose a change. * Feature requests - always welcome. * Bug fixes - always welcome. @@ -114,6 +113,9 @@ Thanks goes to these wonderful people: | :---: | :---: | :---: | :---: | :---: | :---: | :---: | +## Code of Conduct + +Please note that this project is released with a Contributor [Code of Conduct](CODE_OF_CONDUCT.md). By participating in this project you agree to abide by its terms. ## Disclaimer diff --git a/lzw/README.md b/lzw/README.md index 9716d283..798b8b0a 100644 --- a/lzw/README.md +++ b/lzw/README.md @@ -2,7 +2,7 @@ * This is a consolidated version of `compress/lzw` that supports GIF, TIFF and PDF. * Please refer to this [golang proposal](https://github.com/golang/go/issues/25409) for details. -* `pdfcpu` also hosts an improved version of Go's TIFF package at [github.com/hhrutter/pdfcpu/tiff](https://github.com/hhrutter/pdfcpu/tiff) leveraging the improved `compress/lzw`. +* `pdfcpu` also hosts an improved version of Go's TIFF package at [github.com/hhrutter/pdfcpu/tiff](https://github.com/hhrutter/pdfcpu/tree/master/tiff) leveraging the improved `compress/lzw`. ## Background diff --git a/pkg/api/process_test.go b/pkg/api/process_test.go index 11d39985..d1d840ee 100644 --- a/pkg/api/process_test.go +++ b/pkg/api/process_test.go @@ -343,6 +343,11 @@ func TestOptimizeCommandWithLF(t *testing.T) { t.Fatalf("TestOptimizeCommandWithLF: %v\n", err) } + _, err = Process(ValidateCommand(outFile, config)) + if err != nil { + t.Fatalf("TestOptimizeCommandWithLF validation: %v\n", err) + } + } } diff --git a/pkg/pdfcpu/attach.go b/pkg/pdfcpu/attach.go index fe7c0bc4..ea11e91c 100644 --- a/pkg/pdfcpu/attach.go +++ b/pkg/pdfcpu/attach.go @@ -253,7 +253,7 @@ func AttachList(xRefTable *XRefTable) (list []string, err error) { log.Debug.Println("List begin") - if !xRefTable.Valid && xRefTable.Names["EmbeddedFiles"] == nil { + if !xRefTable.Valid { err = xRefTable.LocateNameTree("EmbeddedFiles", false) if err != nil { return nil, err @@ -280,7 +280,7 @@ func AttachExtract(ctx *Context, files StringSet) (err error) { log.Debug.Println("Extract begin") - if !ctx.Valid && ctx.Names["EmbeddedFiles"] == nil { + if !ctx.Valid { err = ctx.LocateNameTree("EmbeddedFiles", false) if err != nil { return err @@ -308,11 +308,9 @@ func AttachAdd(xRefTable *XRefTable, files StringSet) (ok bool, err error) { log.Debug.Println("Add begin") - if xRefTable.Names["EmbeddedFiles"] == nil { - err := xRefTable.LocateNameTree("EmbeddedFiles", true) - if err != nil { - return false, err - } + err = xRefTable.LocateNameTree("EmbeddedFiles", true) + if err != nil { + return false, err } ok, err = addAttachedFiles(xRefTable, files) @@ -328,7 +326,7 @@ func AttachRemove(xRefTable *XRefTable, files StringSet) (ok bool, err error) { log.Debug.Println("Remove begin") - if !xRefTable.Valid && xRefTable.Names["EmbeddedFiles"] == nil { + if !xRefTable.Valid { err = xRefTable.LocateNameTree("EmbeddedFiles", false) if err != nil { return false, err diff --git a/pkg/pdfcpu/nameTree.go b/pkg/pdfcpu/nameTree.go index 33d554c0..c99f9039 100644 --- a/pkg/pdfcpu/nameTree.go +++ b/pkg/pdfcpu/nameTree.go @@ -32,10 +32,10 @@ const maxEntries = 3 // Once maxEntries has been reached a leaf node turns into an intermediary node with two kids, // which are leaf nodes each of them holding half of the sorted entries of the original leaf node. type Node struct { - Kids []*Node // Mirror of the name tree's Kids array. - Names []entry // Mirror of the name tree's Names array. - Kmin, Kmax string // Mirror of the name tree's Limit array[Kmin,Kmax]. - IndRef *IndirectRef // Pointer to the PDF object representing this name tree node. + Kids []*Node // Mirror of the name tree's Kids array, an array of indirect references. + Names []entry // Mirror of the name tree's Names array. + Kmin, Kmax string // Mirror of the name tree's Limit array[Kmin,Kmax]. + D *Dict // Pointer to the PDF dict representing this name tree node. } // entry is a key value pair. @@ -111,6 +111,11 @@ func (n *Node) AddToLeaf(k string, v Object) { // Add adds an entry to a name tree. func (n *Node) Add(xRefTable *XRefTable, k string, v Object) error { + // The values associated with the keys may be objects of any type. + // Stream objects shall be specified by indirect object references. + // Dictionary, array, and string objects should be specified by indirect object references. + // Other PDF objects (nulls, numbers, booleans, and names) should be specified as direct objects. + if n.Names == nil { n.Names = make([]entry, 0, maxEntries) } @@ -324,7 +329,7 @@ func (n *Node) removeFromKids(xRefTable *XRefTable, k string) (ok bool, err erro // This kid is now empty and needs to be removed. if xRefTable != nil { - err := xRefTable.DeleteObjectGraph(*n.Kids[i].IndRef) + err = xRefTable.deleteObject(*kid.D) if err != nil { return false, err } @@ -351,7 +356,7 @@ func (n *Node) removeFromKids(xRefTable *XRefTable, k string) (ok bool, err erro log.Debug.Println("removeFromKids: only 1 kid") if xRefTable != nil { - err = xRefTable.DeleteObject(n.IndRef.ObjectNumber.Value()) + err = xRefTable.deleteObject(*n.D) if err != nil { return false, err } @@ -360,7 +365,6 @@ func (n *Node) removeFromKids(xRefTable *XRefTable, k string) (ok bool, err erro *n = *n.Kids[0] log.Debug.Printf("removeFromKids: new n = %s\n", n) - log.Debug.Printf("removeFromKids: n.IndRef = %v\n", n.IndRef) return true, nil } diff --git a/pkg/pdfcpu/validate/nameTree.go b/pkg/pdfcpu/validate/nameTree.go index bd818063..5e683e6b 100644 --- a/pkg/pdfcpu/validate/nameTree.go +++ b/pkg/pdfcpu/validate/nameTree.go @@ -548,6 +548,12 @@ func validateIDTreeValue(xRefTable *pdf.XRefTable, o pdf.Object, sinceVersion pd func validateNameTreeValue(name string, xRefTable *pdf.XRefTable, o pdf.Object) (err error) { + // TODO + // The values associated with the keys may be objects of any type. + // Stream objects shall be specified by indirect object references. + // Dictionary, array, and string objects should be specified by indirect object references. + // Other PDF objects (nulls, numbers, booleans, and names) should be specified as direct objects. + for k, v := range map[string]struct { validate func(xRefTable *pdf.XRefTable, o pdf.Object, sinceVersion pdf.Version) error sinceVersion pdf.Version @@ -673,19 +679,15 @@ func validateNameTreeDictLimitsEntry(xRefTable *pdf.XRefTable, d pdf.Dict, first return nil } -func validateNameTree(xRefTable *pdf.XRefTable, name string, ir pdf.IndirectRef, root bool) (string, string, *pdf.Node, error) { +func validateNameTree(xRefTable *pdf.XRefTable, name string, d pdf.Dict, root bool) (string, string, *pdf.Node, error) { // see 7.7.4 // A node has "Kids" or "Names" entry. - node := &pdf.Node{IndRef: &ir} + node := &pdf.Node{D: &d} var kmin, kmax string - - d, err := xRefTable.DereferenceDict(ir) - if err != nil || d == nil { - return "", "", nil, err - } + var err error // Kids: array of indirect references to the immediate children of this node. // if Kids present then recurse @@ -709,9 +711,14 @@ func validateNameTree(xRefTable *pdf.XRefTable, name string, ir pdf.IndirectRef, return "", "", nil, errors.New("validateNameTree: corrupt kid, should be indirect reference") } + d, err := xRefTable.DereferenceDict(kid) + if err != nil { + return "", "", nil, err + } + var kminKid string var kidNode *pdf.Node - kminKid, kmax, kidNode, err = validateNameTree(xRefTable, name, kid, false) + kminKid, kmax, kidNode, err = validateNameTree(xRefTable, name, d, false) if err != nil { return "", "", nil, err } diff --git a/pkg/pdfcpu/validate/structTree.go b/pkg/pdfcpu/validate/structTree.go index a755a963..5c868693 100644 --- a/pkg/pdfcpu/validate/structTree.go +++ b/pkg/pdfcpu/validate/structTree.go @@ -626,7 +626,11 @@ func validateStructTreeRootDict(xRefTable *pdf.XRefTable, d pdf.Dict) error { // A name tree that maps element identifiers to the structure elements they denote. ir := d.IndirectRefEntry("IDTree") if ir != nil { - _, _, _, err := validateNameTree(xRefTable, "IDTree", *ir, true) + d, err := xRefTable.DereferenceDict(*ir) + if err != nil { + return err + } + _, _, _, err = validateNameTree(xRefTable, "IDTree", d, true) if err != nil { return err } diff --git a/pkg/pdfcpu/validate/xReftable.go b/pkg/pdfcpu/validate/xReftable.go index 30d79347..1c7eedb3 100644 --- a/pkg/pdfcpu/validate/xReftable.go +++ b/pkg/pdfcpu/validate/xReftable.go @@ -104,15 +104,6 @@ func validateNames(xRefTable *pdf.XRefTable, rootDict pdf.Dict, required bool, s // => 7.7.4 Name Dictionary - /* - - - 86: - -