diff --git a/pkg/api/api.go b/pkg/api/api.go index c800b375..b0e3e1d0 100644 --- a/pkg/api/api.go +++ b/pkg/api/api.go @@ -47,6 +47,25 @@ import ( "github.com/pkg/errors" ) +func logDisclaimerPDF20() { + disclaimer := ` +***************************** Disclaimer **************************** +* PDF 2.0 features are supported on a need basis. * +* (See ISO 32000:2 6.3.2 Conformance of PDF processors) * +* At the moment pdfcpu comes with basic PDF 2.0 support. * +* Please let us know which feature you would like to see supported, * +* provide a sample PDF file and create an issue: * +* https://github.com/pdfcpu/pdfcpu/issues/new/choose * +*********************************************************************` + + if log.ValidateEnabled() { + log.Validate.Println(disclaimer) + } + if log.CLIEnabled() { + log.CLI.Println(disclaimer) + } +} + // ReadContext uses an io.ReadSeeker to build an internal structure holding its cross reference table aka the Context. func ReadContext(rs io.ReadSeeker, conf *model.Configuration) (*model.Context, error) { if rs == nil { @@ -68,6 +87,10 @@ func ReadContextFile(inFile string) (*model.Context, error) { return nil, err } + if ctx.Version() == model.V20 { + logDisclaimerPDF20() + } + if err = validate.XRefTable(ctx.XRefTable); err != nil { return nil, err } @@ -77,6 +100,11 @@ func ReadContextFile(inFile string) (*model.Context, error) { // ValidateContext validates ctx. func ValidateContext(ctx *model.Context) error { + + if ctx.Version() == model.V20 { + logDisclaimerPDF20() + } + return validate.XRefTable(ctx.XRefTable) } @@ -130,6 +158,10 @@ func readAndValidate(rs io.ReadSeeker, conf *model.Configuration, from1 time.Tim from2 := time.Now() + if ctx.Version() == model.V20 { + logDisclaimerPDF20() + } + if err = validate.XRefTable(ctx.XRefTable); err != nil { return nil, 0, 0, err } @@ -146,10 +178,6 @@ func ReadValidateAndOptimize(rs io.ReadSeeker, conf *model.Configuration, from1 return nil, 0, 0, 0, err } - if ctx.Version() == model.V20 { - return nil, 0, 0, 0, pdfcpu.ErrUnsupportedVersion - } - from3 := time.Now() if err = OptimizeContext(ctx); err != nil { return nil, 0, 0, 0, err diff --git a/pkg/api/booklet.go b/pkg/api/booklet.go index 72457b97..f25667ea 100644 --- a/pkg/api/booklet.go +++ b/pkg/api/booklet.go @@ -88,10 +88,6 @@ func Booklet(rs io.ReadSeeker, w io.Writer, imgFiles, selectedPages []string, nu return err } - if ctx.Version() == model.V20 { - return pdfcpu.ErrUnsupportedVersion - } - if err := ctx.EnsurePageCount(); err != nil { return err } diff --git a/pkg/api/extract.go b/pkg/api/extract.go index cfcc0af9..cec5db3d 100644 --- a/pkg/api/extract.go +++ b/pkg/api/extract.go @@ -264,7 +264,7 @@ func ExtractPages(rs io.ReadSeeker, outDir, fileName string, selectedPages []str if len(pages) == 0 { if log.CLIEnabled() { - log.CLI.Println("aborted: nothing to extract!") + log.CLI.Println("aborted: missing page numbers!") } return nil } diff --git a/pkg/api/merge.go b/pkg/api/merge.go index de149405..6339a145 100644 --- a/pkg/api/merge.go +++ b/pkg/api/merge.go @@ -36,7 +36,7 @@ func appendTo(rs io.ReadSeeker, fName string, ctxDest *model.Context, dividerPag return err } - if ctxSource.Version() == model.V20 { + if ctxDest.Version() < model.V20 && ctxSource.Version() == model.V20 { return pdfcpu.ErrUnsupportedVersion } @@ -93,7 +93,9 @@ func prepDestContext(destFile string, rs io.ReadSeeker, conf *model.Configuratio } } - ctxDest.EnsureVersionForWriting() + if ctxDest.Version() < model.V20 { + ctxDest.EnsureVersionForWriting() + } return ctxDest, nil } @@ -135,9 +137,6 @@ func Merge(destFile string, inFiles []string, w io.Writer, conf *model.Configura if err != nil { return err } - if ctxDest.Version() == model.V20 { - return pdfcpu.ErrUnsupportedVersion - } for _, fName := range inFiles { if err := func() error { @@ -181,6 +180,7 @@ func MergeCreateFile(inFiles []string, outFile string, dividerPage bool, conf *m return } os.Remove(outFile) + return } if err = f.Close(); err != nil { return diff --git a/pkg/api/nup.go b/pkg/api/nup.go index 2fd9e69b..4c0120cb 100644 --- a/pkg/api/nup.go +++ b/pkg/api/nup.go @@ -119,10 +119,6 @@ func NUp(rs io.ReadSeeker, w io.Writer, imgFiles, selectedPages []string, nup *m return err } - if ctx.Version() == model.V20 { - return pdfcpu.ErrUnsupportedVersion - } - if err := ctx.EnsurePageCount(); err != nil { return err } diff --git a/pkg/api/page.go b/pkg/api/page.go index e358abdd..23b3c58f 100644 --- a/pkg/api/page.go +++ b/pkg/api/page.go @@ -19,9 +19,11 @@ package api import ( "io" "os" + "sort" "time" "github.com/pdfcpu/pdfcpu/pkg/log" + "github.com/pdfcpu/pdfcpu/pkg/pdfcpu" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -134,7 +136,7 @@ func RemovePages(rs io.ReadSeeker, w io.Writer, selectedPages []string, conf *mo conf.Cmd = model.REMOVEPAGES fromStart := time.Now() - ctx, durRead, durVal, durOpt, err := ReadValidateAndOptimize(rs, conf, fromStart) + ctx, _, _, _, err := ReadValidateAndOptimize(rs, conf, fromStart) if err != nil { return err } @@ -143,31 +145,38 @@ func RemovePages(rs io.ReadSeeker, w io.Writer, selectedPages []string, conf *mo return err } - fromWrite := time.Now() - - pages, err := PagesForPageSelection(ctx.PageCount, selectedPages, false, true) + pages, err := RemainingPagesForPageRemoval(ctx.PageCount, selectedPages, true) if err != nil { return err } - // ctx.Pagecount gets set during validation. - if len(pages) >= ctx.PageCount { - return errors.New("pdfcpu: operation invalid") + if len(pages) == 0 { + if log.CLIEnabled() { + log.CLI.Println("aborted: missing page numbers!") + } + return nil } - // No special context processing required. - // WriteContext decides which pages get written by checking conf.Cmd + var pageNrs []int + for k, v := range pages { + if v { + pageNrs = append(pageNrs, k) + } + } + sort.Ints(pageNrs) - ctx.Write.SelectedPages = pages - if err = WriteContext(ctx, w); err != nil { + ctxDest, err := pdfcpu.ExtractPages(ctx, pageNrs, false) + if err != nil { return err } - durWrite := time.Since(fromWrite).Seconds() - durTotal := time.Since(fromStart).Seconds() - logOperationStats(ctx, "remove pages, write", durRead, durVal, durOpt, durWrite, durTotal) + if conf.ValidationMode != model.ValidationNone { + if err = ValidateContext(ctxDest); err != nil { + return err + } + } - return nil + return WriteContext(ctxDest, w) } // RemovePagesFile removes selected inFile pages and writes the result to outFile.. diff --git a/pkg/api/pageLayout.go b/pkg/api/pageLayout.go index eea94c3a..ab9d7edd 100644 --- a/pkg/api/pageLayout.go +++ b/pkg/api/pageLayout.go @@ -21,7 +21,6 @@ import ( "os" "time" - "github.com/pdfcpu/pdfcpu/pkg/pdfcpu" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -113,10 +112,6 @@ func SetPageLayout(rs io.ReadSeeker, w io.Writer, val model.PageLayout, conf *mo return err } - if ctx.Version() == model.V20 { - return pdfcpu.ErrUnsupportedVersion - } - ctx.RootDict["PageLayout"] = types.Name(val.String()) if err = WriteContext(ctx, w); err != nil { @@ -182,10 +177,6 @@ func ResetPageLayout(rs io.ReadSeeker, w io.Writer, conf *model.Configuration) e return err } - if ctx.Version() == model.V20 { - return pdfcpu.ErrUnsupportedVersion - } - delete(ctx.RootDict, "PageLayout") if err = WriteContext(ctx, w); err != nil { diff --git a/pkg/api/pageMode.go b/pkg/api/pageMode.go index 6680f3bf..646bfcb6 100644 --- a/pkg/api/pageMode.go +++ b/pkg/api/pageMode.go @@ -21,7 +21,6 @@ import ( "os" "time" - "github.com/pdfcpu/pdfcpu/pkg/pdfcpu" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types" "github.com/pkg/errors" @@ -113,10 +112,6 @@ func SetPageMode(rs io.ReadSeeker, w io.Writer, val model.PageMode, conf *model. return err } - if ctx.Version() == model.V20 { - return pdfcpu.ErrUnsupportedVersion - } - ctx.RootDict["PageMode"] = types.Name(val.String()) if err = WriteContext(ctx, w); err != nil { @@ -182,10 +177,6 @@ func ResetPageMode(rs io.ReadSeeker, w io.Writer, conf *model.Configuration) err return err } - if ctx.Version() == model.V20 { - return pdfcpu.ErrUnsupportedVersion - } - delete(ctx.RootDict, "PageMode") if err = WriteContext(ctx, w); err != nil { diff --git a/pkg/api/permission.go b/pkg/api/permission.go index 3e9fae7f..bbd30345 100644 --- a/pkg/api/permission.go +++ b/pkg/api/permission.go @@ -21,6 +21,7 @@ import ( "os" "time" + "github.com/pdfcpu/pdfcpu/pkg/pdfcpu" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model" "github.com/pkg/errors" ) @@ -68,6 +69,10 @@ func SetPermissions(rs io.ReadSeeker, w io.Writer, conf *model.Configuration) er return err } + if ctx.Version() == model.V20 { + return pdfcpu.ErrUnsupportedVersion + } + fromWrite := time.Now() if err = WriteContext(ctx, w); err != nil { return err @@ -141,6 +146,11 @@ func GetPermissions(rs io.ReadSeeker, conf *model.Configuration) (*int16, error) if err != nil { return nil, err } + + if ctx.Version() == model.V20 { + return nil, pdfcpu.ErrUnsupportedVersion + } + if ctx.E == nil { // Full access - permissions don't apply. return nil, nil diff --git a/pkg/api/selectPages.go b/pkg/api/selectPages.go index d8efb69e..ed149899 100644 --- a/pkg/api/selectPages.go +++ b/pkg/api/selectPages.go @@ -391,6 +391,26 @@ func PagesForPageSelection(pageCount int, pageSelection []string, ensureAllforNo return m, nil } +func RemainingPagesForPageRemoval(pageCount int, pageSelection []string, log bool) (types.IntSet, error) { + pagesToRemove, err := selectedPages(pageCount, pageSelection, log) + if err != nil { + return nil, err + } + + m := types.IntSet{} + for i := 1; i <= pageCount; i++ { + m[i] = true + } + + for k, v := range pagesToRemove { + if v { + m[k] = false + } + } + + return m, nil +} + func deletePageFromCollection(cp *[]int, p int) { a := []int{} for _, i := range *cp { diff --git a/pkg/api/trim.go b/pkg/api/trim.go index 66d573ff..8f1a4d21 100644 --- a/pkg/api/trim.go +++ b/pkg/api/trim.go @@ -19,8 +19,11 @@ package api import ( "io" "os" + "sort" "time" + "github.com/pdfcpu/pdfcpu/pkg/log" + "github.com/pdfcpu/pdfcpu/pkg/pdfcpu" "github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model" "github.com/pkg/errors" ) @@ -38,7 +41,7 @@ func Trim(rs io.ReadSeeker, w io.Writer, selectedPages []string, conf *model.Con conf.Cmd = model.TRIM fromStart := time.Now() - ctx, durRead, durVal, durOpt, err := ReadValidateAndOptimize(rs, conf, fromStart) + ctx, _, _, _, err := ReadValidateAndOptimize(rs, conf, fromStart) if err != nil { return err } @@ -47,26 +50,38 @@ func Trim(rs io.ReadSeeker, w io.Writer, selectedPages []string, conf *model.Con return err } - fromWrite := time.Now() - pages, err := PagesForPageSelection(ctx.PageCount, selectedPages, false, true) if err != nil { return err } - // No special context processing required. - // WriteContext decides which pages get written by checking conf.Cmd + if len(pages) == 0 { + if log.CLIEnabled() { + log.CLI.Println("aborted: missing page numbers!") + } + return nil + } - ctx.Write.SelectedPages = pages - if err = WriteContext(ctx, w); err != nil { + var pageNrs []int + for k, v := range pages { + if v { + pageNrs = append(pageNrs, k) + } + } + sort.Ints(pageNrs) + + ctxDest, err := pdfcpu.ExtractPages(ctx, pageNrs, false) + if err != nil { return err } - durWrite := time.Since(fromWrite).Seconds() - durTotal := time.Since(fromStart).Seconds() - logOperationStats(ctx, "trim, write", durRead, durVal, durOpt, durWrite, durTotal) + if conf.ValidationMode != model.ValidationNone { + if err = ValidateContext(ctxDest); err != nil { + return err + } + } - return nil + return WriteContext(ctxDest, w) } // TrimFile generates a trimmed version of inFile diff --git a/pkg/api/viewerPreferences.go b/pkg/api/viewerPreferences.go index 6428056d..d36b8e81 100644 --- a/pkg/api/viewerPreferences.go +++ b/pkg/api/viewerPreferences.go @@ -183,10 +183,6 @@ func SetViewerPreferences(rs io.ReadSeeker, w io.Writer, vp model.ViewerPreferen return err } - if ctx.Version() == model.V20 { - return pdfcpu.ErrUnsupportedVersion - } - version := ctx.Version() if err := vp.Validate(version); err != nil { @@ -365,10 +361,6 @@ func ResetViewerPreferences(rs io.ReadSeeker, w io.Writer, conf *model.Configura return ErrNoOp } - if ctx.Version() == model.V20 { - return pdfcpu.ErrUnsupportedVersion - } - delete(ctx.RootDict, "ViewerPreferences") if err = WriteContext(ctx, w); err != nil { diff --git a/pkg/cli/list.go b/pkg/cli/list.go index 5fcb1fed..641a4d58 100644 --- a/pkg/cli/list.go +++ b/pkg/cli/list.go @@ -381,6 +381,10 @@ func listPermissions(rs io.ReadSeeker, conf *model.Configuration) ([]string, err return nil, err } + if ctx.Version() == model.V20 { + return nil, pdfcpu.ErrUnsupportedVersion + } + return pdfcpu.Permissions(ctx), nil } diff --git a/pkg/pdfcpu/crypto.go b/pkg/pdfcpu/crypto.go index 7412bfd6..9125d1fc 100644 --- a/pkg/pdfcpu/crypto.go +++ b/pkg/pdfcpu/crypto.go @@ -1399,17 +1399,18 @@ func fileID(ctx *model.Context) (types.HexLiteral, error) { h.Write([]byte(strconv.Itoa(ctx.Read.ReadFileSize()))) // All values of the info dict which is assumed to be there at this point. - d, err := ctx.DereferenceDict(*ctx.Info) - if err != nil { - return "", err - } - - for _, v := range d { - o, err := ctx.Dereference(v) + if ctx.Version() < model.V20 { + d, err := ctx.DereferenceDict(*ctx.Info) if err != nil { return "", err } - h.Write([]byte(o.String())) + for _, v := range d { + o, err := ctx.Dereference(v) + if err != nil { + return "", err + } + h.Write([]byte(o.String())) + } } m := h.Sum(nil) diff --git a/pkg/pdfcpu/model/xreftable.go b/pkg/pdfcpu/model/xreftable.go index afba67cb..c4bd91b4 100644 --- a/pkg/pdfcpu/model/xreftable.go +++ b/pkg/pdfcpu/model/xreftable.go @@ -1825,11 +1825,18 @@ func consolidateResourceDict(d types.Dict, prn PageResourceNames, pageNr int) er return nil } -func consolidateResources(consolidateRes bool, xRefTable *XRefTable, pageDict, resDict types.Dict, page int) error { +func (xRefTable *XRefTable) consolidateResourcesWithContent(pageDict, resDict types.Dict, page int, consolidateRes bool) error { if !consolidateRes { return nil } + if obj, found := pageDict.Find("Resources"); found { + if _, ok := obj.(types.IndirectRef); !ok { + return nil + } + + } + bb, err := xRefTable.PageContent(pageDict) if err != nil { if err == ErrNoContent { @@ -1875,10 +1882,9 @@ func (xRefTable *XRefTable) processPageTreeForPageDict(root *types.IndirectRef, return nil, nil, err } - // Iterate over page tree. kids := d.ArrayEntry("Kids") if kids == nil { - return d, root, consolidateResources(consolidateRes, xRefTable, d, pAttrs.Resources, page) + return d, root, xRefTable.consolidateResourcesWithContent(d, pAttrs.Resources, page, consolidateRes) } for _, o := range kids { diff --git a/pkg/pdfcpu/page.go b/pkg/pdfcpu/page.go index 17bd5593..0fd24a19 100644 --- a/pkg/pdfcpu/page.go +++ b/pkg/pdfcpu/page.go @@ -31,6 +31,8 @@ func addPages( fieldsSrc, fieldsDest *types.Array, migrated map[int]int) error { + // Used by collect, extractPages, split + pageCache := map[int]*types.IndirectRef{} for _, i := range pageNrs { @@ -44,7 +46,7 @@ func addPages( } } - d, _, inhPAttrs, err := ctxSrc.PageDict(i, false) + d, _, inhPAttrs, err := ctxSrc.PageDict(i, true) if err != nil { return err } diff --git a/pkg/pdfcpu/validate/xReftable.go b/pkg/pdfcpu/validate/xReftable.go index b171c8b9..a61672d8 100644 --- a/pkg/pdfcpu/validate/xReftable.go +++ b/pkg/pdfcpu/validate/xReftable.go @@ -874,6 +874,10 @@ func validateRootObject(xRefTable *model.XRefTable) error { // Collection y 1.7 dict => 12.3.5 Collections // NeedsRendering y 1.7 boolean => XML Forms Architecture (XFA) Spec. + // DSS y 2.0 dict => 12.8.4.3 Document Security Store TODO + // AF y 2.0 array of dicts => 14.3 Associated Files TODO + // DPartRoot y 2.0 dict => 14.12 Document parts TODO + d, err := xRefTable.Catalog() if err != nil { return err diff --git a/pkg/pdfcpu/write.go b/pkg/pdfcpu/write.go index d02af0fd..db3d91f5 100644 --- a/pkg/pdfcpu/write.go +++ b/pkg/pdfcpu/write.go @@ -100,7 +100,13 @@ func Write(ctx *model.Context) (err error) { // Since we support PDF Collections (since V1.7) for file attachments // we need to generate V1.7 PDF files. - if err = writeHeader(ctx.Write, model.V17); err != nil { + v := model.V17 + + if ctx.Version() == model.V20 { + v = model.V20 + } + + if err = writeHeader(ctx.Write, v); err != nil { return err } @@ -203,8 +209,10 @@ func ensureFileID(ctx *model.Context) error { } func ensureInfoDictAndFileID(ctx *model.Context) error { - if err := ensureInfoDict(ctx); err != nil { - return err + if ctx.Version() < model.V20 { + if err := ensureInfoDict(ctx); err != nil { + return err + } } return ensureFileID(ctx) @@ -344,7 +352,6 @@ func writeRootObject(ctx *model.Context) error { d.Delete("Dests") d.Delete("Outlines") d.Delete("OpenAction") - //d.Delete("AcroForm") d.Delete("StructTreeRoot") d.Delete("OCProperties") } @@ -1001,6 +1008,17 @@ func updateEncryption(ctx *model.Context) error { } func handleEncryption(ctx *model.Context) error { + + if ctx.Version() == model.V20 { + if ctx.Cmd == model.ENCRYPT || + ctx.Cmd == model.DECRYPT || + ctx.Cmd == model.CHANGEUPW || + ctx.Cmd == model.CHANGEOPW || + ctx.Cmd == model.SETPERMISSIONS { + return ErrUnsupportedVersion + } + } + if ctx.Cmd == model.ENCRYPT || ctx.Cmd == model.DECRYPT { if ctx.Cmd == model.DECRYPT { diff --git a/pkg/pdfcpu/writePages.go b/pkg/pdfcpu/writePages.go index 7824720d..3f74781e 100644 --- a/pkg/pdfcpu/writePages.go +++ b/pkg/pdfcpu/writePages.go @@ -220,15 +220,6 @@ func writeKids(ctx *model.Context, a types.Array, pageNr *int) (types.Array, int return kids, count, nil } -func containsSelectedPages(ctx *model.Context, from, thru int) bool { - for i := from; i <= thru; i++ { - if ctx.Write.SelectedPages[i] { - return true - } - } - return false -} - func writePageEntries(ctx *model.Context, d types.Dict, dictName string) error { // TODO Check inheritance rules. for _, e := range []struct { @@ -248,31 +239,6 @@ func writePageEntries(ctx *model.Context, d types.Dict, dictName string) error { return nil } -func skipPageSubTree(ctx *model.Context, pageNr *int, c int) bool { - // TRIM, REMOVEPAGES are the only commands where we modify the page tree during writing. - // In these cases the selected pages to be written or to be removed are defined in ctx.Write.SelectedPages. - - if len(ctx.Write.SelectedPages) > 0 { - if log.WriteEnabled() { - log.Write.Printf("writePagesDict: checking page range %d - %d \n", *pageNr+1, *pageNr+c) - } - if ctx.Cmd == model.REMOVEPAGES || - ((ctx.Cmd == model.TRIM) && containsSelectedPages(ctx, *pageNr+1, *pageNr+c)) { - if log.WriteEnabled() { - log.Write.Println("writePagesDict: process this subtree") - } - } else { - if log.WriteEnabled() { - log.Write.Println("writePagesDict: skip this subtree") - } - *pageNr += c - return true - } - } - - return false -} - func writePagesDict(ctx *model.Context, indRef *types.IndirectRef, pageNr *int) (skip bool, writtenPages int, err error) { if log.WriteEnabled() { log.Write.Printf("writePagesDict: begin pageNr=%d\n", *pageNr) @@ -298,10 +264,6 @@ func writePagesDict(ctx *model.Context, indRef *types.IndirectRef, pageNr *int) kidsOrig := d.ArrayEntry("Kids") - if skipPageSubTree(ctx, pageNr, c) { - return true, 0, nil - } - // Iterate over page tree. kidsArray := d.ArrayEntry("Kids") kidsNew, countNew, err := writeKids(ctx, kidsArray, pageNr)