Skip to content

Commit

Permalink
Fix parsing errors
Browse files Browse the repository at this point in the history
  • Loading branch information
hhrutter committed Nov 11, 2020
1 parent 298e390 commit 0b3fcc7
Show file tree
Hide file tree
Showing 14 changed files with 138 additions and 79 deletions.
2 changes: 1 addition & 1 deletion pkg/pdfcpu/attach.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ func fileSpecStreamDictInfo(xRefTable *XRefTable, id string, o Object, decode bo
return nil, desc, fileName, nil, nil
}

sd, err := xRefTable.DereferenceStreamDict(o)
sd, _, err := xRefTable.DereferenceStreamDict(o)
if err != nil || sd == nil {
return nil, desc, fileName, nil, err
}
Expand Down
22 changes: 19 additions & 3 deletions pkg/pdfcpu/context.go
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,15 @@ func NewContext(rs io.ReadSeeker, conf *Configuration) (*Context, error) {
conf = NewDefaultConfiguration()
}

rdCtx, err := newReadContext(rs)
if err != nil {
return nil, err
}

ctx := &Context{
conf,
newXRefTable(conf.ValidationMode),
newReadContext(rs),
rdCtx,
newOptimizationContext(),
NewWriteContext(conf.Eol),
false,
Expand Down Expand Up @@ -342,12 +347,23 @@ type ReadContext struct {
XRefStreams IntSet // All object numbers of any xref streams found.
}

func newReadContext(rs io.ReadSeeker) *ReadContext {
return &ReadContext{
func newReadContext(rs io.ReadSeeker) (*ReadContext, error) {

rdCtx := &ReadContext{
rs: rs,
ObjectStreams: IntSet{},
XRefStreams: IntSet{},
}

if f, ok := rs.(*os.File); ok {
fileInfo, err := f.Stat()
if err != nil {
return nil, err
}
rdCtx.FileSize = fileInfo.Size()
}

return rdCtx, nil
}

// IsObjectStreamObject returns true if object i is an object stream.
Expand Down
4 changes: 2 additions & 2 deletions pkg/pdfcpu/extract.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ func (ctx *Context) ExtractFont(objNr int) (*Font, error) {
case "TrueType":
// ttf ... true type file
// ttc ... true type collection
sd, err := ctx.DereferenceStreamDict(*ir)
sd, _, err := ctx.DereferenceStreamDict(*ir)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -279,7 +279,7 @@ func extractMetadataFromDict(ctx *Context, d Dict, parentObjNr int) (*Metadata,
if !found || o == nil {
return nil, nil
}
sd, err := ctx.DereferenceStreamDict(o)
sd, _, err := ctx.DereferenceStreamDict(o)
if err != nil {
return nil, err
}
Expand Down
4 changes: 2 additions & 2 deletions pkg/pdfcpu/optimize.go
Original file line number Diff line number Diff line change
Expand Up @@ -393,7 +393,7 @@ func optimizeXObjectResourcesDict(ctx *Context, rDict Dict, pageNumber, pageObjN
// We are dealing with a new XObject..
// Dereference the XObject stream dict.

osd, err := ctx.DereferenceStreamDictForValidation(indRef, false)
osd, _, err := ctx.DereferenceStreamDict(indRef)
if err != nil {
return err
}
Expand Down Expand Up @@ -764,7 +764,7 @@ func streamLengthFontFile(xRefTable *XRefTable, indirectRef *IndirectRef) (*int6

objectNumber := indirectRef.ObjectNumber

sd, err := xRefTable.DereferenceStreamDict(*indirectRef)
sd, _, err := xRefTable.DereferenceStreamDict(*indirectRef)
if err != nil {
return nil, err
}
Expand Down
2 changes: 1 addition & 1 deletion pkg/pdfcpu/pages.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ func (xRefTable *XRefTable) PageContent(d Dict) ([]byte, error) {
if o == nil {
continue
}
o, err := xRefTable.DereferenceStreamDict(o)
o, _, err := xRefTable.DereferenceStreamDict(o)
if err != nil {
return nil, err
}
Expand Down
31 changes: 17 additions & 14 deletions pkg/pdfcpu/read.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,14 +85,6 @@ func Read(rs io.ReadSeeker, conf *Configuration) (*Context, error) {
*ctx.XRefTable.Size = len(ctx.XRefTable.Table)
}

if f, ok := rs.(*os.File); ok {
fileInfo, err := f.Stat()
if err != nil {
return nil, err
}
ctx.Read.FileSize = fileInfo.Size()
}

log.Read.Println("Read: end")

return ctx, nil
Expand Down Expand Up @@ -158,7 +150,8 @@ func newPositionedReader(rs io.ReadSeeker, offset *int64) (*bufio.Reader, error)

// Get the file offset of the last XRefSection.
// Go to end of file and search backwards for the first occurrence of startxref {offset} %%EOF
func offsetLastXRefSection(ctx *Context) (*int64, error) {
// xref at 114172
func offsetLastXRefSection(ctx *Context, skip int64) (*int64, error) {

rs := ctx.Read.rs

Expand All @@ -170,7 +163,7 @@ func offsetLastXRefSection(ctx *Context) (*int64, error) {

for i := 1; offset == 0; i++ {

off, err := rs.Seek(-int64(i)*bufSize, io.SeekEnd)
off, err := rs.Seek(-int64(i)*bufSize-skip, io.SeekEnd)
if err != nil {
return nil, errors.New("pdfcpu: can't find last xref section")
}
Expand Down Expand Up @@ -203,10 +196,9 @@ func offsetLastXRefSection(ctx *Context) (*int64, error) {

p = p[:posEOF]
offset, err = strconv.ParseInt(strings.TrimSpace(string(p)), 10, 64)
if err != nil {
if err != nil || offset >= ctx.Read.FileSize {
return nil, errors.New("pdfcpu: corrupted last xref section")
}

}

log.Read.Printf("Offset last xrefsection: %d\n", offset)
Expand Down Expand Up @@ -1170,9 +1162,21 @@ func buildXRefTableStartingAt(ctx *Context, offset *int64) error {

ctx.HeaderVersion = hv
ctx.Read.EolCount = eolCount
offs := map[int64]bool{}

for offset != nil {

if offs[*offset] {
offset, err = offsetLastXRefSection(ctx, ctx.Read.FileSize-*offset)
if err != nil {
return err
}
if offs[*offset] {
return nil
}
}

offs[*offset] = true
rd, err := newPositionedReader(rs, offset)
if err != nil {
return err
Expand All @@ -1194,7 +1198,6 @@ func buildXRefTableStartingAt(ctx *Context, offset *int64) error {
return err
}
} else {

log.Read.Println("buildXRefTableStartingAt: found xref stream")
ctx.Read.UsingXRefStreams = true
rd, err = newPositionedReader(rs, offset)
Expand Down Expand Up @@ -1223,7 +1226,7 @@ func readXRefTable(ctx *Context) (err error) {

log.Read.Println("readXRefTable: begin")

offset, err := offsetLastXRefSection(ctx)
offset, err := offsetLastXRefSection(ctx, 0)
if err != nil {
return
}
Expand Down
6 changes: 3 additions & 3 deletions pkg/pdfcpu/renderImage.go
Original file line number Diff line number Diff line change
Expand Up @@ -213,7 +213,7 @@ func softMask(xRefTable *XRefTable, d *StreamDict, w, h, objNr int) ([]byte, err

// Soft mask present.

sd, err := xRefTable.DereferenceStreamDict(o)
sd, _, err := xRefTable.DereferenceStreamDict(o)
if err != nil {
return nil, err
}
Expand Down Expand Up @@ -396,7 +396,7 @@ func renderICCBased(xRefTable *XRefTable, im *PDFImage, resourceName string, cs
// Any ICC profile >= ICC.1:2004:10 is sufficient for any PDF version <= 1.7
// If the embedded ICC profile version is newer than the one used by the Reader, substitute with Alternate color space.

iccProfileStream, _ := xRefTable.DereferenceStreamDict(cs[1])
iccProfileStream, _, _ := xRefTable.DereferenceStreamDict(cs[1])

b := im.sd.Content

Expand Down Expand Up @@ -539,7 +539,7 @@ func renderIndexedArrayCS(xRefTable *XRefTable, im *PDFImage, resourceName strin

case ICCBasedCS:

iccProfileStream, _ := xRefTable.DereferenceStreamDict(csa[1])
iccProfileStream, _, _ := xRefTable.DereferenceStreamDict(csa[1])

// 1,3 or 4 color components.
n := *iccProfileStream.IntEntry("N")
Expand Down
22 changes: 18 additions & 4 deletions pkg/pdfcpu/validate/acroForm.go
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,10 @@ func validateAcroFieldDict(xRefTable *pdf.XRefTable, ir pdf.IndirectRef, inField
}
}

if err := xRefTable.SetValid(ir); err != nil {
return err
}

if o, ok := d.Find("Kids"); ok {

// dict represents a non terminal field.
Expand All @@ -232,16 +236,20 @@ func validateAcroFieldDict(xRefTable *pdf.XRefTable, ir pdf.IndirectRef, inField
}

for _, value := range a {

ir, ok := value.(pdf.IndirectRef)
if !ok {
return errors.New("pdfcpu: validateAcroFieldDict: corrupt kids array: entries must be indirect reference")
}

if err = validateAcroFieldDict(xRefTable, ir, xInFieldType); err != nil {
valid, err := xRefTable.IsValid(ir)
if err != nil {
return err
}

if !valid {
if err = validateAcroFieldDict(xRefTable, ir, xInFieldType); err != nil {
return err
}
}
}

return nil
Expand All @@ -264,11 +272,17 @@ func validateAcroFormFields(xRefTable *pdf.XRefTable, o pdf.Object) error {
return errors.New("pdfcpu: validateAcroFormFields: corrupt form field array entry")
}

err = validateAcroFieldDict(xRefTable, ir, nil)
valid, err := xRefTable.IsValid(ir)
if err != nil {
return err
}

if !valid {
if validateAcroFieldDict(xRefTable, ir, nil); err != nil {
return err
}
}

}

return nil
Expand Down
12 changes: 6 additions & 6 deletions pkg/pdfcpu/validate/colorspace.go
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,12 @@ func validateDeviceNColorSpaceAttributesDict(xRefTable *pdf.XRefTable, o pdf.Obj

dictName := "deviceNCSAttributesDict"

_, err = validateNameEntry(xRefTable, d, dictName, "Subtype", OPTIONAL, pdf.V16, func(s string) bool { return s == "DeviceN" || s == "NChannel" })
sinceVersion := pdf.V16
if xRefTable.ValidationMode == pdf.ValidationRelaxed {
sinceVersion = pdf.V13
}

_, err = validateNameEntry(xRefTable, d, dictName, "Subtype", OPTIONAL, sinceVersion, func(s string) bool { return s == "DeviceN" || s == "NChannel" })
if err != nil {
return err
}
Expand All @@ -390,11 +395,6 @@ func validateDeviceNColorSpaceAttributesDict(xRefTable *pdf.XRefTable, o pdf.Obj
}
}

sinceVersion := pdf.V16
if xRefTable.ValidationMode == pdf.ValidationRelaxed {
sinceVersion = pdf.V13
}

d1, err = validateDictEntry(xRefTable, d, dictName, "Process", OPTIONAL, sinceVersion, nil)
if err != nil {
return err
Expand Down
2 changes: 1 addition & 1 deletion pkg/pdfcpu/validate/font.go
Original file line number Diff line number Diff line change
Expand Up @@ -727,7 +727,7 @@ func validateCharProcsDict(xRefTable *pdf.XRefTable, d pdf.Dict, dictName string

for _, v := range d1 {

_, err = xRefTable.DereferenceStreamDict(v)
_, _, err = xRefTable.DereferenceStreamDict(v)
if err != nil {
return err
}
Expand Down
26 changes: 17 additions & 9 deletions pkg/pdfcpu/validate/objects.go
Original file line number Diff line number Diff line change
Expand Up @@ -956,11 +956,19 @@ func validateStreamDictEntry(xRefTable *pdf.XRefTable, d pdf.Dict, dictName, ent
return nil, err
}

o, err = xRefTable.Dereference(o)
if err != nil {
sd, valid, err := xRefTable.DereferenceStreamDict(o)
if valid {
return nil, nil
}
if err != nil || sd == nil {
return nil, err
}
if o == nil {

// o, err = xRefTable.Dereference(o)
// if err != nil {
// return nil, err
// }
if sd == nil {
if required {
return nil, errors.Errorf("pdfcpu: validateStreamDictEntry: dict=%s required entry=%s is nil", dictName, entryName)
}
Expand All @@ -974,19 +982,19 @@ func validateStreamDictEntry(xRefTable *pdf.XRefTable, d pdf.Dict, dictName, ent
return nil, err
}

sd, ok := o.(pdf.StreamDict)
if !ok {
return nil, errors.Errorf("pdfcpu: validateStreamDictEntry: dict=%s entry=%s invalid type", dictName, entryName)
}
// sd, ok := o.(pdf.StreamDict)
// if !ok {
// return nil, errors.Errorf("pdfcpu: validateStreamDictEntry: dict=%s entry=%s invalid type", dictName, entryName)
// }

// Validation
if validate != nil && !validate(sd) {
if validate != nil && !validate(*sd) {
return nil, errors.Errorf("pdfcpu: validateStreamDictEntry: dict=%s entry=%s invalid dict entry", dictName, entryName)
}

log.Validate.Printf("validateStreamDictEntry end: entry=%s\n", entryName)

return &sd, nil
return sd, nil
}

func validateString(xRefTable *pdf.XRefTable, o pdf.Object, validate func(string) bool) (string, error) {
Expand Down
3 changes: 1 addition & 2 deletions pkg/pdfcpu/validate/pages.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,8 +88,7 @@ func validatePageContents(xRefTable *pdf.XRefTable, d pdf.Dict) (hasContents boo
// process array of content stream dicts.

for _, o := range o {

o, err = xRefTable.DereferenceStreamDict(o)
o, _, err = xRefTable.DereferenceStreamDict(o)
if err != nil {
return false, err
}
Expand Down
15 changes: 8 additions & 7 deletions pkg/pdfcpu/validate/xObject.go
Original file line number Diff line number Diff line change
Expand Up @@ -511,9 +511,7 @@ func validateImageStreamDictPart2(xRefTable *pdf.XRefTable, sd *pdf.StreamDict,
}

func validateImageStreamDict(xRefTable *pdf.XRefTable, sd *pdf.StreamDict, isAlternate bool) error {

dictName := "imageStreamDict"

var isImageMask bool

isImageMask, err := validateImageStreamDictPart1(xRefTable, sd, dictName)
Expand Down Expand Up @@ -551,10 +549,10 @@ func validateImageStreamDict(xRefTable *pdf.XRefTable, sd *pdf.StreamDict, isAlt

// Name, name, required for V10
// Shall no longer be used.
_, err = validateNameEntry(xRefTable, sd.Dict, dictName, "Name", xRefTable.Version() == pdf.V10, pdf.V10, nil)
if err != nil {
return err
}
// _, err = validateNameEntry(xRefTable, sd.Dict, dictName, "Name", xRefTable.Version() == pdf.V10, pdf.V10, nil)
// if err != nil {
// return err
// }

// StructParent, integer, optional
_, err = validateIntegerEntry(xRefTable, sd.Dict, dictName, "StructParent", OPTIONAL, pdf.V13, nil)
Expand Down Expand Up @@ -737,7 +735,10 @@ func validateXObjectStreamDict(xRefTable *pdf.XRefTable, o pdf.Object) error {

// Dereference stream dict and ensure it is validated exactly once in order to handle
// XObjects (forms) with recursive structures like those produced by Microsoft.
sd, err := xRefTable.DereferenceStreamDictForValidation(o, true)
sd, valid, err := xRefTable.DereferenceStreamDict(o)
if valid {
return nil
}
if err != nil || sd == nil {
return err
}
Expand Down
Loading

0 comments on commit 0b3fcc7

Please sign in to comment.