Skip to content

Commit

Permalink
Fix #775, #490
Browse files Browse the repository at this point in the history
  • Loading branch information
hhrutter committed Jan 27, 2024
1 parent 04634d3 commit 043541b
Show file tree
Hide file tree
Showing 5 changed files with 73 additions and 60 deletions.
102 changes: 57 additions & 45 deletions pkg/pdfcpu/model/parse.go
Expand Up @@ -527,78 +527,90 @@ func parseName(line *string) (*types.Name, error) {
return &nameObj, nil
}

// insertKey stores val under key in d unless an equivalent key is already
// present, in which case the existing entry is kept.
//
// usesHexCodes tells whether a previously inserted key of this dict contained
// a '#' hex escape. As long as it is false and key itself contains no '#',
// the key is checked with a plain map lookup, which skips the more expensive
// lookup done by d.Insert (the "DecodeName" cost noted below). As soon as a
// key containing '#' is seen, this and all later insertions go through d.Insert.
//
// It returns the updated usesHexCodes flag. The returned error is currently
// always nil because duplicate keys are tolerated for now (see TODO below).
func insertKey(d types.Dict, key string, val types.Object, usesHexCodes bool) (bool, error) {
	var duplicateKey bool

	switch {
	case usesHexCodes:
		// Insert reports true when the entry was added, so a duplicate is its negation.
		duplicateKey = !d.Insert(key, val)
	case strings.IndexByte(key, '#') >= 0:
		// First key with a hex escape: switch to Insert from here on.
		duplicateKey = !d.Insert(key, val)
		usesHexCodes = true
	default:
		// Avoid expensive "DecodeName".
		if _, found := d[key]; found {
			duplicateKey = true
		} else {
			d[key] = val
		}
	}

	if duplicateKey {
		// For now we digest duplicate keys.
		// TODO
		// if !validationRelaxed {
		// 	return false, errDictionaryDuplicateKey
		// }
		// if log.CLIEnabled() {
		// 	log.CLI.Printf("ParseDict: digesting duplicate key\n")
		// }
		_ = duplicateKey
	}

	if log.ParseEnabled() {
		log.Parse.Printf("ParseDict: dict[%s]=%v\n", key, val)
	}

	return usesHexCodes, nil
}

func processDictKeys(line *string, relaxed bool) (types.Dict, error) {
l := *line
var eol bool
var hasNames bool
var usesHexCodes bool
d := types.NewDict()
for !strings.HasPrefix(l, ">>") {
key, err := parseName(&l)
keyName, err := parseName(&l)
if err != nil {
return nil, err
}
if log.ParseEnabled() {
log.Parse.Printf("ParseDict: key = %s\n", key)
log.Parse.Printf("ParseDict: key = %s\n", keyName)
}

// position to first non whitespace after key
// Position to first non whitespace after key.
l, eol = trimLeftSpace(l, relaxed)

if len(l) == 0 {
if log.ParseEnabled() {
log.Parse.Println("ParseDict: only whitespace after key")
}
// only whitespace after key
// Only whitespace after key.
return nil, errDictionaryNotTerminated
}

// Fix for #252:
// For dicts with kv pairs terminated by eol we accept a missing value as an empty string.
var val types.Object

if eol {
obj := types.StringLiteral("")
if log.ParseEnabled() {
log.Parse.Printf("ParseDict: dict[%s]=%v\n", key, obj)
}
stringKey := string(*key)
if !hasNames {
// Avoid expensive "DecodeName" on existing keys in "Insert".
if _, found := d[stringKey]; found {
return nil, errDictionaryDuplicateKey
}
d[stringKey] = obj
hasNames = strings.IndexByte(stringKey, '#') >= 0
} else {
if ok := d.Insert(stringKey, obj); !ok {
return nil, errDictionaryDuplicateKey
}
// #252: For dicts with kv pairs terminated by eol we accept a missing value as an empty string.
val = types.StringLiteral("")
} else {
if val, err = ParseObject(&l); err != nil {
return nil, err
}
continue
}

obj, err := ParseObject(&l)
if err != nil {
return nil, err
}

// Specifying the null object as the value of a dictionary entry (7.3.7, "Dictionary Objects")
// hall be equivalent to omitting the entry entirely.
if obj != nil {
stringKey := string(*key)
if !hasNames {
// Avoid expensive "DecodeName" on existing keys in "Insert".
if _, found := d[stringKey]; !found {
d[stringKey] = obj
hasNames = strings.IndexByte(stringKey, '#') >= 0
}
} else {
d.Insert(stringKey, obj)
// shall be equivalent to omitting the entry entirely.
if val != nil {
detectedHexCodes, err := insertKey(d, string(*keyName), val, usesHexCodes)
if err != nil {
return nil, err
}
if log.ParseEnabled() {
log.Parse.Printf("ParseDict: dict[%s]=%v\n", key, obj)
if !usesHexCodes && detectedHexCodes {
usesHexCodes = true
}
// if ok := d.Insert(string(*key), obj); !ok {
// return nil, errDictionaryDuplicateKey
// }
}

// We are positioned on the char behind the last parsed dict value.
Expand Down
2 changes: 0 additions & 2 deletions pkg/pdfcpu/model/parse_dict_test.go
Expand Up @@ -148,8 +148,6 @@ func doTestParseDictWithComments(t *testing.T) {
}

func doTestLargeDicts(t *testing.T) {
// Make sure parsing large dictionaries is fast. Found a file in the wild
// that has two dictionaries with about 200.000 entries each.
var sb strings.Builder
sb.WriteString("<<")
for i := 0; i < 50000; i++ {
Expand Down
12 changes: 7 additions & 5 deletions pkg/pdfcpu/read.go
Expand Up @@ -1361,7 +1361,10 @@ func showRep() {
// bypassXrefSection is a fix for digesting corrupt xref sections.
// It populates the xRefTable by reading in all indirect objects line by line
// and works on the assumption of a single xref section - meaning no incremental updates.
func bypassXrefSection(ctx *model.Context, offExtra int64) error {
func bypassXrefSection(ctx *model.Context, offExtra int64, wasErr error) error {
if log.ReadEnabled() {
log.Read.Printf("bypassXRefSection after %v\n", wasErr)
}
var z int64
g := types.FreeHeadGeneration
ctx.Table[0] = &model.XRefTableEntry{
Expand Down Expand Up @@ -1561,11 +1564,10 @@ func buildXRefTableStartingAt(ctx *model.Context, offset *int64) error {
return err
}
if offset, err = parseXRefStream(ctx, rd, offset, offExtra); err != nil {
if log.ReadEnabled() {
log.Read.Printf("bypassXRefSection after %v\n", err)
if ctx.XRefTable.ValidationMode == model.ValidationRelaxed {
// Try fix for corrupt single xref section.
return bypassXrefSection(ctx, offExtra, err)
}
// Try fix for corrupt single xref section.
return bypassXrefSection(ctx, offExtra)
}

}
Expand Down
11 changes: 5 additions & 6 deletions pkg/pdfcpu/types/dict.go
Expand Up @@ -52,13 +52,12 @@ func (d Dict) Clone() Object {
}

// Insert adds a new entry to this PDFDict.
func (d Dict) Insert(key string, value Object) (ok bool) {
_, found := d.Find(key)
if !found {
d[key] = value
ok = true
func (d Dict) Insert(k string, v Object) bool {
if _, found := d.Find(k); !found {
d[k] = v
return true
}
return ok
return false
}

// InsertBool adds a new bool entry to this PDFDict.
Expand Down
6 changes: 4 additions & 2 deletions pkg/pdfcpu/validate/colorspace.go
Expand Up @@ -597,8 +597,10 @@ func validateColorSpaceArray(xRefTable *model.XRefTable, a types.Array, excludeP
case model.DeviceNCS:
err = validateDeviceNColorSpace(xRefTable, a, model.V13)

// Relaxed validation:
case model.DeviceRGBCS:
case model.DeviceGrayCS, model.DeviceRGBCS, model.DeviceCMYKCS:
if xRefTable.ValidationMode != model.ValidationRelaxed {
err = errors.Errorf("pdfcpu: validateColorSpaceArray: undefined color space: %s\n", name)
}

default:
err = errors.Errorf("pdfcpu: validateColorSpaceArray: undefined color space: %s\n", name)
Expand Down

0 comments on commit 043541b

Please sign in to comment.