Skip to content

Commit

Permalink
Fix #252
Browse files Browse the repository at this point in the history
  • Loading branch information
hhrutter committed Nov 29, 2020
1 parent c9c7bf1 commit cd73588
Show file tree
Hide file tree
Showing 3 changed files with 52 additions and 21 deletions.
69 changes: 49 additions & 20 deletions pkg/pdfcpu/parse.go
Expand Up @@ -90,16 +90,29 @@ func positionToNextEOL(s string) string {
}

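// trimLeftSpace trims leading whitespace and trailing comment.
// In relaxed mode, eol reports whether a line break ('\n' or '\r') was the first character left after skipping non-EOL whitespace.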
func trimLeftSpace(s string) (outstr string, trimmedSpaces int) {
func trimLeftSpace(s string, relaxed bool) (outstr string, eol bool) {

log.Parse.Printf("TrimLeftSpace: begin %s\n", s)

whitespace := func(c rune) bool { return unicode.IsSpace(c) }

whitespaceNoEol := func(r rune) bool {
switch r {
case '\t', '\v', '\f', ' ', 0x85, 0xA0:
return true
}
return false
}

outstr = s

for {
// trim leading whitespace
if relaxed {
outstr = strings.TrimLeftFunc(outstr, whitespaceNoEol)
if len(outstr) >= 1 && (outstr[0] == '\n' || outstr[0] == '\r') {
eol = true
}
}
outstr = strings.TrimLeftFunc(outstr, whitespace)
log.Parse.Printf("1 outstr: <%s>\n", outstr)
if len(outstr) <= 1 || outstr[0] != '%' {
Expand All @@ -111,11 +124,9 @@ func trimLeftSpace(s string) (outstr string, trimmedSpaces int) {

}

trimmedSpaces = len(s) - len(outstr)

log.Parse.Printf("TrimLeftSpace: end %s %d\n", outstr, trimmedSpaces)
log.Parse.Printf("TrimLeftSpace: end %s\n", outstr)

return outstr, trimmedSpaces
return outstr, eol
}

// HexString validates and formats a hex string to be of even length.
Expand Down Expand Up @@ -241,7 +252,7 @@ func parseObjectAttributes(line *string) (objectNumber *int, generationNumber *i

// object number

l, _ = trimLeftSpace(l)
l, _ = trimLeftSpace(l, false)
if len(l) == 0 {
return nil, nil, errors.New("pdfcpu: ParseObjectAttributes: can't find object number")
}
Expand All @@ -259,7 +270,7 @@ func parseObjectAttributes(line *string) (objectNumber *int, generationNumber *i
// generation number

l = l[i:]
l, _ = trimLeftSpace(l)
l, _ = trimLeftSpace(l, false)
if len(l) == 0 {
return nil, nil, errors.New("pdfcpu: ParseObjectAttributes: can't find generation number")
}
Expand Down Expand Up @@ -304,7 +315,7 @@ func parseArray(line *string) (*Array, error) {
l = forwardParseBuf(l, 1)

// position to first non whitespace char after '['
l, _ = trimLeftSpace(l)
l, _ = trimLeftSpace(l, false)

if len(l) == 0 {
// only whitespace after '['
Expand All @@ -328,7 +339,7 @@ func parseArray(line *string) (*Array, error) {
}

// position to next non whitespace char.
l, _ = trimLeftSpace(l)
l, _ = trimLeftSpace(l, false)
if len(l) == 0 {
return nil, errArrayNotTerminated
}
Expand Down Expand Up @@ -503,7 +514,7 @@ func parseName(line *string) (*Name, error) {
return &nameObj, nil
}

func parseDict(line *string) (*Dict, error) {
func parseDict(line *string, relaxed bool) (*Dict, error) {

if line == nil || len(*line) == 0 {
return nil, errNoDictionary
Expand All @@ -521,14 +532,15 @@ func parseDict(line *string) (*Dict, error) {
l = forwardParseBuf(l, 2)

// position to first non whitespace char after '<<'
l, _ = trimLeftSpace(l)
l, _ = trimLeftSpace(l, false)

if len(l) == 0 {
// only whitespace after '<<'
return nil, errDictionaryNotTerminated
}

d := NewDict()
var eol bool

for !strings.HasPrefix(l, ">>") {

Expand All @@ -539,14 +551,26 @@ func parseDict(line *string) (*Dict, error) {
log.Parse.Printf("ParseDict: key = %s\n", key)

// position to first non whitespace after key
l, _ = trimLeftSpace(l)
l, eol = trimLeftSpace(l, relaxed)

if len(l) == 0 {
log.Parse.Println("ParseDict: only whitespace after key")
// only whitespace after key
return nil, errDictionaryNotTerminated
}

// A friendly 🤢 to the devs of the Kdan Pocket Scanner for the iPad.
// Hack for #252:
// For dicts with kv pairs terminated by eol we accept a missing value as an empty string.
if eol {
obj := StringLiteral("")
log.Parse.Printf("ParseDict: dict[%s]=%v\n", key, obj)
if ok := d.Insert(string(*key), obj); !ok {
return nil, errDictionaryDuplicateKey
}
continue
}

obj, err := parseObject(&l)
if err != nil {
return nil, err
Expand All @@ -567,7 +591,7 @@ func parseDict(line *string) (*Dict, error) {
}

// position to next non whitespace char.
l, _ = trimLeftSpace(l)
l, _ = trimLeftSpace(l, false)
if len(l) == 0 {
return nil, errDictionaryNotTerminated
}
Expand Down Expand Up @@ -667,7 +691,7 @@ func parseNumericOrIndRef(line *string) (Object, error) {
iref1 := i

l = l[i1:]
l, _ = trimLeftSpace(l)
l, _ = trimLeftSpace(l, false)
if len(l) == 0 {
// only whitespace
*line = l1
Expand Down Expand Up @@ -702,7 +726,7 @@ func parseNumericOrIndRef(line *string) (Object, error) {
// Look for "R"

l = l[i2:]
l, _ = trimLeftSpace(l)
l, _ = trimLeftSpace(l, false)

if len(l) == 0 {
// only whitespace
Expand Down Expand Up @@ -733,9 +757,14 @@ func parseHexLiteralOrDict(l *string) (val Object, err error) {
// if next char = '<' parseDict.
if (*l)[1] == '<' {
log.Parse.Println("parseHexLiteralOrDict: value = Dictionary")
d, err := parseDict(l)
if err != nil {
return nil, err
var (
d *Dict
err error
)
if d, err = parseDict(l, false); err != nil {
if d, err = parseDict(l, true); err != nil {
return nil, err
}
}
val = *d
} else {
Expand Down Expand Up @@ -784,7 +813,7 @@ func parseObject(line *string) (Object, error) {
log.Parse.Printf("ParseObject: buf= <%s>\n", l)

// position to first non whitespace char
l, _ = trimLeftSpace(l)
l, _ = trimLeftSpace(l, false)
if len(l) == 0 {
// only whitespace
return nil, errBufNotAvailable
Expand Down
2 changes: 2 additions & 0 deletions pkg/pdfcpu/parse_dict_test.go
Expand Up @@ -49,6 +49,8 @@ func doTestParseDictGeneral(t *testing.T) {

func doTestParseDictNameObjects(t *testing.T) {
// Name Objects
doTestParseDictOK("<</Title \x0a/Type /Outline\x0a/Key /Value>>", t)
doTestParseDictOK("<</Key1 /Value1\x0a/Title \x0a/Type /Outline\x0a/Key /Value>>", t)
doTestParseDictOK("<</S/A>>", t) // empty name
doTestParseDictOK("<</K1 / /K2 /Name2>>", t)
doTestParseDictOK("<</Key/Value>>", t)
Expand Down
2 changes: 1 addition & 1 deletion pkg/pdfcpu/validate/outlineTree.go
Expand Up @@ -148,7 +148,7 @@ func validateOutlines(xRefTable *pdf.XRefTable, rootDict pdf.Dict, required bool
}

// Type, optional, name
_, err = validateNameEntry(xRefTable, d, "outlineDict", "Type", OPTIONAL, pdf.V10, func(s string) bool { return s == "Outlines" })
_, err = validateNameEntry(xRefTable, d, "outlineDict", "Type", OPTIONAL, pdf.V10, func(s string) bool { return s == "Outlines" || s == "Outline" })
if err != nil {
return err
}
Expand Down

0 comments on commit cd73588

Please sign in to comment.