Commit
Fix #727
hhrutter committed Nov 26, 2023
1 parent cf3b64a commit 9964328
Showing 7 changed files with 163 additions and 141 deletions.
13 changes: 8 additions & 5 deletions cmd/pdfcpu/process.go
@@ -1833,18 +1833,21 @@ func processListImagesCommand(conf *model.Configuration) {
}

func processDumpCommand(conf *model.Configuration) {
s := "No dump for you!! One year!\n\n"
s := "No dump for you! - One year!\n\n"
if len(flag.Args()) != 3 {
fmt.Fprintln(os.Stderr, s)
os.Exit(1)
}

mode := flag.Arg(0)
hex := mode[0] == 'h' || mode[0] == 'H'

vals := []int{0, 0}
if hex {

mode := strings.ToLower(flag.Arg(0))

switch mode[0] {
case 'a':
vals[0] = 1
case 'h':
vals[0] = 2
}

objNr, err := strconv.Atoi(flag.Arg(1))
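For orientation: processDumpCommand now derives an integer dump mode from the first letter of its case-insensitive mode argument and hands it on via vals[0]. A minimal standalone sketch of that mapping; the helper name is illustrative and not part of the commit.

package main

import (
	"fmt"
	"strings"
)

// dumpModeValue mirrors the switch added to processDumpCommand:
// 0 .. object only, 1 .. ascii stream dump, 2 .. hex stream dump.
func dumpModeValue(mode string) int {
	if mode == "" {
		return 0
	}
	switch strings.ToLower(mode)[0] {
	case 'a':
		return 1
	case 'h':
		return 2
	}
	return 0
}

func main() {
	fmt.Println(dumpModeValue("hex"))   // 2
	fmt.Println(dumpModeValue("ascii")) // 1
	fmt.Println(dumpModeValue("obj"))   // 0
}
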
8 changes: 4 additions & 4 deletions pkg/api/validate.go
@@ -131,7 +131,7 @@ func ValidateFiles(inFiles []string, conf *model.Configuration) error {
}

// DumpObject writes an object from rs to stdout.
func DumpObject(rs io.ReadSeeker, objNr int, hex bool, conf *model.Configuration) error {
func DumpObject(rs io.ReadSeeker, mode, objNr int, conf *model.Configuration) error {
if rs == nil {
return errors.New("pdfcpu: DumpObject: missing rs")
}
@@ -154,13 +154,13 @@ func DumpObject(rs io.ReadSeeker, objNr int, hex bool, conf *model.Configuration
return errors.Wrap(err, fmt.Sprintf("validation error (obj#:%d)%s", ctx.CurObj, s))
}

ctx.DumpStream(objNr, hex)
ctx.DumpObject(objNr, mode)

return err
}

// DumpObjectFile writes an object from rs to stdout.
func DumpObjectFile(inFile string, objNr int, hex bool, conf *model.Configuration) error {
func DumpObjectFile(inFile string, mode, objNr int, conf *model.Configuration) error {
if conf == nil {
conf = model.NewDefaultConfiguration()
}
@@ -172,5 +172,5 @@ func DumpObjectFile(inFile string, objNr int, hex bool, conf *model.Configuratio

defer f.Close()

return DumpObject(f, objNr, hex, conf)
return DumpObject(f, mode, objNr, conf)
}
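
A hedged usage sketch of the changed API: DumpObjectFile now takes a mode int instead of a hex bool (0 = object only, 1 = ascii, 2 = hex, per the comment added to DumpObject later in this diff). File name and object number are placeholders.

package main

import (
	"log"

	"github.com/pdfcpu/pdfcpu/pkg/api"
	"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model"
)

func main() {
	conf := model.NewDefaultConfiguration()
	// Hex-dump the decoded stream of object 15 in in.pdf (both placeholders).
	if err := api.DumpObjectFile("in.pdf", 2, 15, conf); err != nil {
		log.Fatal(err)
	}
}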
4 changes: 2 additions & 2 deletions pkg/cli/cli.go
@@ -282,9 +282,9 @@ func ListImages(cmd *Command) ([]string, error) {

// Dump known object to stdout.
func Dump(cmd *Command) ([]string, error) {
hex := cmd.IntVals[0] == 1
mode := cmd.IntVals[0]
objNr := cmd.IntVals[1]
return nil, api.DumpObjectFile(*cmd.InFile, objNr, hex, cmd.Conf)
return nil, api.DumpObjectFile(*cmd.InFile, mode, objNr, cmd.Conf)
}

// Create renders page content corresponding to declarations found in inFileJSON and writes the result to outFile.
28 changes: 6 additions & 22 deletions pkg/pdfcpu/extract.go
@@ -18,6 +18,7 @@ package pdfcpu

import (
"bytes"
"fmt"
"io"
"strings"

@@ -321,27 +322,6 @@ func img(
resourceID, filters, lastFilter string,
objNr int) (*model.Image, error) {

// "ImageMask" is a flag indicating whether the image shall be treated as an image mask.
// We do not extract imageMasks with the exception of CCITTDecoded images.
if imgMask {
// bpc = 1
if lastFilter != filter.CCITTFax {
if log.InfoEnabled() {
log.Info.Printf("ExtractImage(%d): skip img with imageMask\n", objNr)
}
return nil, nil
}
}

// An image XObject defining an image mask to be applied to this image, or an array specifying a range of colours to be applied to it as a colour key mask.
// Ignore if image has a Mask defined.
if sm, _ := sd.Find("Mask"); sm != nil {
if log.InfoEnabled() {
log.Info.Printf("ExtractImage(%d): skip image, unsupported \"Mask\"\n", objNr)
}
return nil, nil
}

// CCITTDecoded images / (bit) masks don't have a ColorSpace attribute, but we render image files.
if lastFilter == filter.CCITTFax {
if _, err := ctx.DereferenceDictEntry(sd.Dict, "ColorSpace"); err != nil {
@@ -365,8 +345,12 @@ }
}

default:
msg := fmt.Sprintf("pdfcpu: ExtractImage(obj#%d): skipping img, filter %s unsupported", objNr, filters)
if log.DebugEnabled() {
log.Debug.Printf("ExtractImage(%d): skip img, filter %s unsupported\n", objNr, filters)
log.Debug.Println(msg)
}
if log.CLIEnabled() {
log.CLI.Println(msg)
}
return nil, nil
}
Expand Down
61 changes: 38 additions & 23 deletions pkg/pdfcpu/model/xreftable.go
@@ -17,6 +17,7 @@
package model

import (
"bufio"
"bytes"
"encoding/hex"
"fmt"
@@ -29,6 +30,7 @@ import (

"github.com/pdfcpu/pdfcpu/pkg/filter"
"github.com/pdfcpu/pdfcpu/pkg/log"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/scan"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types"
"github.com/pkg/errors"
)
@@ -1054,7 +1056,11 @@ func (xRefTable *XRefTable) sortedKeys() []int {
return keys
}

func (xRefTable *XRefTable) DumpStream(objNr int, hexOut bool) {
func (xRefTable *XRefTable) DumpObject(objNr, mode int) {
// mode
// 0 .. silent / obj only
// 1 .. ascii
// 2 .. hex
entry := xRefTable.Table[objNr]
if entry == nil || entry.Free || entry.Compressed || entry.Object == nil {
fmt.Println(":(")
@@ -1088,32 +1094,41 @@ func (xRefTable *XRefTable) DumpStream(objNr int, hexOut bool) {
}
}

sd, ok := entry.Object.(types.StreamDict)
if ok {
if mode > 0 {
sd, ok := entry.Object.(types.StreamDict)
if ok {

err := sd.Decode()
if err == filter.ErrUnsupportedFilter {
str += "stream filter unsupported!"
fmt.Println(str)
return
}
if err != nil {
str += "decoding problem encountered!"
fmt.Println(str)
return
}
err := sd.Decode()
if err == filter.ErrUnsupportedFilter {
str += "stream filter unsupported!"
fmt.Println(str)
return
}
if err != nil {
str += "decoding problem encountered!"
fmt.Println(str)
return
}

s := "decoded stream content (length = %d)\n%s\n"
if hexOut {
str += fmt.Sprintf(s, len(sd.Content), hex.Dump(sd.Content))
} else {
str += fmt.Sprintf(s, len(sd.Content), sd.Content)
s := "decoded stream content (length = %d)\n%s\n"
s1 := ""
switch mode {
case 1:
sc := bufio.NewScanner(bytes.NewReader(sd.Content))
sc.Split(scan.LinesForSingleEol)
for sc.Scan() {
s1 += sc.Text() + "\n"
}
str += fmt.Sprintf(s, len(sd.Content), s1)
case 2:
str += fmt.Sprintf(s, len(sd.Content), hex.Dump(sd.Content))
}
}
}

osd, ok := entry.Object.(types.ObjectStreamDict)
if ok {
str += fmt.Sprintf("object stream count:%d size of objectarray:%d\n", osd.ObjCount, len(osd.ObjArray))
osd, ok := entry.Object.(types.ObjectStreamDict)
if ok {
str += fmt.Sprintf("object stream count:%d size of objectarray:%d\n", osd.ObjCount, len(osd.ObjArray))
}
}

fmt.Println(str)
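The notable part of the new ascii mode (mode 1) is that the decoded stream content is rescanned line by line, so CR, LF and CRLF line ends all come out as a single "\n". A small sketch, assuming scan.LinesForSingleEol behaves like the scanLinesForSingleEol helper removed from read.go below; the sample content is made up.

package main

import (
	"bufio"
	"bytes"
	"fmt"

	"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/scan"
)

func main() {
	// Made-up content stream fragment with mixed CRLF and LF line ends.
	content := []byte("BT\r\n/F1 12 Tf\r\n(Hello) Tj\nET")

	sc := bufio.NewScanner(bytes.NewReader(content))
	sc.Split(scan.LinesForSingleEol)
	for sc.Scan() {
		fmt.Println(sc.Text()) // BT, /F1 12 Tf, (Hello) Tj, ET on separate lines
	}
}
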
88 changes: 3 additions & 85 deletions pkg/pdfcpu/read.go
@@ -28,6 +28,7 @@ import (
"github.com/pdfcpu/pdfcpu/pkg/filter"
"github.com/pdfcpu/pdfcpu/pkg/log"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/model"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/scan"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/types"
"github.com/pkg/errors"
)
@@ -124,89 +125,6 @@ func fillBuffer(r io.Reader, buf []byte) (int, error) {
return n, err
}

// ScanLines is a split function for a Scanner that returns each line of
// text, stripped of any trailing end-of-line marker. The returned line may
// be empty. The end-of-line marker is one carriage return followed
// by one newline or one carriage return or one newline.
// The last non-empty line of input will be returned even if it has no newline.
func scanLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}

indCR := bytes.IndexByte(data, '\r')
indLF := bytes.IndexByte(data, '\n')

switch {

case indCR >= 0 && indLF >= 0:
if indCR < indLF {
if indLF == indCR+1 {
// 0x0D0A
return indLF + 1, data[0:indCR], nil
}
// 0x0D ... 0x0A
return indCR + 1, data[0:indCR], nil
}
// 0x0A ... 0x0D
return indLF + 1, data[0:indLF], nil

case indCR >= 0:
// We have a full carriage return terminated line.
return indCR + 1, data[0:indCR], nil

case indLF >= 0:
// We have a full newline-terminated line.
return indLF + 1, data[0:indLF], nil

}

// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
return len(data), data, nil
}

// Request more data.
return 0, nil, nil
}

func scanLinesForSingleEol(data []byte, atEOF bool) (advance int, token []byte, err error) {
if atEOF && len(data) == 0 {
return 0, nil, nil
}

indCR := bytes.IndexByte(data, '\r')
indLF := bytes.IndexByte(data, '\n')

switch {

case indCR >= 0 && indLF >= 0:
if indCR < indLF {
// 0x0D ... 0x0A
return indCR + 2, data[0:indCR], nil
}
// 0x0A ... 0x0D
return indLF + 2, data[0:indLF], nil

case indCR >= 0:
// We have a full carriage return terminated line.
return indCR + 1, data[0:indCR], nil

case indLF >= 0:
// We have a full newline-terminated line.
return indLF + 1, data[0:indLF], nil

}

// If we're at EOF, we have a final, non-terminated line. Return it.
if atEOF {
return len(data), data, nil
}

// Request more data.
return 0, nil, nil
}

func newPositionedReader(rs io.ReadSeeker, offset *int64) (*bufio.Reader, error) {
if _, err := rs.Seek(*offset, io.SeekStart); err != nil {
return nil, err
@@ -1413,7 +1331,7 @@ func bypassXrefSection(ctx *model.Context, offExtra int64) error {
}

s := bufio.NewScanner(rd)
s.Split(scanLinesForSingleEol)
s.Split(scan.LinesForSingleEol)

bb := []byte{}
var (
@@ -1538,7 +1456,7 @@ func tryXRefSection(ctx *model.Context, rs io.ReadSeeker, offset *int64, offExtr
s := bufio.NewScanner(rd)
buf := make([]byte, 0, 4096)
s.Buffer(buf, 1024*1024)
s.Split(scanLines)
s.Split(scan.Lines)

line, err := scanLine(s)
if err != nil {
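Why custom split functions at all: PDF allows a lone CR as an end-of-line marker, which bufio.ScanLines never splits on. A comparison sketch, assuming scan.Lines matches the scanLines helper deleted above; the xref fragment is illustrative.

package main

import (
	"bufio"
	"bytes"
	"fmt"

	"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/scan"
)

func countLines(data []byte, split bufio.SplitFunc) int {
	s := bufio.NewScanner(bytes.NewReader(data))
	s.Split(split)
	n := 0
	for s.Scan() {
		n++
	}
	return n
}

func main() {
	// Illustrative xref fragment whose lines are terminated by lone CRs.
	data := []byte("xref\r0 6\r0000000000 65535 f \r")

	fmt.Println(countLines(data, bufio.ScanLines)) // 1: lone CR is not an EOL here
	fmt.Println(countLines(data, scan.Lines))      // 3: each CR ends a line
}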
