Skip to content

Commit

Permalink
close #404: Task warning when the PDF parser encounters errors or use…
Browse files Browse the repository at this point in the history
…s fallback mechanisms
  • Loading branch information
ediweissmann committed May 6, 2024
1 parent 55688b0 commit 08b7d77
Show file tree
Hide file tree
Showing 24 changed files with 91 additions and 24 deletions.
2 changes: 1 addition & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,7 @@
<hibernate-validator.version>7.0.5.Final</hibernate-validator.version>
<jakarta-el.version>4.0.2</jakarta-el.version>
<hamcrest.version>2.2</hamcrest.version>
<sambox.version>3.0.6</sambox.version>
<sambox.version>3.0.10</sambox.version>
<sejda.io.version>3.0.1</sejda.io.version>
<bouncycastle.version>1.76</bouncycastle.version>
<twelvemonkeys.version>3.9.4</twelvemonkeys.version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,8 @@

import java.math.BigDecimal;
import java.math.RoundingMode;
import java.util.HashSet;
import java.util.Set;

/**
* An DSL class that can be used to notify all the global and local listeners about an event. All the listeners registered on the {@link GlobalNotificationContext} and on the
Expand All @@ -48,6 +50,8 @@ public final class ApplicationEventsNotifier implements Notifier, OngoingNotific

private BigDecimal percentage = BigDecimal.ZERO;
private NotifiableTaskMetadata taskMetadata;

private Set<String> warnings = new HashSet<>();

private ApplicationEventsNotifier(NotifiableTaskMetadata taskMetadata) {
this.taskMetadata = taskMetadata;
Expand Down Expand Up @@ -89,6 +93,16 @@ public void taskStarted() {
public void taskWarning(String warning) {
LOG.warn(warning);
notifyListeners(new TaskExecutionWarningEvent(warning, taskMetadata));
warnings.add(warning);
}

@Override
public void taskWarningOnce(String warning) {
if(warnings.contains(warning)){
return;
}

taskWarning(warning);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,14 @@ public interface Notifier {
*/
void taskWarning(String warning);

/**
* Notifies about a task without repeating the message
*
* @param warning
* warning warning message
*/
void taskWarningOnce(String warning);

/**
* Notifies about a task warning
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public class AddBackPagesTask extends BaseTask<AddBackPagesParameters> {
public void before(AddBackPagesParameters parameters, TaskExecutionContext executionContext) throws TaskException {
super.before(parameters, executionContext);
totalSteps = parameters.getSourceList().size();
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
outputWriter = OutputWriters.newMultipleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ public class ExtractByOutlineTask extends BaseTask<ExtractByOutlineParameters> {
public void before(ExtractByOutlineParameters parameters, TaskExecutionContext executionContext)
throws TaskException {
super.before(parameters, executionContext);
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ public class ExtractPagesTask extends BaseTask<ExtractPagesParameters> {
@Override
public void before(ExtractPagesParameters parameters, TaskExecutionContext executionContext) throws TaskException {
super.before(parameters, executionContext);
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
outputWriter = OutputWriters.newMultipleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -94,7 +94,7 @@ public class MergeTask extends BaseTask<MergeParameters> {
public void before(MergeParameters parameters, TaskExecutionContext executionContext) throws TaskException {
super.before(parameters, executionContext);
totalSteps = parameters.getInputList().size();
sourceOpener = new DefaultPdfSourceOpener();
sourceOpener = new DefaultPdfSourceOpener(executionContext);
outputWriter = OutputWriters.newSingleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
outlineMerger = new OutlineMerger(parameters.getOutlinePolicy());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,13 +57,14 @@ public class PdfToMultipleImageTask<T extends AbstractPdfToMultipleImageParamete
private static final Logger LOG = LoggerFactory.getLogger(PdfToMultipleImageTask.class);

private MultipleOutputWriter outputWriter;
private PdfSourceOpener<PDDocumentHandler> sourceOpener = new DefaultPdfSourceOpener();
private PdfSourceOpener<PDDocumentHandler> sourceOpener;
private PDDocumentHandler documentHandler = null;

@Override
public void before(T parameters, TaskExecutionContext executionContext) throws TaskException {
super.before(parameters, executionContext);
outputWriter = newMultipleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
sourceOpener = new DefaultPdfSourceOpener(executionContext);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ public class PdfToSingleImageTask<T extends AbstractPdfToSingleImageParameters>
private static final Logger LOG = LoggerFactory.getLogger(PdfToSingleImageTask.class);

private SingleOutputWriter outputWriter;
private PdfSourceOpener<PDDocumentHandler> sourceOpener = new DefaultPdfSourceOpener();
private PdfSourceOpener<PDDocumentHandler> sourceOpener = null;
private PDDocumentHandler documentHandler = null;

@Override
Expand All @@ -60,6 +60,7 @@ public void before(T parameters, TaskExecutionContext executionContext) throws T
throw new TaskExecutionException("Selected ImageWriter doesn't support multiple images in the same file");
}
outputWriter = newSingleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
sourceOpener = new DefaultPdfSourceOpener(executionContext);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public class RotateTask extends BaseTask<RotateParameters> {
public void before(RotateParameters parameters, TaskExecutionContext executionContext) throws TaskException {
super.before(parameters, executionContext);
totalSteps = parameters.getSourceList().size();
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
outputWriter = OutputWriters.newMultipleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
}

Expand Down Expand Up @@ -127,4 +127,4 @@ public void after() {
closeQuietly(documentHandler);
}

}
}
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@ public class SetMetadataTask extends BaseTask<SetMetadataParameters> {
@Override
public void before(SetMetadataParameters parameters, TaskExecutionContext executionContext) throws TaskException {
super.before(parameters, executionContext);
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
outputWriter = OutputWriters.newMultipleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public class SetPagesLabelTask extends BaseTask<SetPagesLabelParameters> {
@Override
public void before(SetPagesLabelParameters parameters, TaskExecutionContext executionContext) throws TaskException {
super.before(parameters, executionContext);
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
outputWriter = OutputWriters.newSingleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ public class SetPagesTransitionTask extends BaseTask<SetPagesTransitionParameter
public void before(SetPagesTransitionParameters parameters, TaskExecutionContext executionContext)
throws TaskException {
super.before(parameters, executionContext);
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
outputWriter = OutputWriters.newSingleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ public void before(SplitByOutlineLevelParameters parameters, TaskExecutionContex
throws TaskException {
super.before(parameters, executionContext);
totalSteps = parameters.getSourceList().size();
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ public class SplitByPageNumbersTask<T extends AbstractSplitByPageParameters> ext
public void before(T parameters, TaskExecutionContext executionContext) throws TaskException {
super.before(parameters, executionContext);
totalSteps = parameters.getSourceList().size();
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ public class SplitBySizeTask extends BaseTask<SplitBySizeParameters> {
public void before(SplitBySizeParameters parameters, TaskExecutionContext executionContext) throws TaskException {
super.before(parameters, executionContext);
totalSteps = parameters.getSourceList().size();
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ public class UnpackTask extends BaseTask<UnpackParameters> {
public void before(UnpackParameters parameters, TaskExecutionContext executionContext) throws TaskException {
super.before(parameters, executionContext);
totalSteps = parameters.getSourceList().size();
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
outputWriter = OutputWriters.newMultipleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ public void before(ViewerPreferencesParameters parameters, TaskExecutionContext
throws TaskException {
super.before(parameters, executionContext);
totalSteps = parameters.getSourceList().size();
documentLoader = new DefaultPdfSourceOpener();
documentLoader = new DefaultPdfSourceOpener(executionContext);
outputWriter = OutputWriters.newMultipleOutputWriter(parameters.getExistingOutputPolicy(), executionContext);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,15 @@

import java.io.IOException;

import org.sejda.core.notification.dsl.ApplicationEventsNotifier;
import org.sejda.model.exception.TaskIOException;
import org.sejda.model.exception.TaskWrongPasswordException;
import org.sejda.model.input.PdfFileSource;
import org.sejda.model.input.PdfSource;
import org.sejda.model.input.PdfSourceOpener;
import org.sejda.model.input.PdfStreamSource;
import org.sejda.model.input.PdfURLSource;
import org.sejda.model.task.TaskExecutionContext;
import org.sejda.sambox.input.PDFParser;
import org.sejda.sambox.pdmodel.PDDocument;
import org.sejda.sambox.pdmodel.encryption.InvalidPasswordException;
Expand All @@ -41,6 +43,18 @@ public class DefaultPdfSourceOpener implements PdfSourceOpener<PDDocumentHandler
private static final String WRONG_PWD_MESSAGE = "Unable to open '%s' due to a wrong password.";
private static final String ERROR_MESSAGE = "An error occurred opening the source: %s.";

private static final String WARNING_PARSE_ERRORS_MESSAGE = "Errors were detected when reading document: %s. Please verify the results carefully.";

private final TaskExecutionContext taskExecutionContext;

public DefaultPdfSourceOpener() {
this.taskExecutionContext = null;
}

public DefaultPdfSourceOpener(TaskExecutionContext taskExecutionContext) {
this.taskExecutionContext = taskExecutionContext;
}

@Override
public PDDocumentHandler open(PdfURLSource source) throws TaskIOException {
return openGeneric(source);
Expand All @@ -59,6 +73,13 @@ public PDDocumentHandler open(PdfStreamSource source) throws TaskIOException {
private PDDocumentHandler openGeneric(PdfSource<?> source) throws TaskIOException {
try {
PDDocument document = PDFParser.parse(source.getSeekableSource(), source.getPassword());
if(document.hasParseErrors()){
if(taskExecutionContext != null){
ApplicationEventsNotifier.notifyEvent(taskExecutionContext.notifiableTaskMetadata())
.taskWarningOnce(String.format(WARNING_PARSE_ERRORS_MESSAGE, source.getName()));
}
}

return new PDDocumentHandler(document);
} catch (InvalidPasswordException ipe) {
throw new TaskWrongPasswordException(String.format(WRONG_PWD_MESSAGE, source.getName()), ipe);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ public void mix(List<PdfMixInput> inputs, TaskExecutionContext executionContext)
setCreatorOnPDDocument();
for (PdfMixInput input : inputs) {
executionContext.notifiableTaskMetadata().setCurrentSource(input.getSource());
mixFragments.add(PdfMixFragment.newInstance(input));
mixFragments.add(PdfMixFragment.newInstance(input, executionContext));
}

int maxNumberOfPages = 0;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import org.sejda.model.input.PdfMixInput;
import org.sejda.model.input.PdfSource;
import org.sejda.model.pdf.encryption.PdfAccessPermission;
import org.sejda.model.task.TaskExecutionContext;
import org.sejda.sambox.pdmodel.PDPage;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
Expand Down Expand Up @@ -126,10 +127,10 @@ public void close() throws IOException {
* @throws TaskIOException
* @throws TaskPermissionsException
*/
public static PdfMixFragment newInstance(PdfMixInput input) throws TaskIOException, TaskPermissionsException {
public static PdfMixFragment newInstance(PdfMixInput input, TaskExecutionContext executionContext) throws TaskIOException, TaskPermissionsException {
LOG.debug("Opening input {} with step {} and reverse {}", input.getSource(), input.getStep(),
input.isReverse());
PDDocumentHandler documentHandler = input.getSource().open(new DefaultPdfSourceOpener());
PDDocumentHandler documentHandler = input.getSource().open(new DefaultPdfSourceOpener(executionContext));
documentHandler.getPermissions().ensurePermission(PdfAccessPermission.ASSEMBLE);
return new PdfMixFragment(input, documentHandler);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1112,6 +1112,24 @@ public void form_field_loses_formatting_in_Mac_Preview() throws IOException {
});
}

@Test
public void document_with_parse_errors() throws IOException {
MergeParameters parameters = new MergeParameters();
parameters.setExistingOutputPolicy(ExistingOutputPolicy.OVERWRITE);
parameters.addInput(new PdfMergeInput(customInput("pdf/test_parse_errors.pdf", "test_parse_errors1.pdf")));
parameters.addInput(new PdfMergeInput(customInput("pdf/test_parse_errors.pdf", "test_parse_errors2.pdf")));
parameters.addInput(new PdfMergeInput(customInput("pdf/test_file.pdf")));

testContext.pdfOutputTo(parameters);
execute(parameters);

testContext.assertTaskCompleted();
testContext.assertTaskWarnings(Arrays.asList(
"Errors were detected when reading document: test_parse_errors1.pdf. Please verify the results carefully.",
"Errors were detected when reading document: test_parse_errors2.pdf. Please verify the results carefully."
));
}

private float widthOfCropBox(PDPage page) {
return page.getCropBox().rotate(page.getRotation()).getWidth();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
import org.sejda.model.exception.TaskPermissionsException;
import org.sejda.model.input.PdfMixInput;
import org.sejda.model.input.PdfStreamSource;
import org.sejda.model.task.TaskExecutionContext;

import java.awt.Rectangle;
import java.io.IOException;
Expand All @@ -37,12 +38,14 @@
* @author Andrea Vacondio
*/
public class PdfMixFragmentTest {

private TaskExecutionContext executionContext = null;

@Test
public void nextPage() throws TaskIOException, TaskPermissionsException, IOException {
try (PdfMixFragment victim = PdfMixFragment.newInstance(new PdfMixInput(
PdfStreamSource.newInstanceNoPassword(
getClass().getClassLoader().getResourceAsStream("pdf/2_pages.pdf"), "test.pdf")))) {
getClass().getClassLoader().getResourceAsStream("pdf/2_pages.pdf"), "test.pdf")), executionContext)) {
assertTrue(victim.hasNextPage());
assertThat(
new PdfTextExtractorByArea().extractTextFromArea(victim.nextPage(), new Rectangle(54, 56, 60, 21))
Expand All @@ -58,23 +61,23 @@ public void nextPage() throws TaskIOException, TaskPermissionsException, IOExcep
@Test
public void numberOfPages() throws TaskIOException, TaskPermissionsException, IOException {
try (PdfMixFragment victim = PdfMixFragment.newInstance(new PdfMixInput(PdfStreamSource.newInstanceNoPassword(
getClass().getClassLoader().getResourceAsStream("pdf/2_pages.pdf"), "test.pdf")))) {
getClass().getClassLoader().getResourceAsStream("pdf/2_pages.pdf"), "test.pdf")), executionContext)) {
assertEquals(2, victim.getNumberOfPages());
}
}

@Test
public void step() throws TaskIOException, TaskPermissionsException, IOException {
try (PdfMixFragment victim = PdfMixFragment.newInstance(new PdfMixInput(PdfStreamSource.newInstanceNoPassword(
getClass().getClassLoader().getResourceAsStream("pdf/2_pages.pdf"), "test.pdf"), true, 2))) {
getClass().getClassLoader().getResourceAsStream("pdf/2_pages.pdf"), "test.pdf"), true, 2), executionContext)) {
assertEquals(2, victim.getStep());
}
}

@Test
public void reverse() throws TaskIOException, TaskPermissionsException, IOException {
try (PdfMixFragment victim = PdfMixFragment.newInstance(new PdfMixInput(PdfStreamSource.newInstanceNoPassword(
getClass().getClassLoader().getResourceAsStream("pdf/2_pages.pdf"), "test.pdf"), true, 1))) {
getClass().getClassLoader().getResourceAsStream("pdf/2_pages.pdf"), "test.pdf"), true, 1), executionContext)) {
assertTrue(victim.hasNextPage());
assertThat(
new PdfTextExtractorByArea().extractTextFromArea(victim.nextPage(), new Rectangle(54, 56, 60, 21))
Expand Down
Binary file not shown.

0 comments on commit 08b7d77

Please sign in to comment.