Skip to content

Commit

Permalink
LDEV-3048 add support for action="extractImages" (#53)
Browse files Browse the repository at this point in the history
workaround to return extracted images in the correct order
  • Loading branch information
zspitzer committed Sep 8, 2023
1 parent 1cd54e9 commit 92c48d7
Show file tree
Hide file tree
Showing 6 changed files with 207 additions and 5 deletions.
1 change: 1 addition & 0 deletions .github/workflows/main-5.4.yml
Expand Up @@ -54,6 +54,7 @@ jobs:
luceeVersion: ${{ env.luceeVersion }}
luceeVersionQuery: ${{ env.luceeVersionQuery }}
extensionDir: ${{ github.workspace }}/dist
extensions: B737ABC4-D43F-4D91-8E8E973E37C40D1B # image-ext for tests
env:
testLabels: pdf
testAdditional: ${{ github.workspace }}/tests
3 changes: 2 additions & 1 deletion .github/workflows/main.yml
Expand Up @@ -53,7 +53,8 @@ jobs:
execute: /bootstrap-tests.cfm
luceeVersion: ${{ env.luceeVersion }}
luceeVersionQuery: ${{ env.luceeVersionQuery }}
extensionDir: ${{ github.workspace }}/dist
extensionDir: ${{ github.workspace }}/dist
extensions: B737ABC4-D43F-4D91-8E8E973E37C40D1B # image-ext for tests
env:
testLabels: pdf
testAdditional: ${{ github.workspace }}/tests
25 changes: 23 additions & 2 deletions source/java/src/org/lucee/extension/pdf/tag/PDF.java
Expand Up @@ -34,6 +34,7 @@
import java.util.Map.Entry;
import java.util.Set;

import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.lucee.extension.pdf.PDFStruct;
import org.lucee.extension.pdf.util.PDFUtil;

Expand Down Expand Up @@ -82,6 +83,7 @@ public class PDF extends BodyTagImpl {
private static final int ACTION_ADD_HEADER = 12;
private static final int ACTION_ADD_FOOTER = 13;
private static final int ACTION_OPEN = 14;
private static final int ACTION_EXTRACT_IMAGES = 15;

private static final String FORMAT_JPG = "jpg";
private static final String FORMAT_TIFF = "tiff";
Expand Down Expand Up @@ -309,9 +311,12 @@ public void setAction(String strAction) throws PageException {
else if ("extract_text".equals(strAction)) action = ACTION_EXTRACT_TEXT;
else if ("addheader".equals(strAction)) action = ACTION_ADD_HEADER;
else if ("addfooter".equals(strAction)) action = ACTION_ADD_FOOTER;
else if ("extractimages".equals(strAction)) action = ACTION_EXTRACT_IMAGES;
else if ("extract-images".equals(strAction)) action = ACTION_EXTRACT_IMAGES;
else if ("extract_images".equals(strAction)) action = ACTION_EXTRACT_IMAGES;

else throw engine.getExceptionUtil().createApplicationException(
"Invalid PDF action [" + strAction + "], supported actions are " + "[addHeader, addFooter, addWatermark, deletePages, extractText, getInfo, merge, open, "
"Invalid PDF action [" + strAction + "], supported actions are " + "[addHeader, addFooter, addWatermark, deletePages, extractText, extractImage, getInfo, merge, open, "
+ "removePassword, protect, read, removeWatermark, setInfo, thumbnail, write]");

}
Expand Down Expand Up @@ -692,9 +697,11 @@ public int doEndTag() throws PageException {
else if (ACTION_PROTECT == action) doActionProtect(true);
else if (ACTION_OPEN == action) doActionProtect(false);
else if (ACTION_THUMBNAIL == action) doActionThumbnail();
else if (ACTION_EXTRACT_IMAGES == action) doActionExtractImages();
else if (ACTION_EXTRACT_TEXT == action) {
doActionExtractText();
}


// else if(ACTION_PROCESSDDX==action) throw
// engine.getExceptionUtil().createApplicationException("action [processddx] not supported");
Expand Down Expand Up @@ -933,7 +940,7 @@ private void doActionThumbnail() throws PageException, IOException, DocumentExce
Resource resource;
if (imagePrefix == null) imagePrefix = (resource = doc.getResource()) != null ? getName(resource.getName()): "thumbnail";

PDFUtil.thumbnail(pageContext, doc, destination.toString(), pageSet, format, imagePrefix, scale);
PDFUtil.thumbnail(pageContext, doc, destination.toString(), pageSet, format, imagePrefix, scale, overwrite);
}
finally {
reader.close();
Expand Down Expand Up @@ -1446,6 +1453,20 @@ private void doActionExtractText() throws PageException, IOException {
}
}

/**
 * Handles {@code action="extractImages"}: writes the images embedded in the selected pages of the
 * source PDF to the destination, delegating the actual work to {@code PDFUtil.extractImages}.
 *
 * The attributes {@code source}, {@code destination}, {@code imagePrefix} and {@code format} are
 * mandatory. When {@code pages} is not set or is {@code "*"}, every page of the document is used.
 *
 * @throws PageException raised by the attribute checks, loading the document or the extraction
 * @throws IOException raised while reading the PDF or writing an image
 * @throws InvalidPasswordException declared because the PDFBox based extraction declares it
 */
private void doActionExtractImages() throws PageException, IOException, InvalidPasswordException {
    required("pdf", "extractImages", "source", source);
    required("pdf", "extractImages", "destination", destination);
    required("pdf", "extractImages", "imagePrefix", imagePrefix);
    required("pdf", "extractImages", "format", format);

    PDFStruct doc = toPDFDocument(source, password, null);
    PdfReader reader = doc.getPdfReader();
    try {
        int len = reader.getNumberOfPages();
        // no page definition (or the wildcard) means "all pages"
        if (pages == null || "*".equals(pages)) pages = "1-" + len;
        Set<Integer> pageSet = PDFUtil.parsePageDefinition(pages, len);

        PDFUtil.extractImages(pageContext, doc, pageSet, destination, imagePrefix, format, overwrite);
    }
    finally {
        // release the reader even when the extraction fails, same as the thumbnail action does
        reader.close();
    }
}

/**
 * Translates a permission bit into the label reported to the caller.
 *
 * @param encrypted whether the document is encrypted
 * @param permissions the permission bit mask of the document
 * @param permission the single permission bit to test
 * @return {@code "Allowed"} when the document is not encrypted or the bit test is positive,
 *         {@code "Not Allowed"} otherwise
 */
private Object allowed(boolean encrypted, int permissions, int permission) {
    // without encryption no permission applies
    if (!encrypted) {
        return "Allowed";
    }
    int masked = permissions & permission;
    if (masked > 0) {
        return "Allowed";
    }
    return "Not Allowed";
}
Expand Down
51 changes: 49 additions & 2 deletions source/java/src/org/lucee/extension/pdf/util/PDFUtil.java
Expand Up @@ -26,15 +26,21 @@
import java.io.IOException;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.apache.pdfbox.cos.COSName;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDPageTree;
import org.apache.pdfbox.pdmodel.PDResources;
import org.apache.pdfbox.pdmodel.encryption.InvalidPasswordException;
import org.apache.pdfbox.pdmodel.graphics.PDXObject;
import org.apache.pdfbox.pdmodel.graphics.image.PDImageXObject;
import org.apache.pdfbox.rendering.PDFRenderer;
import org.apache.pdfbox.text.PDFTextStripper;
import org.lucee.extension.pdf.PDFStruct;
Expand Down Expand Up @@ -384,7 +390,7 @@ public static Object extractText(PDFStruct doc, Set<Integer> pageNumbers, int ty
// return pdDoc.getDocumentCatalog().getAllPages().get(2);
}

public static void thumbnail(PageContext pc, PDFStruct doc, String destination, Set<Integer> pageNumbers, String format, String imagePrefix, int scale) throws IOException {
public static void thumbnail(PageContext pc, PDFStruct doc, String destination, Set<Integer> pageNumbers, String format, String imagePrefix, int scale, boolean overwrite) throws IOException {

CFMLEngine engine = CFMLEngineFactory.getInstance();

Expand All @@ -406,7 +412,48 @@ public static void thumbnail(PageContext pc, PDFStruct doc, String destination,
BufferedImage thumbnailImage = pdfRender.renderImageWithDPI(p - 1, scale);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
ImageIO.write(thumbnailImage, format, baos); // this one not support .tiff format
engine.getIOUtil().copy(new ByteArrayInputStream(baos.toByteArray()), engine.getResourceUtil().toResourceNotExisting(pc, imageDestination), true);
Resource res = engine.getResourceUtil().toResourceNotExisting(pc, imageDestination);
if (res.exists() && !overwrite) throw new RuntimeException("Thumbnail image file already exists [" + imageDestination + "] and overwrite was false");
engine.getIOUtil().copy(new ByteArrayInputStream(baos.toByteArray()), res, true);
}
}

/**
 * Extracts the image XObjects referenced by the resources of the selected pages and writes each one
 * to {@code destination} as {@code <imagePrefix>-<n>.<format>}. The counter {@code n} starts at 1 and
 * keeps counting across all selected pages.
 *
 * @param pc page context used to resolve the target file
 * @param doc PDF to read the images from
 * @param pageNumbers 1-based numbers of the pages to process
 * @param destination directory the image files are written to
 * @param imagePrefix file name prefix for the image files
 * @param format image format name handed to {@code ImageIO}, also used as the file extension
 * @param overwrite when {@code false}, an already existing target file aborts the extraction
 * @throws IOException when an image cannot be encoded in {@code format} or cannot be written
 * @throws RuntimeException when a page number is out of range, or a target file exists and
 *         {@code overwrite} is {@code false}
 */
public static void extractImages(PageContext pc,PDFStruct doc, Set<Integer> pageNumbers,Resource destination, String imagePrefix, String format, boolean overwrite) throws IOException, InvalidPasswordException,PageException {

    CFMLEngine engine = CFMLEngineFactory.getInstance();
    PDDocument pdDoc = doc.toPDDocument();
    int n = pdDoc.getNumberOfPages();
    PDPageTree pages = pdDoc.getPages();
    int i = 1; // running image counter, shared by all pages

    for (int p : pageNumbers) {
        if (p < 1 || p > n) throw new RuntimeException("pdf page size [" + p + "] out of range, maximum page size is [" + n + "]");

        PDResources pdResources = pages.get(p - 1).getResources();
        // NOTE(review): PDFBox can return null for a page without a resource dictionary; such a page has nothing to extract
        if (pdResources == null) continue;

        // workaround, getXObjectNames() returns images in reverse order
        List<COSName> xObjectNamesReversed = new ArrayList<>();
        for (COSName name : pdResources.getXObjectNames()) {
            xObjectNamesReversed.add(name);
        }
        Collections.reverse(xObjectNamesReversed);

        for (COSName name : xObjectNamesReversed) {
            PDXObject o = pdResources.getXObject(name);
            if (!(o instanceof PDImageXObject)) continue; // only image XObjects are extracted

            PDImageXObject image = (PDImageXObject) o;
            String filename = destination + "/" + imagePrefix + "-" + i + "." + format;
            Resource res = engine.getResourceUtil().toResourceNotExisting(pc, filename);
            if (res.exists() && !overwrite) throw new RuntimeException("image file already exists [" + filename + "] and overwrite was false");

            // encode in memory first, so nothing is written when the format cannot be handled
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            if (!ImageIO.write(image.getImage(), format, baos)) {
                // ImageIO.write reports a missing writer via its return value, not via an exception
                throw new IOException("no image writer available to write the extracted image in format [" + format + "]");
            }
            engine.getIOUtil().copy(new ByteArrayInputStream(baos.toByteArray()), res.getOutputStream(), true, true);
            i++;
        }
    }

}
}
120 changes: 120 additions & 0 deletions tests/LDEV3048.cfc
@@ -0,0 +1,120 @@
/**
 * Test case for LDEV-3048: cfpdf action="extractImages".
 *
 * beforeAll() builds two PDF fixtures in a scratch directory next to this file:
 *  - noImages.pdf, which contains text only
 *  - withImages.pdf, which has one image (111x111) on page one and two images
 *    (222x222 and 333x333) on page two
 * afterAll() removes the scratch directory again.
 */
component extends="org.lucee.cfml.test.LuceeTestCase" labels="pdf" {

	function beforeAll() {
		variables.outputDir = getDirectoryFromPath(getCurrentTemplatePath()) & "LDEV3048_images\";
		if (!directoryExists( variables.outputDir ) )
			directoryCreate( variables.outputDir );

		// source images of distinct sizes, so an extracted image can be identified by its dimensions
		var img1file = getTempFile( variables.outputDir, "ldev3048-1", "png" );
		var img2file = getTempFile( variables.outputDir, "ldev3048-2", "png" );
		var img3file = getTempFile( variables.outputDir, "ldev3048-3", "png" );

		var img1 = ImageNew("", 111, 111, "rgb", "red");
		var img2 = ImageNew("", 222, 222, "rgb", "yellow");
		var img3 = ImageNew("", 333, 333, "rgb", "green");

		imageWrite(img1, img1file, true);
		imageWrite(img2, img2file, true);
		imageWrite(img3, img3file, true);

		document fileName="#variables.outputDir#noImages.pdf" name="pdfVar" overwrite=true {
			writeoutput("test pdf file");
		}

		document fileName="#variables.outputDir#withImages.pdf" name="pdfVar" overwrite=true {
			```
			<cfoutput>
			<!DOCTYPE html>
			<html lang="en">
			<head>
			<meta charset="utf-8" />
			<title>LDEV-3048</title>
			<style type="text/css">
			.start-on-new-page {
			page-break-before: always;
			}
			</style>
			</head>
			<body>
			<div id="pageOne">
			<img src="#img1File#">
			</div>
			<div id="pageTwo" class="start-on-new-page">
			<img src="#img2File#">
			<img src="#img3File#">
			</div>
			</body>
			</html>
			</cfoutput>
			```
		}
		// the source images are only needed to build the PDF
		fileDelete(img1File);
		fileDelete(img2File);
		fileDelete(img3File);
	}



	function run( testResults , testBox ) {
		describe( "testcase for LDEV-3048", function() {

			it( title="cfpdf extractImages, pdf with no images", body=function( currentSpec ) {
				pdf action="extractImages" source="#outputDir#noImages.pdf"
					overwrite="true" format="png" imageprefix="no-image" password=""
					destination="#outputDir#";

				var imageFiles = directoryList( path=outputDir, filter="no-image*.png" );

				expect( len( imageFiles ) ).toBe( 0 );
			});

			it( title="cfpdf extractImages, pdf with 3 images on 2 pages, all pages", body=function( currentSpec ) {
				pdf action="extractImages" source="#outputDir#withImages.pdf" pages="*"
					overwrite="true" format="png" imageprefix="two-image" password=""
					destination="#outputDir#";

				var imageFiles = directoryList( path=outputDir, filter="two-image*.png" );

				expect( len( imageFiles ) ).toBe( 3 );
				// the first extracted image has to be the one from page one
				var imgInfo = ImageInfo( outputDir & "two-image-1.png" );
				expect( imgInfo.height ).toBe( 111 );
				expect( imgInfo.width ).toBe( 111 );

			});

			it( title="cfpdf extractImages, pdf with 3 images on 2 pages, only from page 2", body=function( currentSpec ) {
				pdf action="extractImages" source="#outputDir#withImages.pdf" pages="2"
					overwrite="true" format="png" imageprefix="page-image" password=""
					destination="#outputDir#";

				var imageFiles = directoryList( path=outputDir, filter="page-image*.png" );

				expect( len( imageFiles ) ).toBe( 2 );
				// the first image on page two is the 222x222 one
				var imgInfo = ImageInfo( outputDir & "page-image-1.png" );
				expect( imgInfo.height ).toBe( 222 );
				expect( imgInfo.width ).toBe( 222 );

				expect(function(){
					pdf action="extractImages" source="#outputDir#withImages.pdf" pages="2"
						overwrite="false" format="png" imageprefix="page-image" password=""
						destination="#outputDir#";
				}).toThrow(); // overwrite="false" and images already exist
			});

			it( title="cfpdf extractImages, invalid image format", body=function( currentSpec ) {
				expect(function(){
					pdf action="extractImages" source="#outputDir#withImages.pdf" pages="2"
						overwrite="true" format="monkey" imageprefix="invalid-image" password=""
						destination="#outputDir#";
				}).toThrow();
			});

		});
	}

	function afterAll() {
		if ( directoryExists( variables.outputDir ) )
			directoryDelete(variables.outputDir, true);
	}
}
12 changes: 12 additions & 0 deletions tests/LDEV967.cfc
Expand Up @@ -65,6 +65,18 @@ component extends = "org.lucee.cfml.test.LuceeTestCase" labels="pdf" {
expect(arrayEvery(imgFiles, (e) => { return listLast(e,".") == "png"})).toBeTrue();
});

// Creates the thumbnails once with overwrite="true", then repeats the same call with
// overwrite="false" and expects it to throw because the files already exist.
it(title="CFPDF action=thumbnail - overwrite false", body=function( currentSpec ) {
pdf action="thumbnail" source="#res#" overwrite="true" destination="#variables.thumbnaildir#" imageprefix="thumbImage";
var imgFiles = directoryList( path="#variables.thumbnaildir#", listInfo="name");
imgFiles.sort("text");
expect(imgFiles[1]).toBe("thumbImage_page_1.jpg");
expect(arrayEvery(imgFiles, (e) => { return find("thumbImage", e)})).toBeTrue();

expect(function(){
pdf action="thumbnail" source="#res#" overwrite="false" destination="#variables.thumbnaildir#" imageprefix="thumbImage";
}).toThrow(); // overwrite is false and file exists
});

});
}

Expand Down

0 comments on commit 92c48d7

Please sign in to comment.