From ee25dff7bfb9e3f83622364f14b2cc8fda8ad3eb Mon Sep 17 00:00:00 2001 From: tballison Date: Tue, 29 Nov 2022 11:05:21 -0500 Subject: [PATCH] This closes #85. -- automatically load digipres defaults for csv output. --- .../assemblies/assembly-linux_x64.xml | 5 +++ .../assemblies/assembly-macosx_aarch64.xml | 5 +++ .../assemblies/assembly-macosx_x64.xml | 5 +++ tika-gui-app/assemblies/assembly-win_x64.xml | 5 +++ ...example-digipres-metadata-mappings-csv.csv | 40 ------------------- ...xample-digipres-metadata-mappings-jdbc.csv | 4 +- .../org/tallison/tika/app/fx/Constants.java | 2 + .../emitters/AbstractEmitterController.java | 19 +++++++++ .../app/fx/emitters/CSVEmitterController.java | 13 ++++-- .../tika/app/fx/emitters/CSVEmitterSpec.java | 35 ++++++++++------ 10 files changed, 76 insertions(+), 57 deletions(-) delete mode 100644 tika-gui-app/examples/example-digipres-metadata-mappings-csv.csv diff --git a/tika-gui-app/assemblies/assembly-linux_x64.xml b/tika-gui-app/assemblies/assembly-linux_x64.xml index dd52042..2e90902 100644 --- a/tika-gui-app/assemblies/assembly-linux_x64.xml +++ b/tika-gui-app/assemblies/assembly-linux_x64.xml @@ -40,6 +40,11 @@ + + ${project.basedir}/examples/example-digipres-metadata-mappings-jdbc.csv + default-metadata-mappings.csv + config + ${project.basedir}/target/jres/linux_x64/zulu17.38.21-ca-fx-jre17.0.5-linux_x64.tar.gz jre diff --git a/tika-gui-app/assemblies/assembly-macosx_aarch64.xml b/tika-gui-app/assemblies/assembly-macosx_aarch64.xml index 96119cc..be57eea 100644 --- a/tika-gui-app/assemblies/assembly-macosx_aarch64.xml +++ b/tika-gui-app/assemblies/assembly-macosx_aarch64.xml @@ -40,6 +40,11 @@ + + ${project.basedir}/examples/example-digipres-metadata-mappings-jdbc.csv + default-metadata-mappings.csv + config + ${project.basedir}/target/jres/macosx_aarch64/zulu17.38.21-ca-fx-jre17.0.5-macosx_aarch64.zip jre diff --git a/tika-gui-app/assemblies/assembly-macosx_x64.xml b/tika-gui-app/assemblies/assembly-macosx_x64.xml index de766ce..6434ce9 100644 --- a/tika-gui-app/assemblies/assembly-macosx_x64.xml +++ b/tika-gui-app/assemblies/assembly-macosx_x64.xml @@ -40,6 +40,11 @@ + + ${project.basedir}/examples/example-digipres-metadata-mappings-jdbc.csv + default-metadata-mappings.csv + config + ${project.basedir}/target/jres/macosx_x64/zulu17.38.21-ca-fx-jre17.0.5-macosx_x64.zip jre diff --git a/tika-gui-app/assemblies/assembly-win_x64.xml b/tika-gui-app/assemblies/assembly-win_x64.xml index 5d05e83..bc3717c 100644 --- a/tika-gui-app/assemblies/assembly-win_x64.xml +++ b/tika-gui-app/assemblies/assembly-win_x64.xml @@ -44,6 +44,11 @@ + + ${project.basedir}/examples/example-digipres-metadata-mappings-jdbc.csv + default-metadata-mappings.csv + config + ${project.basedir}/bin/tika-gui.bat diff --git a/tika-gui-app/examples/example-digipres-metadata-mappings-csv.csv b/tika-gui-app/examples/example-digipres-metadata-mappings-csv.csv deleted file mode 100644 index e772520..0000000 --- a/tika-gui-app/examples/example-digipres-metadata-mappings-csv.csv +++ /dev/null @@ -1,40 +0,0 @@ -tika,output -X-TIKA:digest:SHA256,sha256 -X-TIKA:encrypted,encrypted -X-TIKA:origResourceName,orig_name -resourceName,name -X-TIKA:embedded_depth,embedded_depth -X-TIKA:embedded_resource_path,embedded_path -embeddedResourceType,embedded_type -Content-Length,length -Content-Type,mime -sf:pronom:mime,sf_mime -sf:pronom:format,sf_format -sf:pronom:version,sf_version -sf:pronom:id,sf_id -sf:pronom:basis,sf_basis -sf:errors,sf_errors -dcterms:modified,modified -location,location -X-TIKA:EXCEPTION:container_exception,exception -X-TIKA:EXCEPTION:embedded_exception,embedded_exception -tika-eval:numAlphaTokens,num_alpha_tokens -tika-eval:lang,detected_lang -tika-eval:oov,oov -xmpTPg:NPages,num_pages -pdf:totalUnmappedUnicodeChars,pdf_num_unmapped_unicode -pdf:overallPercentageUnmappedUnicodeChars,pdf_percent_unmapped_unicode -pdf:containsNonEmbeddedFont,pdf_non_embedded_font -pdf:containsDamagedFont,pdf_damaged_font -pdf:hasAcroFormFields,pdf_has_acroform -pdf:hasCollection,pdf_has_collection -pdf:hasMarkedContent,pdf_has_marked_content -pdf:hasXFA,pdf_has_xfa -pdf:hasXMP,pdf_has_xmp -pdf:PDFExtensionVersion,pdf_extension_version -pdf:PDFVersion,pdf_version -pdf:producer,pdf_producer -pdfa:PDFVersion,pdfa_version -pdfaid:conformance,pdfaid_conformance -pdfx:conformance,pdfx_conformance -xmp:CreatorTool,xmp_creator_tool \ No newline at end of file diff --git a/tika-gui-app/examples/example-digipres-metadata-mappings-jdbc.csv b/tika-gui-app/examples/example-digipres-metadata-mappings-jdbc.csv index b1beb85..1b07c1d 100644 --- a/tika-gui-app/examples/example-digipres-metadata-mappings-jdbc.csv +++ b/tika-gui-app/examples/example-digipres-metadata-mappings-jdbc.csv @@ -37,4 +37,6 @@ pdf:producer,pdf_producer,VARCHAR(512) pdfa:PDFVersion,pdfa_version,VARCHAR(32) pdfaid:conformance,pdfaid_conformance,VARCHAR(12) pdfx:conformance,pdfx_conformance,VARCHAR(12) -xmp:CreatorTool,xmp_creator_tool,VARCHAR(128) \ No newline at end of file +xmp:CreatorTool,xmp_creator_tool,VARCHAR(128) +csv:num_rows,csv_rows,INTEGER +csv:num_columns,csv_columns,INTEGER \ No newline at end of file diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/Constants.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/Constants.java index be8a071..e151832 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/Constants.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/Constants.java @@ -27,5 +27,7 @@ public class Constants { //used as working directory for csv emitter public static final String BASE_PATH = "basePath"; + public static final String DEFAULT_METADATA_MAPPINGS = "default-metadata-mappings.csv"; + } diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/AbstractEmitterController.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/AbstractEmitterController.java index a20802c..8cdf3e3 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/AbstractEmitterController.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/AbstractEmitterController.java @@ -41,6 +41,7 @@ import org.apache.commons.csv.CSVRecord; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.tallison.tika.app.fx.Constants; import org.tallison.tika.app.fx.ControllerBase; import org.tallison.tika.app.fx.batch.BatchProcessConfig; import org.tallison.tika.app.fx.ctx.AppContext; @@ -86,6 +87,24 @@ protected void safelySetCsvMetadataPath(String csvMetadataFilePath) { } } + protected void tryToLoadDefaultMetadataMappings() { + Path defaultMetadataMappingCSV = + AppContext.CONFIG_PATH.resolve(Constants.DEFAULT_METADATA_MAPPINGS); + if (! Files.exists(defaultMetadataMappingCSV)) { + LOGGER.debug("Can't find default metadata mappings path: {}", + defaultMetadataMappingCSV.toAbsolutePath().toString()); + return; + } + try { + LOGGER.debug("loading default csv"); + loadMetadataCSV(defaultMetadataMappingCSV.toFile()); + } catch (IOException e) { + LOGGER.warn("couldn't load default metadata mappings file " + + defaultMetadataMappingCSV.toAbsolutePath(), e); + } + + } + protected Optional getCsvMetadataPath() { return csvMetadataPath; } diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterController.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterController.java index 21ed0a6..c5e2307 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterController.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterController.java @@ -144,6 +144,7 @@ public void selectCSVOutputDirectory(ActionEvent actionEvent) { } this.csvWorkingDirectory = Optional.of(directory.toPath()); this.csvDirectory.setText(directory.getName()); + saveState(false); } @@ -204,9 +205,15 @@ public void validateCSV(ActionEvent actionEvent) { } if (getMetadataRows().size() == 0) { - alert(ALERT_TITLE, "Metadata Not Configured", "Need to configure metadata"); - csvAccordion.setExpandedPane(csvAccordion.getPanes().get(1)); - return; + LOGGER.debug("metadata rows size == 0"); + tryToLoadDefaultMetadataMappings(); + LOGGER.debug("after trying default metadata mappings: {}", getMetadataRows().size()); + if (getMetadataRows().size() == 0) { + LOGGER.debug("metadata rows size still == 0"); + alert(ALERT_TITLE, "Metadata Not Configured", "Need to configure metadata"); + csvAccordion.setExpandedPane(csvAccordion.getPanes().get(1)); + return; + } } ValidationResult validationResult = validateMetadataRows(); if (validationResult != ValidationResult.OK) { diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterSpec.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterSpec.java index 0860094..3a7d876 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterSpec.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterSpec.java @@ -50,26 +50,18 @@ public class CSVEmitterSpec extends JDBCEmitterSpec { private Optional tmpDbDirectory = Optional.empty(); private Optional csvDirectory = Optional.empty(); private Optional csvFileName = Optional.empty(); + private volatile boolean closed = false; public CSVEmitterSpec(@JsonProperty("metadataTuples") List metadataTuples) { super(metadataTuples); setTableName(CSV_DB_TABLE_NAME); } - private static void writeRow(ResultSet rs, CSVPrinter printer, List cells, - int columnCount) throws SQLException, IOException { - for (int i = 1; i <= columnCount; i++) { - String val = rs.getString(i); - if (rs.wasNull()) { - val = StringUtils.EMPTY; - } - cells.add(val); - } - printer.printRecord(cells); - } - @Override public ValidationResult initialize() throws IOException { + if (closed) { + throw new IOException("This csv emitter has been closed"); + } tmpDbDirectory = Optional.of(Files.createTempDirectory("tika-app-csv-tmp")); LOGGER.debug("tmp db directory: {}", tmpDbDirectory.get().toAbsolutePath()); setConnectionString("jdbc:sqlite:" + tmpDbDirectory.get().toAbsolutePath() + @@ -92,7 +84,7 @@ public Optional getCsvDirectory() { } public void setCsvDirectory(Path csvDirectory) { - this.csvDirectory = Optional.of(csvDirectory); + this.csvDirectory = Optional.ofNullable(csvDirectory); } private void createTable() throws SQLException { @@ -122,12 +114,17 @@ private void createTable() throws SQLException { @Override public void close() throws IOException { + //avoid double closures -- TODO figure out a more elegant way of handling this + if (closed) { + return; + } try { writeCSV(); } catch (IOException e) { LOGGER.warn("problem writing csv", e); } finally { cleanCSVTempResources(); + closed = true; } } @@ -235,4 +232,16 @@ private void cleanCSVTempResources() throws IOException { } FileUtils.deleteDirectory(tmpDbDirectory.get().toFile()); } + + private static void writeRow(ResultSet rs, CSVPrinter printer, List cells, + int columnCount) throws SQLException, IOException { + for (int i = 1; i <= columnCount; i++) { + String val = rs.getString(i); + if (rs.wasNull()) { + val = StringUtils.EMPTY; + } + cells.add(val); + } + printer.printRecord(cells); + } }