diff --git a/tika-gui-app/examples/example-digipres-metadata-mappings-jdbc.csv b/tika-gui-app/examples/example-digipres-metadata-mappings-jdbc.csv
index 1b07c1d..cbda45a 100644
--- a/tika-gui-app/examples/example-digipres-metadata-mappings-jdbc.csv
+++ b/tika-gui-app/examples/example-digipres-metadata-mappings-jdbc.csv
@@ -22,6 +22,8 @@ tika-eval:numAlphaTokens,num_alpha_tokens,BIGINT
tika-eval:lang,detected_lang,VARCHAR(12)
tika-eval:oov,oov,FLOAT
xmpTPg:NPages,num_pages,INTEGER
+pdf:incrementalUpdateCount,pdf_incremental_updates,INTEGER
+X-TIKA:versionNumber,version_number,INTEGER
pdf:totalUnmappedUnicodeChars,pdf_num_unmapped_unicode,INTEGER
pdf:overallPercentageUnmappedUnicodeChars,pdf_percent_unmapped_unicode,FLOAT
pdf:containsNonEmbeddedFont,pdf_non_embedded_font,BOOLEAN
diff --git a/tika-gui-app/examples/tika-parsers.xml b/tika-gui-app/examples/tika-parsers.xml
index c5f9559..6eb050c 100644
--- a/tika-gui-app/examples/tika-parsers.xml
+++ b/tika-gui-app/examples/tika-parsers.xml
@@ -13,13 +13,13 @@
0.3
true
true
- false
+ true
true
true
2.5
true
- false
+ true
true
false
false
diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/config/TikaConfigWriter.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/config/TikaConfigWriter.java
index 3098bd0..4e9e81b 100644
--- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/config/TikaConfigWriter.java
+++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/config/TikaConfigWriter.java
@@ -142,9 +142,9 @@ private void appendDetectors(BatchProcessConfig batchProcessConfig, DomWriter wr
             //don't do anything, right(?)
         } else {
-            //this is a total hack.
-            try {
-                XMLStringToDOM.write(writer, properties,
-                        batchProcessConfig.getDetectorConfig().get().getPath());
+            //this is a total hack: graft the root of the user's detector config XML into properties
+            try (InputStream is = Files.newInputStream(
+                    batchProcessConfig.getDetectorConfig().get().getPath().get())) {
+                XMLStringToDOM.write(writer, properties, is);
             } catch (TikaException | IOException | SAXException e) {
-                LOGGER.warn("couldn't write dom");
+                LOGGER.warn("couldn't write dom", e);
             }
@@ -155,28 +155,25 @@ private void appendParsers(BatchProcessConfig batchProcessConfig, DomWriter writ
                                Element properties) throws XMLStreamException {
         if (batchProcessConfig.getParserConfig().isEmpty() ||
                 batchProcessConfig.getParserConfig().get().getPath().isEmpty()) {
-            Element parsers = writer.createAndGetElement(properties, "parsers");
-            Element defaultParser = writer.createAndGetElement(parsers, "parser");
-            defaultParser.setAttribute("class", "org.apache.tika.parser.DefaultParser");
-            excludeParsers(writer, defaultParser, "org.apache.tika.parser.ocr.TesseractOCRParser",
-                    "org.apache.tika.parser.pdf.PDFParser",
-                    "org.apache.tika.parser.microsoft.ooxml.OOXMLParser",
-                    "org.apache.tika.parser.microsoft.OfficeParser");
-            addLegacyParams(writer, parsers, "parser", "org.apache.tika.parser.pdf.PDFParser",
-                    "extractActions", "bool", "true");
-
-            addLegacyParams(writer, parsers, "parser",
-                    "org.apache.tika.parser.microsoft.ooxml.OOXMLParser", "extractMacros", "bool",
-                    "true", "includeDeletedContent", "bool", "true", "includeMoveFromContent",
-                    "bool", "true");
-
-            addLegacyParams(writer, parsers, "parser",
-                    "org.apache.tika.parser.microsoft.OfficeParser", "extractMacros", "bool", "true");
+            //this is a total hack.
+            //no parser config was supplied: graft the bundled /default_parsers.xml into properties
+            try (InputStream is =
+                         TikaConfigWriter.class.getResourceAsStream("/default_parsers.xml")) {
+                if (is == null) {
+                    //getResourceAsStream reports a missing resource with null, not an exception
+                    throw new IOException("/default_parsers.xml not found on classpath");
+                }
+                XMLStringToDOM.write(writer, properties, is);
+            } catch (TikaException | IOException | SAXException e) {
+                LOGGER.warn("couldn't write dom", e);
+            }
         } else {
             //this is a total hack.
-            try {
-                XMLStringToDOM.write(writer, properties,
-                        batchProcessConfig.getParserConfig().get().getPath());
+            try (InputStream is =
+                         Files.newInputStream(
+                                 batchProcessConfig.getParserConfig().get().getPath().get())) {
+
+                XMLStringToDOM.write(writer, properties, is);
             } catch (TikaException | IOException | SAXException e) {
-                LOGGER.warn("couldn't write dom");
+                LOGGER.warn("couldn't write dom", e);
             }
diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/sax/XMLStringToDOM.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/sax/XMLStringToDOM.java
index cba7031..4b7cd84 100644
--- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/sax/XMLStringToDOM.java
+++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/sax/XMLStringToDOM.java
@@ -17,8 +17,7 @@
package org.tallison.tika.app.fx.sax;
import java.io.IOException;
-import java.nio.file.Path;
-import java.util.Optional;
+import java.io.InputStream;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
@@ -29,9 +28,25 @@
 public class XMLStringToDOM {
-    public static void write(DomWriter writer, Element writerRoot, Optional path)
+    /**
+     * Parses the XML read from {@code is} and appends the parsed document's root element
+     * as a child of {@code writerRoot}. This method does not itself close {@code is}.
+     *
+     * @param writer DOM writer used to perform the append
+     * @param writerRoot element under which the parsed root element is appended
+     * @param is stream supplying the XML to parse
+     * @throws IOException if {@code is} is {@code null}, or propagated from the DOM build
+     * @throws TikaException propagated from the underlying DOM build/append calls
+     * @throws SAXException propagated from the underlying DOM build/append calls
+     */
+    public static void write(DomWriter writer, Element writerRoot, InputStream is)
             throws TikaException, IOException, SAXException {
-        Document document = XMLReaderUtils.buildDOM(path.get());
+        if (is == null) {
+            //callers may hand over null (e.g. from getResourceAsStream); fail with the
+            //checked exception they already catch instead of passing null to the DOM builder
+            throw new IOException("XML input stream is null");
+        }
+        Document document = XMLReaderUtils.buildDOM(is);
         Element documentRoot = document.getDocumentElement();
         writer.appendChild(writerRoot, documentRoot);
     }
diff --git a/tika-gui-app/src/main/resources/default_parsers.xml b/tika-gui-app/src/main/resources/default_parsers.xml
new file mode 100644
index 0000000..6947fb8
--- /dev/null
+++ b/tika-gui-app/src/main/resources/default_parsers.xml
@@ -0,0 +1,77 @@
+
+
+
+
+
+
+
+
+
+
+
+
+ true
+ 0.3
+ true
+ true
+ true
+
+ true
+ true
+ 2.5
+ true
+ true
+ true
+ false
+ false
+ true
+ false
+ -1
+
+ 10000
+ 300
+ png
+ 1.0
+ ALL
+ auto
+ better
+ gray
+
+ false
+ false
+ false
+ 0.5
+ false
+
+
+
+
+ true
+ true
+ true
+
+
+
+
+ true
+
+
+
\ No newline at end of file