diff --git a/README.md b/README.md index b28a21c..66ca1c9 100644 --- a/README.md +++ b/README.md @@ -60,3 +60,4 @@ The release is currently triggered by pushing a tag starting with 'v'. We should figure out how to sign artifacts and offer sha256s. Again, see jreleaser above. Further, short of going the full jreleaser route, we should start using: appassembler-maven-plugin. +x; \ No newline at end of file diff --git a/tika-gui-app/src/main/java/module-info.java b/tika-gui-app/src/main/java/module-info.java index 9165435..fc3d2fc 100644 --- a/tika-gui-app/src/main/java/module-info.java +++ b/tika-gui-app/src/main/java/module-info.java @@ -34,9 +34,9 @@ opens org.tallison.tika.app.fx to javafx.fxml, com.fasterxml.jackson.databind; opens org.tallison.tika.app.fx.ctx to com.fasterxml.jackson.databind, javafx.fxml; opens org.tallison.tika.app.fx.status to javafx.base; - opens org.tallison.tika.app.fx.tools to com.fasterxml.jackson.databind, javafx.fxml; - opens org.tallison.tika.app.fx.metadata to com.fasterxml.jackson.databind, javafx.fxml, - javafx.base; + opens org.tallison.tika.app.fx.metadata to com.fasterxml.jackson.databind, javafx.fxml, javafx.base; exports org.tallison.tika.app.fx.emitters; opens org.tallison.tika.app.fx.emitters to com.fasterxml.jackson.databind, javafx.fxml; + opens org.tallison.tika.app.fx.config to com.fasterxml.jackson.databind, javafx.fxml; + opens org.tallison.tika.app.fx.batch to com.fasterxml.jackson.databind, javafx.fxml; } diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/AdvancedBatchController.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/AdvancedBatchController.java index 6e830a3..97218fd 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/AdvancedBatchController.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/AdvancedBatchController.java @@ -28,8 +28,8 @@ import javafx.scene.input.MouseEvent; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; 
+import org.tallison.tika.app.fx.batch.BatchProcessConfig; import org.tallison.tika.app.fx.ctx.AppContext; -import org.tallison.tika.app.fx.tools.BatchProcessConfig; import org.apache.tika.utils.StringUtils; @@ -68,14 +68,12 @@ public void initialize(URL fxmlFileLocation, ResourceBundle resources) { if (batchProcessConfig.getDigest().isPresent()) { digestOptions.getSelectionModel().select(batchProcessConfig.getDigest().get()); } - parseTimeoutSeconds.setText( - Integer.toString(batchProcessConfig.getParseTimeoutSeconds())); - memoryPerProcess.setText( - Integer.toString(batchProcessConfig.getMaxMemMb()) - ); + parseTimeoutSeconds.setText(Integer.toString(batchProcessConfig.getParseTimeoutSeconds())); + memoryPerProcess.setText(Integer.toString(batchProcessConfig.getMaxMemMb())); numProcesses.setText(Integer.toString(batchProcessConfig.getNumProcesses())); - perFileEmitThresholdMb.setText(Integer.toString(batchProcessConfig.getPerFileEmitThresholdMb())); + perFileEmitThresholdMb.setText( + Integer.toString(batchProcessConfig.getPerFileEmitThresholdMb())); totalEmitThresholdMb.setText(Integer.toString(batchProcessConfig.getTotalEmitThesholdMb())); emitWithinMs.setText(Long.toString(batchProcessConfig.getEmitWithinMs())); @@ -141,8 +139,7 @@ public void saveState() { APP_CONTEXT.saveState(); } - private int getInt(String label, TextField textField, int min, int max, - int defaultVal) { + private int getInt(String label, TextField textField, int min, int max, int defaultVal) { String txt = textField.getText(); if (StringUtils.isBlank(txt)) { @@ -166,8 +163,7 @@ private int getInt(String label, TextField textField, int min, int max, return num; } - private long getLong(String label, TextField textField, long min, long max, - long defaultVal) { + private long getLong(String label, TextField textField, long min, long max, long defaultVal) { String txt = textField.getText(); if (StringUtils.isBlank(txt)) { diff --git 
a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/BatchInputController.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/BatchInputController.java index 398b434..4148356 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/BatchInputController.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/BatchInputController.java @@ -28,9 +28,9 @@ import javafx.stage.Window; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.tallison.tika.app.fx.batch.BatchProcessConfig; +import org.tallison.tika.app.fx.config.ConfigItem; import org.tallison.tika.app.fx.ctx.AppContext; -import org.tallison.tika.app.fx.tools.BatchProcessConfig; -import org.tallison.tika.app.fx.tools.ConfigItem; import org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator; import org.apache.tika.utils.StringUtils; @@ -47,7 +47,7 @@ public void fileSystemInputDirectorySelect(ActionEvent actionEvent) { final Window parent = ((Node) actionEvent.getTarget()).getScene().getWindow(); DirectoryChooser directoryChooser = new DirectoryChooser(); directoryChooser.setTitle("Open Resource File"); - if (! 
APP_CONTEXT.getBatchProcessConfig().isPresent()) { + if (!APP_CONTEXT.getBatchProcessConfig().isPresent()) { LOGGER.warn("BatchProcessConfig must not be empty"); actionEvent.consume(); return; @@ -71,12 +71,11 @@ public void fileSystemInputDirectorySelect(ActionEvent actionEvent) { String shortLabel = "FileSystem: " + ellipsize(directory.getName(), 30); String fullLabel = "FileSystem: " + directory.getAbsolutePath(); - batchProcessConfig.setFetcher(shortLabel, fullLabel, - Constants.FS_FETCHER_CLASS, "basePath", + batchProcessConfig.setFetcher(shortLabel, fullLabel, Constants.FS_FETCHER_CLASS, "basePath", directory.toPath().toAbsolutePath().toString()); batchProcessConfig.setPipesIterator(shortLabel, fullLabel, - FileSystemPipesIterator.class.getName(), - "basePath", directory.toPath().toAbsolutePath().toString()); + FileSystemPipesIterator.class.getName(), "basePath", + directory.toPath().toAbsolutePath().toString()); batchProcessConfig.setInputSelectedTab(0); APP_CONTEXT.saveState(); ((Stage) fsInputButton.getScene().getWindow()).close(); diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/BatchStatusController.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/BatchStatusController.java index ed5df75..869b636 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/BatchStatusController.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/BatchStatusController.java @@ -41,9 +41,9 @@ import javafx.scene.input.MouseEvent; import javafx.scene.paint.Color; import javafx.util.StringConverter; +import org.tallison.tika.app.fx.batch.BatchProcess; import org.tallison.tika.app.fx.ctx.AppContext; import org.tallison.tika.app.fx.status.StatusCount; -import org.tallison.tika.app.fx.tools.BatchProcess; import org.apache.tika.pipes.PipesResult; import org.apache.tika.pipes.async.AsyncStatus; @@ -52,61 +52,45 @@ public class BatchStatusController implements Initializable { private static Map PIPES_STATUS_LOOKUP = new HashMap<>(); + 
private static String UNPROCESSED_COLOR = "0066cc"; + private static Map COLORS = + Map.of(PipesResult.STATUS.PARSE_SUCCESS, "009900", + PipesResult.STATUS.PARSE_SUCCESS_WITH_EXCEPTION, "ffff00", + PipesResult.STATUS.EMIT_SUCCESS, "009900", PipesResult.STATUS.TIMEOUT, "ff9900", + PipesResult.STATUS.UNSPECIFIED_CRASH, "ff0000", PipesResult.STATUS.OOM, + "ff8000", PipesResult.STATUS.CLIENT_UNAVAILABLE_WITHIN_MS, "", + PipesResult.STATUS.INTERRUPTED_EXCEPTION, "", PipesResult.STATUS.EMPTY_OUTPUT, + "ffe6cc", PipesResult.STATUS.PARSE_EXCEPTION_EMIT, "" + //TODO -- fill out rest? + ); static { - Arrays.stream(PipesResult.STATUS.values()).forEach( - s -> PIPES_STATUS_LOOKUP.put(s.name(), s)); + Arrays.stream(PipesResult.STATUS.values()) + .forEach(s -> PIPES_STATUS_LOOKUP.put(s.name(), s)); } - private static String UNPROCESSED_COLOR = "0066cc"; - - private static Map COLORS = Map.of( - PipesResult.STATUS.PARSE_SUCCESS, "009900", - PipesResult.STATUS.PARSE_SUCCESS_WITH_EXCEPTION, "ffff00", - PipesResult.STATUS.EMIT_SUCCESS, "009900", - PipesResult.STATUS.TIMEOUT, "ff9900", - PipesResult.STATUS.UNSPECIFIED_CRASH, "ff0000", - PipesResult.STATUS.OOM, "ff8000", - PipesResult.STATUS.CLIENT_UNAVAILABLE_WITHIN_MS, "", - PipesResult.STATUS.INTERRUPTED_EXCEPTION, "", - PipesResult.STATUS.EMPTY_OUTPUT, "ffe6cc", - PipesResult.STATUS.PARSE_EXCEPTION_EMIT, "" - //TODO -- fill out rest? 
- ); + @FXML + private final ObservableList statusCounts = FXCollections.observableArrayList(); + private final Label pieSliceCaption = new Label(""); @FXML PieChart statusPieChart; - @FXML TextField totalToProcess; - @FXML TextField totalProcessed; - @FXML TextField overallStatus; - @FXML TableColumn countColumn; - @FXML TableView statusTable; - - @FXML - private final ObservableList statusCounts = FXCollections.observableArrayList(); - + ObservableList pieChartData = FXCollections.observableArrayList(); + private Thread updaterThread; public ObservableList getStatusCounts() { return statusCounts; } - - private Thread updaterThread; - - private final Label pieSliceCaption = new Label(""); - - - ObservableList pieChartData = FXCollections.observableArrayList(); - @Override public void initialize(URL fxmlFileLocation, ResourceBundle resources) { pieSliceCaption.setTextFill(Color.DARKORANGE); @@ -129,7 +113,7 @@ public void stop() { private void updateStatusTable() { //remove 0 entries - statusCounts.removeIf( e -> e.getCount() < 0.1); + statusCounts.removeIf(e -> e.getCount() < 0.1); statusTable.sort(); statusTable.refresh(); } @@ -146,7 +130,7 @@ public void run() { if (batchProcess.isPresent()) { final Optional status = batchProcess.get().checkAsyncStatus(); - if (! status.isEmpty()) { + if (!status.isEmpty()) { Platform.runLater(() -> { updatePieChart(status.get()); updateTotalToProcess(status.get()); @@ -206,7 +190,7 @@ private void updatePieChart(AsyncStatus status) { } } for (Map.Entry e : status.getStatusCounts().entrySet()) { - if (! 
seen.contains(e.getKey())) { + if (!seen.contains(e.getKey())) { addData(e.getKey().name(), e.getValue()); } } @@ -226,9 +210,10 @@ private void addData(String name, double value) { } } - data.getNode().addEventHandler(MouseEvent.MOUSE_PRESSED, - new EventHandler() { - @Override public void handle(MouseEvent e) { + data.getNode() + .addEventHandler(MouseEvent.MOUSE_PRESSED, new EventHandler() { + @Override + public void handle(MouseEvent e) { pieSliceCaption.setTranslateX(e.getSceneX()); pieSliceCaption.setTranslateY(e.getSceneY()); pieSliceCaption.setText(String.valueOf(data.getPieValue()) + "%"); @@ -239,9 +224,10 @@ private void addData(String name, double value) { statusCount.countProperty().bind(data.pieValueProperty()); statusCounts.add(statusCount); } + private PipesResult.STATUS lookup(String name) { - if (! PIPES_STATUS_LOOKUP.containsKey(name)) { + if (!PIPES_STATUS_LOOKUP.containsKey(name)) { return null; } return PIPES_STATUS_LOOKUP.get(name); diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/Constants.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/Constants.java index f6ed917..7452141 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/Constants.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/Constants.java @@ -24,8 +24,7 @@ public class Constants { public static final String FS_FETCHER_CLASS = FileSystemFetcher.class.getName(); //an imaginary class - public static final String CSV_EMITTER_CLASS = - "org.apache.tika.pipes.emitter.csv.CSVEmitter"; + public static final String CSV_EMITTER_CLASS = "org.apache.tika.pipes.emitter.csv.CSVEmitter"; public static final String OPEN_SEARCH_EMITTER_CLASS = "org.apache.tika.pipes.emitter.opensearch.OpenSearchEmitter"; diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/TikaApplication.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/TikaApplication.java index 7bb4ad3..44df3bb 100644 --- 
a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/TikaApplication.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/TikaApplication.java @@ -30,8 +30,8 @@ import javafx.scene.layout.VBox; import javafx.stage.Stage; import javafx.stage.WindowEvent; +import org.tallison.tika.app.fx.batch.BatchProcess; import org.tallison.tika.app.fx.ctx.AppContext; -import org.tallison.tika.app.fx.tools.BatchProcess; public class TikaApplication extends Application { diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/TikaController.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/TikaController.java index dbfff10..ebe66bd 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/TikaController.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/TikaController.java @@ -38,11 +38,11 @@ import javafx.stage.WindowEvent; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.tallison.tika.app.fx.batch.BatchProcess; +import org.tallison.tika.app.fx.batch.BatchProcessConfig; +import org.tallison.tika.app.fx.config.ConfigItem; import org.tallison.tika.app.fx.ctx.AppContext; import org.tallison.tika.app.fx.status.StatusUpdater; -import org.tallison.tika.app.fx.tools.BatchProcess; -import org.tallison.tika.app.fx.tools.BatchProcessConfig; -import org.tallison.tika.app.fx.tools.ConfigItem; public class TikaController extends ControllerBase { diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/BatchProcess.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/batch/BatchProcess.java similarity index 94% rename from tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/BatchProcess.java rename to tika-gui-app/src/main/java/org/tallison/tika/app/fx/batch/BatchProcess.java index 1df2fcd..34f7532 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/BatchProcess.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/batch/BatchProcess.java @@ -14,7 
+14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.tallison.tika.app.fx.tools; +package org.tallison.tika.app.fx.batch; import java.io.File; import java.io.IOException; @@ -36,6 +36,7 @@ import com.fasterxml.jackson.datatype.jsr310.JavaTimeModule; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; +import org.tallison.tika.app.fx.config.TikaConfigWriter; import org.tallison.tika.app.fx.csv.CSVEmitterHelper; import org.tallison.tika.app.fx.ctx.AppContext; import org.tallison.tika.app.fx.status.MutableStatus; @@ -49,28 +50,16 @@ public class BatchProcess { - public enum STATUS { - READY, ERROR, RUNNING, COMPLETE, CANCELED; - } - - private enum PROCESS_ID { - BATCH_PROCESS - } - private static Logger LOGGER = LogManager.getLogger(BatchProcess.class); - private final MutableStatus mutableStatus = new MutableStatus(STATUS.READY); private long runningProcessId = -1; private Path configFile; private BatchRunner batchRunner = null; - private BatchProcessConfig batchProcessConfig = null; - private Optional jvmException = Optional.empty(); private Optional jvmErrorMsg = Optional.empty(); - private ObjectMapper objectMapper = JsonMapper.builder() - .addModule(new JavaTimeModule()) - .build(); + private ObjectMapper objectMapper = + JsonMapper.builder().addModule(new JavaTimeModule()).build(); private ExecutorService daemonExecutorService = Executors.newFixedThreadPool(2, r -> { Thread t = Executors.defaultThreadFactory().newThread(r); t.setDaemon(true); @@ -79,8 +68,8 @@ private enum PROCESS_ID { private ExecutorCompletionService executorCompletionService = new ExecutorCompletionService<>(daemonExecutorService); - public synchronized void start(BatchProcessConfig batchProcessConfig, StatusUpdater statusUpdater) - throws TikaException, IOException { + public synchronized void start(BatchProcessConfig batchProcessConfig, + StatusUpdater statusUpdater) throws 
TikaException, IOException { deletePreviousRuns(); TikaConfigWriter tikaConfigWriter = new TikaConfigWriter(); @@ -148,7 +137,7 @@ public synchronized void cancel() { } public Optional checkAsyncStatus() { - if (! Files.isRegularFile(AppContext.BATCH_STATUS_PATH)) { + if (!Files.isRegularFile(AppContext.BATCH_STATUS_PATH)) { return Optional.empty(); } try { @@ -191,8 +180,6 @@ public void close() { } } - //If the emitter is a csv file, - public long getRunningProcessId() { return runningProcessId; } @@ -201,6 +188,8 @@ public MutableStatus getMutableStatus() { return mutableStatus; } + //If the emitter is a csv file, + public Optional getJvmException() { return jvmException; } @@ -209,6 +198,14 @@ public Optional getJvmErrorMsg() { return jvmErrorMsg; } + public enum STATUS { + READY, ERROR, RUNNING, COMPLETE, CANCELED; + } + + private enum PROCESS_ID { + BATCH_PROCESS + } + private class BatchRunner implements Callable { private final Path tikaConfig; private final BatchProcessConfig batchProcessConfig; @@ -222,9 +219,8 @@ public BatchRunner(Path tikaConfig, BatchProcessConfig batchProcessConfig) { @Override public Integer call() throws Exception { List commandLine = buildCommandLine(); - process = new ProcessBuilder(commandLine) - .inheritIO() //TODO -- for dev purposes only - .start(); + process = new ProcessBuilder(commandLine).inheritIO() //TODO -- for dev purposes only + .start(); mutableStatus.set(STATUS.RUNNING); if (LOGGER.isTraceEnabled()) { LOGGER.trace("process {}", process.isAlive()); @@ -278,8 +274,8 @@ public Integer call() throws Exception { private List buildCommandLine() { List commandLine = new ArrayList<>(); - commandLine.add( - ProcessUtils.escapeCommandLine(AppContext.getInstance().getJavaHome().resolve("java").toString())); + commandLine.add(ProcessUtils.escapeCommandLine( + AppContext.getInstance().getJavaHome().resolve("java").toString())); commandLine.add("-Dlog4j.configurationFile=config/log4j2-async-cli.xml"); commandLine.add("-cp"); 
String cp = buildClassPath(); diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/BatchProcessConfig.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/batch/BatchProcessConfig.java similarity index 95% rename from tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/BatchProcessConfig.java rename to tika-gui-app/src/main/java/org/tallison/tika/app/fx/batch/BatchProcessConfig.java index b4b8e10..f64756a 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/BatchProcessConfig.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/batch/BatchProcessConfig.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.tallison.tika.app.fx.tools; +package org.tallison.tika.app.fx.batch; import static org.tallison.tika.app.fx.Constants.CSV_JDBC_CONNECTION_STRING; import static org.tallison.tika.app.fx.Constants.JDBC_CONNECTION_STRING; @@ -28,6 +28,7 @@ import javafx.beans.property.SimpleStringProperty; import javafx.beans.property.StringProperty; import org.tallison.tika.app.fx.Constants; +import org.tallison.tika.app.fx.config.ConfigItem; import org.tallison.tika.app.fx.ctx.AppContext; import org.apache.tika.utils.ProcessUtils; @@ -57,9 +58,9 @@ public class BatchProcessConfig { private int parseTimeoutSeconds = 120; - private int perFileEmitThresholdMb = 100; + private int perFileEmitThresholdMb = 1; - private int totalEmitThesholdMb = 1000; + private int totalEmitThesholdMb = 100; private long emitWithinMs = 10000; @@ -140,7 +141,8 @@ public void appendPipesClasspath(StringBuilder sb) { .toAbsolutePath() + "/*")); } else if (emitter.getClazz().equals(Constants.JDBC_EMITTER_CLASS) || emitter.getClazz().equals(Constants.CSV_EMITTER_CLASS)) { - sb.append(AppContext.TIKA_LIB_PATH.resolve("tika-emitter-jdbc").toAbsolutePath() + "/*"); + sb.append(AppContext.TIKA_LIB_PATH.resolve("tika-emitter-jdbc").toAbsolutePath() + + "/*"); 
sb.append(File.pathSeparator); String connectString = emitter.getClazz().equals(JDBC_EMITTER_CLASS) ? getEmitter().get().getAttributes().get(JDBC_CONNECTION_STRING) : @@ -151,7 +153,8 @@ public void appendPipesClasspath(StringBuilder sb) { } else if (connectString.startsWith("jdbc:h2")) { sb.append(AppContext.TIKA_LIB_PATH.resolve("db/h2").toAbsolutePath() + "/*"); } else if (connectString.startsWith("jdbc:postgres")) { - sb.append(AppContext.TIKA_LIB_PATH.resolve("db/postgresql").toAbsolutePath() + "/*"); + sb.append(AppContext.TIKA_LIB_PATH.resolve("db/postgresql").toAbsolutePath() + + "/*"); } } } @@ -189,22 +192,22 @@ public void setMaxMemMb(int maxMemMb) { this.maxMemMb = maxMemMb; } - public void setOutputSelectedTab(int outputSelectedTab) { - this.outputSelectedTab = outputSelectedTab; - } - - public void setInputSelectedTab(int inputSelectedTab) { - this.inputSelectedTab = inputSelectedTab; - } - public int getOutputSelectedTab() { return outputSelectedTab; } + public void setOutputSelectedTab(int outputSelectedTab) { + this.outputSelectedTab = outputSelectedTab; + } + public int getInputSelectedTab() { return inputSelectedTab; } + public void setInputSelectedTab(int inputSelectedTab) { + this.inputSelectedTab = inputSelectedTab; + } + public int getPerFileEmitThresholdMb() { return perFileEmitThresholdMb; } diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/ConfigItem.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/config/ConfigItem.java similarity index 98% rename from tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/ConfigItem.java rename to tika-gui-app/src/main/java/org/tallison/tika/app/fx/config/ConfigItem.java index ced0aa0..6ba3807 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/ConfigItem.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/config/ConfigItem.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the 
License. */ -package org.tallison.tika.app.fx.tools; +package org.tallison.tika.app.fx.config; import java.util.HashMap; import java.util.List; @@ -38,14 +38,6 @@ public ConfigItem() { } - public static ConfigItem build(String... args) { - Map params = new HashMap<>(); - for (int i = 3; i < args.length; i++) { - params.put(args[i], args[++i]); - } - return new ConfigItem(args[0], args[1], args[2], params); - } - public ConfigItem(String shortLabel, String fullLabel, String clazz, Map attributes) { this.shortLabel = shortLabel; @@ -54,6 +46,14 @@ public ConfigItem(String shortLabel, String fullLabel, String clazz, this.attributes = attributes; } + public static ConfigItem build(String... args) { + Map params = new HashMap<>(); + for (int i = 3; i < args.length; i++) { + params.put(args[i], args[++i]); + } + return new ConfigItem(args[0], args[1], args[2], params); + } + public String getShortLabel() { return shortLabel; } diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/config/TikaConfigWriter.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/config/TikaConfigWriter.java new file mode 100644 index 0000000..5e9f91b --- /dev/null +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/config/TikaConfigWriter.java @@ -0,0 +1,513 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.tallison.tika.app.fx.config; + +import static org.tallison.tika.app.fx.Constants.BASE_PATH; +import static org.tallison.tika.app.fx.Constants.CSV_EMITTER_CLASS; +import static org.tallison.tika.app.fx.Constants.CSV_JDBC_CONNECTION_STRING; +import static org.tallison.tika.app.fx.Constants.CSV_JDBC_INSERT_SQL; +import static org.tallison.tika.app.fx.Constants.FS_FETCHER_CLASS; +import static org.tallison.tika.app.fx.Constants.JDBC_CONNECTION_STRING; +import static org.tallison.tika.app.fx.Constants.JDBC_EMITTER_CLASS; +import static org.tallison.tika.app.fx.Constants.JDBC_INSERT_SQL; +import static org.tallison.tika.app.fx.Constants.NO_DIGEST; +import static org.tallison.tika.app.fx.Constants.OPEN_SEARCH_PW; +import static org.tallison.tika.app.fx.Constants.OPEN_SEARCH_UPDATE_STRATEGY; +import static org.tallison.tika.app.fx.Constants.OPEN_SEARCH_URL; +import static org.tallison.tika.app.fx.Constants.OPEN_SEARCH_USER; + +import java.io.File; +import java.io.IOException; +import java.io.InputStream; +import java.io.Writer; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.nio.file.StandardOpenOption; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import javax.xml.parsers.DocumentBuilderFactory; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.stream.XMLStreamException; +import javax.xml.transform.OutputKeys; +import javax.xml.transform.Transformer; +import javax.xml.transform.TransformerException; +import javax.xml.transform.TransformerFactory; +import javax.xml.transform.dom.DOMSource; +import javax.xml.transform.stream.StreamResult; + +import org.apache.commons.io.IOUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import 
org.tallison.tika.app.fx.Constants; +import org.tallison.tika.app.fx.batch.BatchProcessConfig; +import org.tallison.tika.app.fx.ctx.AppContext; +import org.tallison.tika.app.fx.metadata.MetadataTuple; +import org.w3c.dom.Document; +import org.w3c.dom.Element; + +import org.apache.tika.utils.ProcessUtils; +import org.apache.tika.utils.StringUtils; + +/** + * This is an embarrassment of hardcoding. Need to figure out better + * solution... + *

+ * This also requires knowledge of all fetchers/emitters in one class. This is, erm, + * less than entirely ideal. + *

+ * This also does not escape xml characters. So, bad, very, very bad. + */ +public class TikaConfigWriter { + private static final Logger LOGGER = LogManager.getLogger(TikaConfigWriter.class); + + public void writeLog4j2() throws IOException { + String template = getTemplateLog4j2("log4j2-async.xml"); + template = + template.replace("{LOGS_PATH}", AppContext.LOGS_PATH.toAbsolutePath().toString()); + + if (!Files.isDirectory(AppContext.ASYNC_LOG4J2_PATH.getParent())) { + Files.createDirectories(AppContext.ASYNC_LOG4J2_PATH.getParent()); + } + Files.writeString(AppContext.ASYNC_LOG4J2_PATH, template, StandardCharsets.UTF_8, + StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING); + + //not actually a template; like the file above, it is truncated and rewritten on each call + String xml = getTemplateLog4j2("log4j2-async-cli.xml"); + Files.writeString(AppContext.CONFIG_PATH.resolve("log4j2-async-cli.xml"), xml, + StandardCharsets.UTF_8, StandardOpenOption.CREATE, + StandardOpenOption.TRUNCATE_EXISTING); + } + + public Path writeConfig(BatchProcessConfig batchProcessConfig) throws IOException { + + return writeConfig(batchProcessConfig, AppContext.CONFIG_PATH); + } + + + public Path writeConfig(BatchProcessConfig batchProcessConfig, Path workingDir) + throws IOException { + if (!Files.isDirectory(workingDir)) { + Files.createDirectories(workingDir); + } + Path tmp = Files.createTempFile(workingDir, "tika-config-", ".xml"); + try { + DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); + Document document = dbf.newDocumentBuilder().newDocument(); + Element properties = document.createElement("properties"); + document.appendChild(properties); + appendParsers(batchProcessConfig, document, properties); + //sb.append(getTemplate("parsers.xml")).append("\n"); + appendMetadataFilter(batchProcessConfig, document, properties); + appendPipesIterator(batchProcessConfig, document, properties); + appendFetcher(batchProcessConfig, document, properties); + appendEmitter(batchProcessConfig, document, properties); + appendAsync(batchProcessConfig, document, properties); + 
appendAutoDetectParserConfig(batchProcessConfig, document, properties); + + Transformer transformer = TransformerFactory.newInstance().newTransformer(); + transformer.setOutputProperty(OutputKeys.INDENT, "yes"); + transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2"); + try (Writer writer = Files.newBufferedWriter(tmp, StandardCharsets.UTF_8)) { + StreamResult result = new StreamResult(writer); + DOMSource source = new DOMSource(document); + transformer.transform(source, result); + } + } catch (XMLStreamException | TransformerException | ParserConfigurationException e) { + throw new IOException(e); + } + LOGGER.debug("Finished writing tika-config: {}", tmp.toAbsolutePath()); + return tmp; + } + + + private void appendParsers(BatchProcessConfig batchProcessConfig, Document document, + Element properties) throws XMLStreamException { + Element parsers = document.createElement("parsers"); + properties.appendChild(parsers); + Element dflt = document.createElement("parser"); + parsers.appendChild(dflt); + dflt.setAttribute("class", "org.apache.tika.parser.DefaultParser"); + excludeParsers(document, dflt, "org.apache.tika.parser.ocr.TesseractOCRParser", + "org.apache.tika.parser.pdf.PDFParser", + "org.apache.tika.parser.microsoft.ooxml.OOXMLParser", + "org.apache.tika.parser.microsoft.OfficeParser"); + addLegacyParams(document, parsers, "parser", "org.apache.tika.parser.pdf.PDFParser", + "extractActions", "bool", "true"); + + addLegacyParams(document, parsers, "parser", + "org.apache.tika.parser.microsoft.ooxml.OOXMLParser", "extractMacros", "bool", + "true", "includeDeletedContent", "bool", "true", "includeMoveFromContent", "bool", + "true"); + + addLegacyParams(document, parsers, "parser", + "org.apache.tika.parser.microsoft.OfficeParser", "extractMacros", "bool", "true"); + + + } + + private void addLegacyParams(Document document, Element parent, String nodeName, String clz, + String... 
tuples) { + //total hack + Element parser = createAndGetElement(document, parent, nodeName, "class", clz); + if (tuples.length == 0) { + return; + } + Element params = createAndGetElement(document, parser, "params"); + for (int i = 0; i < tuples.length; i += 3) { + appendTextElement(document, params, "param", tuples[i + 2], "name", tuples[i], "type", + tuples[i + 1]); + } + } + + private void excludeParsers(Document document, Element parser, String... classes) { + for (String clz : classes) { + createAndGetElement(document, parser, "parser-exclude", "class", clz); + } + } + + private void appendAutoDetectParserConfig(BatchProcessConfig batchProcessConfig, + Document document, Element properties) + throws IOException { + Element adpc = createAndGetElement(document, properties, "autoDetectParserConfig"); + Element params = createAndGetElement(document, adpc, "params"); + appendTextElement(document, params, "spoolToDisk", "0"); + appendTextElement(document, params, "outputThreshold", "10000"); + appendTextElement(document, params, "maximumCompressionRatio", "100"); + appendTextElement(document, params, "maximumDepth", "100"); + appendTextElement(document, params, "maximumPackageEntryDepth", "100"); + + //good enough for now. 
we'll have to figure out + //a better option if we add more params + if (batchProcessConfig.getDigest().isEmpty()) { + return; + } + String digestString = batchProcessConfig.getDigest().get(); + if (digestString.equals(NO_DIGEST)) { + return; + } + Element digester = createAndGetElement(document, adpc, "digesterFactory", "class", + "org.apache.tika.parser.digestutils.CommonsDigesterFactory"); + Element digesterParams = createAndGetElement(document, digester, "params"); + appendTextElement(document, digesterParams, "markLimit", "1000000"); + appendTextElement(document, digesterParams, "algorithmString", digestString); + + } + + private void appendAsync(BatchProcessConfig bpc, Document document, Element properties) + throws IOException { + Element async = createAndGetElement(document, properties, "async"); + Element params = createAndGetElement(document, async, "params"); + appendTextElement(document, params, "javaPath", + AppContext.getInstance().getJavaHome().resolve("java").toString()); + appendTextElement(document, params, "numClients", Integer.toString(bpc.getNumProcesses())); + appendTextElement(document, params, "numEmitters", "1"); + appendListElement(document, params, "forkedJvmArgs", "arg", + "-Xmx" + bpc.getMaxMemMb() + "m", "-Dlog4j.configurationFile=" + + AppContext.ASYNC_LOG4J2_PATH.toAbsolutePath().toString(), "-cp", + buildClassPath(bpc)); + appendTextElement(document, params, "timeoutMillis", + Long.toString(bpc.getParseTimeoutSeconds() * 1000L)); //long math: no int overflow + appendTextElement(document, params, "emitWithinMillis", + Long.toString(bpc.getEmitWithinMs())); + appendTextElement(document, params, "emitMaxEstimatedBytes", + Long.toString(bpc.getTotalEmitThesholdMb() * 1024L * 1024L)); + appendTextElement(document, params, "maxForEmitBatchBytes", + Long.toString(bpc.getPerFileEmitThresholdMb() * 1024L * 1024L)); + + appendPipesReporters(document, async, bpc); + + } + + private void appendPipesReporters(Document document, Element async, BatchProcessConfig bpc) { + Element 
compositePipesReporter = + createAndGetElement(document, async, "pipesReporter", "class", + "org.apache.tika.pipes.CompositePipesReporter"); + Element params = createAndGetElement(document, compositePipesReporter, "params"); + Element pipesReporters = createAndGetElement(document, params, "pipesReporters", "class", + "org.apache.tika.pipes.PipesReporter"); + Element fsReporter = createAndGetElement(document, pipesReporters, "pipesReporter", "class", + "org.apache.tika.pipes.reporters.fs.FileSystemStatusReporter"); + Element fsReporterParams = createAndGetElement(document, fsReporter, "params"); + appendTextElement(document, fsReporterParams, "statusFile", + AppContext.BATCH_STATUS_PATH.toAbsolutePath().toString()); + appendTextElement(document, fsReporterParams, "reportUpdateMillis", "1000"); + + + //parameterize + if (bpc.getEmitter().isEmpty()) { + return; + } + ConfigItem emitter = bpc.getEmitter().get(); + if (!emitter.getClazz().equals(JDBC_EMITTER_CLASS) && + !emitter.getClazz().equals(CSV_EMITTER_CLASS)) { + return; + } + String connectionString = emitter.getAttributes().get(JDBC_CONNECTION_STRING); + if (emitter.getClazz().equals(CSV_EMITTER_CLASS)) { + connectionString = emitter.getAttributes().get(CSV_JDBC_CONNECTION_STRING); + } + Element jdbc = createAndGetElement(document, pipesReporters, "pipesReporter", "class", + "org.apache.tika.pipes.reporters.jdbc.JDBCPipesReporter"); + + Element jdbcParams = createAndGetElement(document, jdbc, "params"); + appendTextElement(document, jdbcParams, "connection", connectionString); + } + + private void appendListElement(Document document, Element parent, String itemNames, + String itemName, String... 
elements) { + Element items = createAndGetElement(document, parent, itemNames); + for (String element : elements) { + Element item = createAndGetElement(document, items, itemName); + item.setTextContent(element); + } + } + + private String buildClassPath(BatchProcessConfig batchProcessConfig) { + StringBuilder sb = new StringBuilder(); + //load these mappings from a properties file or something + sb.append(ProcessUtils.escapeCommandLine( + AppContext.TIKA_APP_BIN_PATH.toAbsolutePath() + "/*")); + sb.append(File.pathSeparator); + sb.append(ProcessUtils.escapeCommandLine( + AppContext.TIKA_EXTRAS_BIN_PATH.toAbsolutePath() + "/*")); + sb.append(File.pathSeparator); + batchProcessConfig.appendPipesClasspath(sb); + return sb.toString(); + } + + private void appendEmitter(BatchProcessConfig batchProcessConfig, Document document, + Element properties) throws IOException { + Optional optionalEmitter = batchProcessConfig.getEmitter(); + if (optionalEmitter.isEmpty()) { + LOGGER.warn("emitter is empty?!"); + return; + } + ConfigItem emitter = optionalEmitter.get(); + Element emitters = createAndGetElement(document, properties, "emitters"); + switch (emitter.getClazz()) { + case Constants.FS_EMITTER_CLASS: + appendFSEmitter(emitter, document, emitters); + break; + case Constants.OPEN_SEARCH_EMITTER_CLASS: + appendOpenSearchEmitter(emitter, document, emitters); + break; + case Constants.CSV_EMITTER_CLASS: + appendJDBCEmitter(emitter, emitter.getAttributes().get(CSV_JDBC_CONNECTION_STRING), + emitter.getAttributes().get(CSV_JDBC_INSERT_SQL), document, emitters); + break; + case Constants.JDBC_EMITTER_CLASS: + appendJDBCEmitter(emitter, emitter.getAttributes().get(JDBC_CONNECTION_STRING), + emitter.getAttributes().get(JDBC_INSERT_SQL), document, emitters); + break; + default: + throw new RuntimeException("I regret I don't yet support " + + batchProcessConfig.getEmitter().get().getClazz()); + } + } + + private void appendJDBCEmitter(ConfigItem emitter, String connectionString, 
String insertString, + Document document, Element emitters) throws IOException { + Element emitterElement = createAndGetElement(document, emitters, "emitter", "class", + "org.apache.tika.pipes.emitter.jdbc.JDBCEmitter"); + Element params = createAndGetElement(document, emitterElement, "params"); + appendTextElement(document, params, "name", "emitter"); + appendTextElement(document, params, "connection", connectionString); + appendTextElement(document, params, "insert", insertString); + appendTextElement(document, params, "attachmentStrategy", "all"); + + if (emitter.getMetadataTuples().isEmpty() || + emitter.getMetadataTuples().get().size() == 0) { + return; + } + Map map = new LinkedHashMap<>(); + emitter.getMetadataTuples().get().stream() + .forEach(e -> map.put(e.getOutput(), e.getProperty())); + + appendMap(document, params, "keys", "key", map); + } + + private void appendOpenSearchEmitter(ConfigItem emitter, Document document, Element emitters) + throws IOException { + Element emitterElement = createAndGetElement(document, emitters, "emitter", "class", + "org.apache.tika.pipes.emitter.opensearch.OpenSearchEmitter"); + Element params = createAndGetElement(document, emitterElement, "params"); + appendTextElement(document, params, "name", "emitter"); + appendTextElement(document, params, "idField", "_id"); + + appendTextElement(document, params, OPEN_SEARCH_URL, + emitter.getAttributes().get(OPEN_SEARCH_URL)); + appendTextElement(document, params, OPEN_SEARCH_UPDATE_STRATEGY, + emitter.getAttributes().get(OPEN_SEARCH_UPDATE_STRATEGY)); + //for now, we need this for upserts. parent+child upserts are not + //yet supported in Tika. 
+ appendTextElement(document, params, "attachmentStrategy", "SEPARATE_DOCUMENTS"); + appendTextElement(document, params, "connectionTimeout", "60000"); + appendTextElement(document, params, "socketTimeout", "120000"); + ; + String userName = emitter.getAttributes().get(OPEN_SEARCH_USER); + String password = emitter.getAttributes().get(OPEN_SEARCH_PW); + if (!StringUtils.isBlank(userName) && !StringUtils.isBlank(password)) { + appendTextElement(document, params, "userName", + emitter.getAttributes().get(OPEN_SEARCH_USER)); + appendTextElement(document, params, "password", + emitter.getAttributes().get(OPEN_SEARCH_PW)); + } + } + + private void appendFSEmitter(ConfigItem emitter, Document document, Element emitters) + throws IOException { + Element emitterElement = createAndGetElement(document, emitters, "emitter", "class", + "org.apache.tika.pipes.emitter.fs.FileSystemEmitter"); + Element params = createAndGetElement(document, emitterElement, "params"); + appendTextElement(document, params, "name", "emitter"); + appendTextElement(document, params, BASE_PATH, emitter.getAttributes().get(BASE_PATH)); + } + + + private void appendFetcher(BatchProcessConfig batchProcessConfig, Document document, + Element properties) throws IOException { + Optional optionalFetcher = batchProcessConfig.getFetcher(); + if (optionalFetcher.isEmpty()) { + LOGGER.warn("fetcher is empty?!"); + return; + } + ConfigItem fetcher = optionalFetcher.get(); + switch (fetcher.getClazz()) { + case "org.apache.tika.pipes.fetcher.fs.FileSystemFetcher": + appendFSFetcher(fetcher, document, properties); + break; + default: + throw new RuntimeException("I regret I don't yet support " + fetcher.getClazz()); + } + } + + private void appendFSFetcher(ConfigItem fetcher, Document document, Element properties) + throws IOException { + Element fetchers = createAndGetElement(document, properties, "fetchers"); + Element fetcherElement = + createAndGetElement(document, fetchers, "fetcher", + "class", 
FS_FETCHER_CLASS); + Element params = createAndGetElement(document, fetcherElement, "params"); + appendTextElement(document, params, "name", "fetcher"); + appendTextElement(document, params, BASE_PATH, fetcher.getAttributes().get(BASE_PATH)); + + } + + private void appendPipesIterator(BatchProcessConfig batchProcessConfig, Document document, + Element properties) throws IOException { + Optional optionalPipesIterator = batchProcessConfig.getPipesIterator(); + if (optionalPipesIterator.isEmpty()) { + LOGGER.warn("pipesIterator is empty?!"); + return; + } + ConfigItem pipesIterator = optionalPipesIterator.get(); + switch (pipesIterator.getClazz()) { + case "org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator": + appendFSPipesIterator(pipesIterator, document, properties); + break; + default: + throw new RuntimeException( + "I regret I don't yet support " + pipesIterator.getClazz()); + } + } + + private void appendFSPipesIterator(ConfigItem pipesIterator, Document document, Element parent) + throws IOException { + Element pipesIteratorElement = + createAndGetElement(document, parent, "pipesIterator", + "class", "org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator"); + Element params = createAndGetElement(document, pipesIteratorElement, "params"); + appendTextElement(document, params, "fetcherName", "fetcher"); + appendTextElement(document, params, "emitterName", "emitter"); + appendTextElement(document, params, "basePath", + pipesIterator.getAttributes().get(BASE_PATH)); + appendTextElement(document, params, "countTotal", "true"); + } + + private void appendMetadataFilter(BatchProcessConfig batchProcessConfig, Document document, + Element properties) throws IOException { + + Optional configItem = batchProcessConfig.getEmitter(); + if (configItem.isEmpty()) { + LOGGER.warn("emitter is empty?!"); + return; + } + Element metadataFilters = document.createElement("metadataFilters"); + properties.appendChild(metadataFilters); + 
appendLeafElement(document, metadataFilters, "metadataFilter", "class", + "org.apache.tika.metadata.filter.GeoPointMetadataFilter"); + appendLeafElement(document, metadataFilters, "metadataFilter", "class", + "org.apache.tika.metadata.filter.DateNormalizingMetadataFilter"); + appendLeafElement(document, metadataFilters, "metadataFilter", "class", + "org.apache.tika.eval.core.metadata.TikaEvalMetadataFilter"); + + ConfigItem emitter = configItem.get(); + Optional> metadataTuples = emitter.getMetadataTuples(); + if (metadataTuples.isEmpty() || metadataTuples.get().size() == 0) { + return; + } + Element filter = createAndGetElement(document, metadataFilters, "metadataFilter", "class", + "org.apache.tika.metadata.filter.FieldNameMappingFilter"); + Element params = createAndGetElement(document, filter, "params"); + appendTextElement(document, params, "excludeUnmapped", "true"); + + Map map = new LinkedHashMap<>(); + metadataTuples.get().stream().forEach(e -> map.put(e.getTika(), e.getOutput())); + + appendMap(document, params, "mappings", "mapping", map); + } + + private void appendMap(Document document, Element parent, String mappingsElementName, + String mappingElementName, Map map, String... attrs) { + Element mappings = createAndGetElement(document, parent, mappingsElementName, attrs); + for (Map.Entry e : map.entrySet()) { + appendLeafElement(document, mappings, mappingElementName, "from", e.getKey(), "to", + e.getValue()); + } + } + + private void appendTextElement(Document document, Element parent, String itemName, String text, + String... attrs) { + Element el = createAndGetElement(document, parent, itemName, attrs); + el.setTextContent(text); + } + + private Element createAndGetElement(Document document, Element parent, String elementName, + String... 
attrs) { + Element el = document.createElement(elementName); + parent.appendChild(el); + for (int i = 0; i < attrs.length; i += 2) { + el.setAttribute(attrs[i], attrs[i + 1]); + } + return el; + } + + private void appendLeafElement(Document document, Element parent, String elementName, + String... attrs) { + createAndGetElement(document, parent, elementName, attrs); + } + + private String getTemplateLog4j2(String template) throws IOException { + try (InputStream is = this.getClass() + .getResourceAsStream("/templates/log4j2/" + template)) { + return IOUtils.toString(is, StandardCharsets.UTF_8); + } + } +} diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/csv/CSVEmitterHelper.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/csv/CSVEmitterHelper.java index df8f8b3..05d75bc 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/csv/CSVEmitterHelper.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/csv/CSVEmitterHelper.java @@ -39,10 +39,10 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.tallison.tika.app.fx.Constants; +import org.tallison.tika.app.fx.batch.BatchProcessConfig; +import org.tallison.tika.app.fx.config.ConfigItem; import org.tallison.tika.app.fx.ctx.AppContext; import org.tallison.tika.app.fx.metadata.MetadataTuple; -import org.tallison.tika.app.fx.tools.BatchProcessConfig; -import org.tallison.tika.app.fx.tools.ConfigItem; import org.apache.tika.utils.StringUtils; @@ -51,7 +51,7 @@ public class CSVEmitterHelper { private static Logger LOGGER = LogManager.getLogger(CSVEmitterHelper.class); public static void setUp(ConfigItem emitter) throws IOException { - if (! 
emitter.getClazz().equals(Constants.CSV_EMITTER_CLASS)) { + if (!emitter.getClazz().equals(Constants.CSV_EMITTER_CLASS)) { return; } Path dbDir = Files.createTempDirectory("tika-app-csv-tmp"); @@ -72,9 +72,8 @@ public static void createTable(ConfigItem emitter) throws SQLException { } createTable.append(")"); LOGGER.debug("create table: " + createTable); - try (Connection connection = - DriverManager.getConnection(emitter.getAttributes() - .get(Constants.CSV_JDBC_CONNECTION_STRING))) { + try (Connection connection = DriverManager.getConnection( + emitter.getAttributes().get(Constants.CSV_JDBC_CONNECTION_STRING))) { try (Statement st = connection.createStatement()) { st.execute(sql); st.execute(createTable.toString()); @@ -96,7 +95,7 @@ public static void writeCSV(AppContext appContext) { return; } ConfigItem emitter = optionalConfigItem.get(); - if (! emitter.getClazz().equals(Constants.CSV_EMITTER_CLASS)) { + if (!emitter.getClazz().equals(Constants.CSV_EMITTER_CLASS)) { return; } LOGGER.debug("about to write csv"); @@ -106,12 +105,12 @@ public static void writeCSV(AppContext appContext) { Path csvPath = getCsvPath(emitter); LOGGER.debug("about to write " + csvPath.toAbsolutePath()); int rows = 0; - try (OutputStream os = Files.newOutputStream(csvPath); CSVPrinter printer = - new CSVPrinter(new OutputStreamWriter(os, UTF_8), CSVFormat.EXCEL)) { + try (OutputStream os = Files.newOutputStream(csvPath); + CSVPrinter printer = new CSVPrinter(new OutputStreamWriter(os, UTF_8), + CSVFormat.EXCEL)) { writeHeaders(printer, emitter); - Connection connection = - DriverManager.getConnection(getConnectionString(emitter)); + Connection connection = DriverManager.getConnection(getConnectionString(emitter)); try (Statement st = connection.createStatement()) { List cells = new ArrayList<>(); Integer columnCount = null; @@ -131,8 +130,7 @@ public static void writeCSV(AppContext appContext) { } catch (IOException e) { LOGGER.warn("Failed to write CSV", e); } - 
LOGGER.info("successfully wrote {} rows to {}", rows, - csvPath.toAbsolutePath()); + LOGGER.info("successfully wrote {} rows to {}", rows, csvPath.toAbsolutePath()); try { cleanCSVTempResources(emitter.getAttributes().get(Constants.CSV_DB_DIRECTORY)); } catch (IOException e) { @@ -157,7 +155,7 @@ private static String getSelect(ConfigItem emitter) { } public static void cleanCSVTempResources(ConfigItem emitter) throws IOException { - if (! emitter.getClazz().equals(Constants.CSV_EMITTER_CLASS)) { + if (!emitter.getClazz().equals(Constants.CSV_EMITTER_CLASS)) { return; } cleanCSVTempResources(emitter.getAttributes().get(Constants.CSV_DB_DIRECTORY)); @@ -165,7 +163,7 @@ public static void cleanCSVTempResources(ConfigItem emitter) throws IOException private static void cleanCSVTempResources(String path) throws IOException { Path tmpDbDir = Paths.get(path); - if (! Files.isDirectory(tmpDbDir)) { + if (!Files.isDirectory(tmpDbDir)) { LOGGER.warn("Not a directory?! {}", path); return; } @@ -176,8 +174,7 @@ private static String getConnectionString(ConfigItem item) { return item.getAttributes().get(Constants.CSV_JDBC_CONNECTION_STRING); } - private static void writeHeaders(CSVPrinter printer, ConfigItem configItem) - throws IOException { + private static void writeHeaders(CSVPrinter printer, ConfigItem configItem) throws IOException { List headers = new ArrayList<>(); headers.add("path"); headers.add("status"); diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/ctx/AppContext.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/ctx/AppContext.java index b2ac063..5ac8c4c 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/ctx/AppContext.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/ctx/AppContext.java @@ -32,17 +32,28 @@ import org.apache.commons.io.FileUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.tallison.tika.app.fx.tools.BatchProcess; -import 
org.tallison.tika.app.fx.tools.BatchProcessConfig; +import org.tallison.tika.app.fx.batch.BatchProcess; +import org.tallison.tika.app.fx.batch.BatchProcessConfig; import org.apache.tika.utils.StringUtils; public class AppContext { + public static Path TIKA_GUI_JAVA_HOME; + public static Path TIKA_APP_HOME = Paths.get(""); + public static Path TIKA_LIB_PATH = TIKA_APP_HOME.resolve("lib"); + public static Path TIKA_CORE_BIN_PATH = TIKA_LIB_PATH.resolve("tika-core"); + public static Path TIKA_APP_BIN_PATH = TIKA_LIB_PATH.resolve("tika-app"); + public static Path TIKA_EXTRAS_BIN_PATH = TIKA_LIB_PATH.resolve("tika-extras"); + public static Path APP_STATE_PATH = TIKA_APP_HOME.resolve("config/tika-app-v2-config.json"); + public static Path CONFIG_PATH = TIKA_APP_HOME.resolve("config"); + public static Path ASYNC_LOG4J2_PATH = CONFIG_PATH.resolve("log4j2-async.xml"); + public static Path LOGS_PATH = TIKA_APP_HOME.resolve("logs"); + public static Path BATCH_STATUS_PATH = LOGS_PATH.resolve("batch_status.json"); private static Logger LOGGER = LogManager.getLogger(AppContext.class); - private static ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static volatile AppContext APP_CONTEXT; static { //this is necessary for optionals @@ -51,38 +62,23 @@ public class AppContext { OBJECT_MAPPER.registerModule(new Jdk8Module()); } - public static Path TIKA_GUI_JAVA_HOME; static { - if (! StringUtils.isBlank(System.getProperty("TIKA_GUI_JAVA_HOME"))) { + if (!StringUtils.isBlank(System.getProperty("TIKA_GUI_JAVA_HOME"))) { LOGGER.debug("setting TIKA_GUI_JAVA_HOME {}", System.getProperty("TIKA_GUI_JAVA_HOME")); TIKA_GUI_JAVA_HOME = Paths.get(System.getProperty("TIKA_GUI_JAVA_HOME")); - } else if (! 
StringUtils.isBlank(System.getProperty("java.home"))) { + } else if (!StringUtils.isBlank(System.getProperty("java.home"))) { TIKA_GUI_JAVA_HOME = Paths.get(System.getProperty("java.home")); LOGGER.debug("setting TIKA_GUI_JAVA_HOME {} from java.home", System.getProperty("java.home")); } } - public static Path TIKA_APP_HOME = Paths.get(""); - public static Path TIKA_LIB_PATH = TIKA_APP_HOME.resolve("lib"); - public static Path TIKA_CORE_BIN_PATH = TIKA_LIB_PATH.resolve("tika-core"); - public static Path TIKA_APP_BIN_PATH = TIKA_LIB_PATH.resolve("tika-app"); - public static Path TIKA_EXTRAS_BIN_PATH = TIKA_LIB_PATH.resolve("tika-extras"); - public static Path APP_STATE_PATH = TIKA_APP_HOME.resolve("config/tika-app-v2-config.json"); - private static volatile AppContext APP_CONTEXT; - public static Path CONFIG_PATH = TIKA_APP_HOME.resolve("config"); - public static Path ASYNC_LOG4J2_PATH = CONFIG_PATH.resolve("log4j2-async.xml"); - public static Path LOGS_PATH = TIKA_APP_HOME.resolve("logs"); - public static Path BATCH_STATUS_PATH = LOGS_PATH.resolve("batch_status.json"); - - + private final boolean allowBatchToRunOnExit = false; private String tikaVersion = "2.6.0"; private Optional batchProcessConfig = Optional.of(new BatchProcessConfig()); - @JsonIgnore private Optional batchProcess = Optional.empty(); private volatile boolean closed = false; - private final boolean allowBatchToRunOnExit = false; public static AppContext load() { @@ -109,7 +105,7 @@ public static synchronized AppContext getInstance() { private static AppContext load(Path configPath) throws IOException { try (Reader reader = Files.newBufferedReader(configPath, StandardCharsets.UTF_8)) { - AppContext appContext = OBJECT_MAPPER.readValue(reader, AppContext.class); + AppContext appContext = OBJECT_MAPPER.readValue(reader, AppContext.class); //for now, set the batch process to null appContext.setBatchProcess(null); return appContext; diff --git 
a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/AbstractEmitterController.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/AbstractEmitterController.java index 3e2ae59..9e5ca36 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/AbstractEmitterController.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/AbstractEmitterController.java @@ -42,11 +42,11 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.tallison.tika.app.fx.ControllerBase; +import org.tallison.tika.app.fx.batch.BatchProcessConfig; +import org.tallison.tika.app.fx.config.ConfigItem; import org.tallison.tika.app.fx.ctx.AppContext; import org.tallison.tika.app.fx.metadata.MetadataRow; import org.tallison.tika.app.fx.metadata.MetadataTuple; -import org.tallison.tika.app.fx.tools.BatchProcessConfig; -import org.tallison.tika.app.fx.tools.ConfigItem; import org.apache.tika.utils.StringUtils; @@ -56,11 +56,6 @@ public abstract class AbstractEmitterController extends ControllerBase { @FXML private final ObservableList metadataRows = FXCollections.observableArrayList(); - - public ObservableList getMetadataRows() { - return metadataRows; - } - @FXML private TextField tikaField; @FXML @@ -69,12 +64,16 @@ public ObservableList getMetadataRows() { private TextField propertyField; private Optional csvMetadataPath = Optional.empty(); + public ObservableList getMetadataRows() { + return metadataRows; + } + abstract protected void saveState(); /** * This confirms the string is not empty and represents an * actual path that exists as a regular file. - * + *

* It will silently do nothing if these conditions are not met. * * @param csvMetadataFilePath @@ -122,9 +121,9 @@ public void loadMetadataCSV(ActionEvent actionEvent) throws IOException { } private void loadMetadataCSV(File csvFile) throws IOException { - char delimiter = csvFile.getName().endsWith(".txt") || - csvFile.getName().endsWith(".tsv") ? - '\t' : ','; + char delimiter = + csvFile.getName().endsWith(".txt") || csvFile.getName().endsWith(".tsv") ? '\t' : + ','; //TODO add a reader that removes the BOM CSVFormat format = CSVFormat.Builder.create(CSVFormat.EXCEL).setDelimiter(delimiter) .setHeader() // no clue why this is needed,but it is @@ -155,8 +154,8 @@ protected void addMetadataRow(ActionEvent event) { if (!StringUtils.isBlank(tikaField.getText()) && !StringUtils.isBlank(outputField.getText())) { //check that property can be parsed to int > 0 if exists - metadataRows.add(new MetadataRow(tikaField.getText(), - outputField.getText(), propertyField.getText())); + metadataRows.add(new MetadataRow(tikaField.getText(), outputField.getText(), + propertyField.getText())); tikaField.setText(""); outputField.setText(""); propertyField.setText(""); @@ -167,14 +166,15 @@ protected void addMetadataRow(ActionEvent event) { protected void saveMetadataToEmitter(ConfigItem emitter) { List metadataTuples = new ArrayList<>(); for (MetadataRow metadataRow : getMetadataRows()) { - metadataTuples.add(new MetadataTuple(metadataRow.getTika(), - metadataRow.getOutput(), metadataRow.getProperty())); + metadataTuples.add(new MetadataTuple(metadataRow.getTika(), metadataRow.getOutput(), + metadataRow.getProperty())); } emitter.setMetadataTuples(metadataTuples); } /** * This checks for empty keys and duplicate output keys + * * @return */ protected boolean validateMetadataRows() { @@ -187,15 +187,15 @@ protected boolean validateMetadataRows() { String t = row.getTika(); if (StringUtils.isBlank(t)) { alert("Blank Tika key", "Blank Tika key", - "There's an empty Tika key in row " + i 
+ - ". The output value is: " + row.getOutput()); + "There's an empty Tika key in row " + i + ". The output value is: " + + row.getOutput()); return false; } String o = row.getOutput(); if (StringUtils.isBlank(o)) { alert("Blank output key", "Blank output key", - "There's an empty output key in row " + i + - ". The Tika value is: " + row.getTika()); + "There's an empty output key in row " + i + ". The Tika value is: " + + row.getTika()); return false; } else { if (output.contains(o)) { diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterController.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterController.java index 9b43741..6f368d4 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterController.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/CSVEmitterController.java @@ -47,11 +47,11 @@ import org.apache.logging.log4j.Logger; import org.kordamp.ikonli.javafx.FontIcon; import org.tallison.tika.app.fx.Constants; +import org.tallison.tika.app.fx.batch.BatchProcessConfig; +import org.tallison.tika.app.fx.config.ConfigItem; import org.tallison.tika.app.fx.csv.CSVEmitterHelper; import org.tallison.tika.app.fx.metadata.MetadataRow; import org.tallison.tika.app.fx.metadata.MetadataTuple; -import org.tallison.tika.app.fx.tools.BatchProcessConfig; -import org.tallison.tika.app.fx.tools.ConfigItem; import org.apache.tika.utils.StringUtils; @@ -99,10 +99,11 @@ public void initialize(URL fxmlFileLocation, ResourceBundle resources) { return; } ConfigItem emitter = configItem.get(); - if (! 
emitter.getClazz().equals(Constants.CSV_EMITTER_CLASS)) { + if (!emitter.getClazz().equals(Constants.CSV_EMITTER_CLASS)) { return; } - if (emitter.getMetadataTuples().isPresent() && emitter.getMetadataTuples().get().size() > 0) { + if (emitter.getMetadataTuples().isPresent() && + emitter.getMetadataTuples().get().size() > 0) { getMetadataRows().clear(); for (MetadataTuple t : emitter.getMetadataTuples().get()) { getMetadataRows().add(new MetadataRow(t.getTika(), t.getOutput(), t.getProperty())); @@ -177,13 +178,12 @@ public void saveState() { } Optional csvMetadataPath = getCsvMetadataPath(); - String csvMetadataPathString = csvMetadataPath.isPresent() ? - csvMetadataPath.get().toAbsolutePath().toString() : StringUtils.EMPTY; + String csvMetadataPathString = + csvMetadataPath.isPresent() ? csvMetadataPath.get().toAbsolutePath().toString() : + StringUtils.EMPTY; - ConfigItem emitter = ConfigItem.build(shortLabel, fullLabel, - Constants.CSV_EMITTER_CLASS, - Constants.BASE_PATH, directoryString, - Constants.CSV_FILE_NAME, csvOutputFileString, + ConfigItem emitter = ConfigItem.build(shortLabel, fullLabel, Constants.CSV_EMITTER_CLASS, + Constants.BASE_PATH, directoryString, Constants.CSV_FILE_NAME, csvOutputFileString, Constants.CSV_METADATA_PATH, csvMetadataPathString); saveMetadataToEmitter(emitter); @@ -271,7 +271,7 @@ public void updateCSV(ActionEvent actionEvent) { if (Files.isRegularFile(csvFile)) { success = deleteCSVFileDialog(csvFile); } - if (! 
success) { + if (!success) { LOGGER.warn("didn't delete csv file"); actionEvent.consume(); return; @@ -289,7 +289,7 @@ public void updateCSV(ActionEvent actionEvent) { readyIcon.setVisible(true); notReadyIcon.setVisible(false); - ((Stage)updateCSV.getScene().getWindow()).close(); + ((Stage) updateCSV.getScene().getWindow()).close(); } private boolean deleteCSVFileDialog(Path csvFile) { @@ -307,8 +307,8 @@ private boolean deleteCSVFileDialog(Path csvFile) { Files.delete(csvFile); success.set(true); } catch (IOException e) { - alert(ALERT_TITLE, "Couldn't delete csv file", "Couldn't delete file: " + - csvFile.toAbsolutePath()); + alert(ALERT_TITLE, "Couldn't delete csv file", + "Couldn't delete file: " + csvFile.toAbsolutePath()); } } else if (type.getText().startsWith("Cancel")) { return; diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/FileSystemEmitterController.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/FileSystemEmitterController.java index 4b3e9b3..3bd9e47 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/FileSystemEmitterController.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/FileSystemEmitterController.java @@ -34,16 +34,16 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.tallison.tika.app.fx.Constants; +import org.tallison.tika.app.fx.batch.BatchProcessConfig; +import org.tallison.tika.app.fx.config.ConfigItem; import org.tallison.tika.app.fx.ctx.AppContext; import org.tallison.tika.app.fx.metadata.MetadataRow; import org.tallison.tika.app.fx.metadata.MetadataTuple; -import org.tallison.tika.app.fx.tools.BatchProcessConfig; -import org.tallison.tika.app.fx.tools.ConfigItem; import org.apache.tika.utils.StringUtils; -public class FileSystemEmitterController extends AbstractEmitterController implements - Initializable { +public class FileSystemEmitterController extends AbstractEmitterController + implements 
Initializable { private static AppContext APP_CONTEXT = AppContext.getInstance(); private static Logger LOGGER = LogManager.getLogger(FileSystemEmitterController.class); @@ -69,10 +69,11 @@ public void initialize(URL location, ResourceBundle resources) { } ConfigItem emitter = APP_CONTEXT.getBatchProcessConfig().get().getEmitter().get(); - if (! emitter.getClazz().equals(Constants.FS_EMITTER_CLASS)) { + if (!emitter.getClazz().equals(Constants.FS_EMITTER_CLASS)) { return; } - if (emitter.getMetadataTuples().isPresent() && emitter.getMetadataTuples().get().size() > 0) { + if (emitter.getMetadataTuples().isPresent() && + emitter.getMetadataTuples().get().size() > 0) { getMetadataRows().clear(); for (MetadataTuple t : emitter.getMetadataTuples().get()) { getMetadataRows().add(new MetadataRow(t.getTika(), t.getOutput(), t.getProperty())); @@ -128,10 +129,8 @@ protected void saveState() { Path p = directory.get(); String shortLabel = "FileSystem: " + ellipsize(p.getFileName().toString(), 30); String fullLabel = "FileSystem: " + p.toAbsolutePath(); - ConfigItem emitter = ConfigItem.build( - shortLabel, fullLabel, Constants.FS_EMITTER_CLASS, - Constants.BASE_PATH, p.toAbsolutePath().toString() - ); + ConfigItem emitter = ConfigItem.build(shortLabel, fullLabel, Constants.FS_EMITTER_CLASS, + Constants.BASE_PATH, p.toAbsolutePath().toString()); saveMetadataToEmitter(emitter); bpc.setEmitter(emitter); } diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/JDBCEmitterController.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/JDBCEmitterController.java index 093c1a1..14dc88f 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/JDBCEmitterController.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/JDBCEmitterController.java @@ -40,58 +40,36 @@ import org.apache.logging.log4j.Logger; import org.kordamp.ikonli.javafx.FontIcon; import org.tallison.tika.app.fx.Constants; +import 
org.tallison.tika.app.fx.batch.BatchProcessConfig; +import org.tallison.tika.app.fx.config.ConfigItem; import org.tallison.tika.app.fx.metadata.MetadataRow; import org.tallison.tika.app.fx.metadata.MetadataTuple; -import org.tallison.tika.app.fx.tools.BatchProcessConfig; -import org.tallison.tika.app.fx.tools.ConfigItem; import org.apache.tika.utils.StringUtils; public class JDBCEmitterController extends AbstractEmitterController implements Initializable { - private enum VALIDITY { - NO_CONNECTION_STRING, - METADATA_NOT_CONFIGURED, - METADATA_ANOMALY, - FAILED_TO_CONNECT, - VALID, - COLUMN_MISMATCH, - TABLE_EXISTS_WITH_DATA, - NEED_TO_CREATE_TABLE, - SQL_EXCEPTION - } - + private final static int TAB_INDEX = 3; private static String ALERT_TITLE = "JDBC Emitter"; private static Logger LOGGER = LogManager.getLogger(JDBCEmitterController.class); private static String PATH_COL_NAME = "path"; private static String ATTACHMENT_NUM_COL_NAME = "attach_num"; - - private final static int TAB_INDEX = 3; - private String insertSql = StringUtils.EMPTY; - @FXML private TextField jdbcConnection; - @FXML private TextField tableName; - @FXML private Button validateJDBC; - @FXML private FontIcon readyIcon; - @FXML private FontIcon notReadyIcon; - @FXML private Accordion jdbcAccordion; - - @Override public void initialize(URL fxmlFileLocation, ResourceBundle resources) { //Not clear why expanded=true is not working in fxml @@ -107,25 +85,26 @@ public void initialize(URL fxmlFileLocation, ResourceBundle resources) { return; } ConfigItem emitter = configItem.get(); - if (! emitter.getClazz().equals(Constants.JDBC_EMITTER_CLASS)) { + if (!emitter.getClazz().equals(Constants.JDBC_EMITTER_CLASS)) { return; } if (emitter.getAttributes().containsKey(Constants.JDBC_CONNECTION_STRING)) { String s = emitter.getAttributes().get(Constants.JDBC_CONNECTION_STRING); - if (! 
StringUtils.isBlank(s)) { + if (!StringUtils.isBlank(s)) { jdbcConnection.setText(s); } } if (emitter.getAttributes().containsKey(Constants.JDBC_TABLE_NAME)) { String s = emitter.getAttributes().get(Constants.JDBC_TABLE_NAME); - if (! StringUtils.isBlank(s)) { + if (!StringUtils.isBlank(s)) { tableName.setText(s); } } - if (emitter.getMetadataTuples().isPresent() && emitter.getMetadataTuples().get().size() > 0) { + if (emitter.getMetadataTuples().isPresent() && + emitter.getMetadataTuples().get().size() > 0) { getMetadataRows().clear(); for (MetadataTuple t : emitter.getMetadataTuples().get()) { getMetadataRows().add(new MetadataRow(t.getTika(), t.getOutput(), t.getProperty())); @@ -149,21 +128,19 @@ public void saveState() { String jdbcConnectionString = StringUtils.EMPTY; String tableNameString = StringUtils.EMPTY; - if (! StringUtils.isBlank(jdbcConnection.getText())) { + if (!StringUtils.isBlank(jdbcConnection.getText())) { jdbcConnectionString = jdbcConnection.getText(); } - if (! StringUtils.isBlank(tableName.getText())) { + if (!StringUtils.isBlank(tableName.getText())) { tableNameString = tableName.getText(); shortLabel = "JDBC: " + ellipsize(tableNameString, 30); fullLabel = "JDBC: " + tableNameString; } - ConfigItem emitter = ConfigItem.build(shortLabel, fullLabel, - Constants.JDBC_EMITTER_CLASS, - Constants.JDBC_CONNECTION_STRING, jdbcConnectionString, - Constants.JDBC_TABLE_NAME, tableNameString, - Constants.JDBC_INSERT_SQL, insertSql); + ConfigItem emitter = ConfigItem.build(shortLabel, fullLabel, Constants.JDBC_EMITTER_CLASS, + Constants.JDBC_CONNECTION_STRING, jdbcConnectionString, Constants.JDBC_TABLE_NAME, + tableNameString, Constants.JDBC_INSERT_SQL, insertSql); saveMetadataToEmitter(emitter); @@ -179,7 +156,6 @@ public void saveState() { APP_CONTEXT.saveState(); } - public void validateJDBC(ActionEvent actionEvent) { VALIDITY validity = validate(); switch (validity) { @@ -245,7 +221,6 @@ private void columnMismatchDialog() { }); } - private void 
existingDataDialog() { Alert alert = new Alert(Alert.AlertType.CONFIRMATION); alert.setTitle(ALERT_TITLE); @@ -309,7 +284,7 @@ private VALIDITY validate() { return VALIDITY.METADATA_NOT_CONFIGURED; } boolean validMetadata = validateMetadata(); - if (! validMetadata) { + if (!validMetadata) { return VALIDITY.METADATA_ANOMALY; } @@ -329,9 +304,10 @@ private VALIDITY validate() { try (Connection connection = DriverManager.getConnection(cString)) { try (Statement st = connection.createStatement()) { int rows = 0; - try (ResultSet rs = st.executeQuery("select * from " + tableName.getText() + " limit 10;")) { + try (ResultSet rs = st.executeQuery( + "select * from " + tableName.getText() + " limit 10;")) { boolean validColumns = validateColumns(rs.getMetaData()); - if (! validColumns) { + if (!validColumns) { //TODO -- add a drop table or modify option //TODO -- show the specific mismatch return VALIDITY.COLUMN_MISMATCH; @@ -361,15 +337,16 @@ private boolean validateMetadata() { private boolean validateColumns(ResultSetMetaData metaData) throws SQLException { //TODO -- check column types! - for (int i = 1; i <= metaData.getColumnCount(); i++) { + for (int i = 1; i <= metaData.getColumnCount(); i++) { if (i == 1) { - if (! PATH_COL_NAME.equalsIgnoreCase(metaData.getColumnName(i))) { - alert(ALERT_TITLE, "Unexpected column name", "First column should be: " + PATH_COL_NAME); + if (!PATH_COL_NAME.equalsIgnoreCase(metaData.getColumnName(i))) { + alert(ALERT_TITLE, "Unexpected column name", + "First column should be: " + PATH_COL_NAME); return false; } } if (i == 2) { - if (! 
ATTACHMENT_NUM_COL_NAME.equalsIgnoreCase(metaData.getColumnName(i))) { + if (!ATTACHMENT_NUM_COL_NAME.equalsIgnoreCase(metaData.getColumnName(i))) { alert(ALERT_TITLE, "Unexpected column name", "Second column should be: " + ATTACHMENT_NUM_COL_NAME); return false; @@ -377,11 +354,12 @@ private boolean validateColumns(ResultSetMetaData metaData) throws SQLException } if (i > 2) { int tableRow = i - 3; - if (!metaData.getColumnName(i).equalsIgnoreCase(getMetadataRows().get(tableRow).getOutput())) { + if (!metaData.getColumnName(i) + .equalsIgnoreCase(getMetadataRows().get(tableRow).getOutput())) { alert(ALERT_TITLE, "Unexpected column name", "Column number (" + i + ") should be: " + getMetadataRows().get(tableRow).getOutput() + " but is " + - metaData.getColumnName(i)); + metaData.getColumnName(i)); return false; } } @@ -393,7 +371,7 @@ private boolean validateColumns(ResultSetMetaData metaData) throws SQLException sb.append("'").append(col).append("'").append(" "); } String warn = sb.toString().trim(); - if (! 
StringUtils.isBlank(warn)) { + if (!StringUtils.isBlank(warn)) { alert(ALERT_TITLE, "Unexpected column(s)", "Columns defined in metadata but not defined in the table: " + warn); return false; @@ -448,4 +426,9 @@ private String createInsertString() { return sb.toString(); } + private enum VALIDITY { + NO_CONNECTION_STRING, METADATA_NOT_CONFIGURED, METADATA_ANOMALY, FAILED_TO_CONNECT, VALID, + COLUMN_MISMATCH, TABLE_EXISTS_WITH_DATA, NEED_TO_CREATE_TABLE, SQL_EXCEPTION + } + } diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/OpenSearchEmitterController.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/OpenSearchEmitterController.java index d6d8385..515b8dd 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/OpenSearchEmitterController.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/emitters/OpenSearchEmitterController.java @@ -34,23 +34,21 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.tallison.tika.app.fx.Constants; +import org.tallison.tika.app.fx.batch.BatchProcessConfig; +import org.tallison.tika.app.fx.config.ConfigItem; import org.tallison.tika.app.fx.ctx.AppContext; import org.tallison.tika.app.fx.metadata.MetadataRow; import org.tallison.tika.app.fx.metadata.MetadataTuple; -import org.tallison.tika.app.fx.tools.BatchProcessConfig; -import org.tallison.tika.app.fx.tools.ConfigItem; import org.apache.tika.utils.StringUtils; public class OpenSearchEmitterController extends AbstractEmitterController implements Initializable { - private static AppContext APP_CONTEXT = AppContext.getInstance(); - private static Logger LOGGER = LogManager.getLogger(OpenSearchEmitterController.class); - //TODO -- this is bad private static final Pattern SIMPLE_URL_PATTERN = Pattern.compile("(?i)^https?:\\/\\/[-_a-z0-9\\.]+(?::\\d+)?\\/([-_a-z0-9\\.]+)"); - + private static AppContext APP_CONTEXT = AppContext.getInstance(); + private static Logger LOGGER = 
LogManager.getLogger(OpenSearchEmitterController.class); @FXML private TextField openSearchUrl; @@ -96,7 +94,8 @@ public void initialize(URL fxmlFileLocation, ResourceBundle resources) { emitter.getMetadataTuples().get().size() > 0) { getMetadataRows().clear(); for (MetadataTuple t : emitter.getMetadataTuples().get()) { - getMetadataRows().add(new MetadataRow(t.getTika(), t.getOutput(), t.getProperty())); + getMetadataRows().add( + new MetadataRow(t.getTika(), t.getOutput(), t.getProperty())); } } } else { diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/metadata/MetadataRow.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/metadata/MetadataRow.java index ad52b10..a788d2f 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/metadata/MetadataRow.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/metadata/MetadataRow.java @@ -64,14 +64,14 @@ public String getProperty() { return property.get(); } - public SimpleStringProperty propertyProperty() { - return property; - } - public void setProperty(String property) { this.property.set(property); } + public SimpleStringProperty propertyProperty() { + return property; + } + @Override public String toString() { return "MetadataRow{" + "tika=" + tika + ", output=" + output + ", property=" + property + diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/MutableStatus.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/MutableStatus.java index da7bad3..8987b83 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/MutableStatus.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/MutableStatus.java @@ -16,7 +16,7 @@ */ package org.tallison.tika.app.fx.status; -import org.tallison.tika.app.fx.tools.BatchProcess; +import org.tallison.tika.app.fx.batch.BatchProcess; public class MutableStatus { diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/StatusCount.java 
b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/StatusCount.java index a9349c4..7072298 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/StatusCount.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/StatusCount.java @@ -32,26 +32,26 @@ public String getStatusName() { return statusName.get(); } - public SimpleStringProperty statusNameProperty() { - return statusName; - } - public void setStatusName(String statusName) { this.statusName.set(statusName); } - public double getCount() { - return count.get(); + public SimpleStringProperty statusNameProperty() { + return statusName; } - public SimpleDoubleProperty countProperty() { - return count; + public double getCount() { + return count.get(); } public void setCount(double count) { this.count.set(count); } + public SimpleDoubleProperty countProperty() { + return count; + } + @Override public int compareTo(StatusCount o) { return Double.compare(o.count.get(), this.count.get()); diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/StatusUpdater.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/StatusUpdater.java index 560e707..532c12a 100644 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/StatusUpdater.java +++ b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/status/StatusUpdater.java @@ -26,7 +26,7 @@ import org.apache.logging.log4j.Logger; import org.tallison.tika.app.fx.ControllerBase; import org.tallison.tika.app.fx.TikaController; -import org.tallison.tika.app.fx.tools.BatchProcess; +import org.tallison.tika.app.fx.batch.BatchProcess; import org.apache.tika.pipes.PipesResult; import org.apache.tika.pipes.async.AsyncStatus; @@ -34,10 +34,10 @@ public class StatusUpdater implements Callable { private static Logger LOGGER = LogManager.getLogger(StatusUpdater.class); - private SimpleFloatProperty progressValue = new SimpleFloatProperty(0.0f); private final ProgressIndicator progressIndicator; private final 
TikaController tikaController; private final BatchProcess batchProcess; + private SimpleFloatProperty progressValue = new SimpleFloatProperty(0.0f); public StatusUpdater(BatchProcess batchProcess, TikaController tikaController) { this.batchProcess = batchProcess; @@ -58,8 +58,8 @@ public Integer call() throws Exception { if (asyncStatusOptional.isPresent()) { AsyncStatus asyncStatus = asyncStatusOptional.get(); long processed = 0; - for (Map.Entry e : - asyncStatus.getStatusCounts().entrySet()) { + for (Map.Entry e : asyncStatus.getStatusCounts() + .entrySet()) { processed += e.getValue(); } LOGGER.debug("processed {}", asyncStatus); @@ -87,9 +87,8 @@ public Integer call() throws Exception { Optional exception = batchProcess.getJvmException(); Optional jvmError = batchProcess.getJvmErrorMsg(); if (exception.isPresent()) { - ControllerBase.alertStackTrace("Batch process failed", - "Batch process failed", "Serious problem", - exception.get()); + ControllerBase.alertStackTrace("Batch process failed", "Batch process failed", + "Serious problem", exception.get()); } else if (jvmError.isPresent()) { ControllerBase.alert("Batch process failed", "Batch process failed", jvmError.get()); @@ -100,8 +99,8 @@ public Integer call() throws Exception { } return 1; } - if (asyncStatusOptional.isPresent() && - asyncStatusOptional.get().getAsyncStatus() == AsyncStatus.ASYNC_STATUS.COMPLETED) { + if (asyncStatusOptional.isPresent() && asyncStatusOptional.get().getAsyncStatus() == + AsyncStatus.ASYNC_STATUS.COMPLETED) { progressValue.set(1.0f); tikaController.updateButtons(BatchProcess.STATUS.COMPLETE); return 1; diff --git a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/TikaConfigWriter.java b/tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/TikaConfigWriter.java deleted file mode 100644 index 5de6635..0000000 --- a/tika-gui-app/src/main/java/org/tallison/tika/app/fx/tools/TikaConfigWriter.java +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Licensed to the Apache 
Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.tallison.tika.app.fx.tools; - -import static org.tallison.tika.app.fx.Constants.BASE_PATH; -import static org.tallison.tika.app.fx.Constants.CSV_EMITTER_CLASS; -import static org.tallison.tika.app.fx.Constants.CSV_JDBC_CONNECTION_STRING; -import static org.tallison.tika.app.fx.Constants.CSV_JDBC_INSERT_SQL; -import static org.tallison.tika.app.fx.Constants.JDBC_CONNECTION_STRING; -import static org.tallison.tika.app.fx.Constants.JDBC_EMITTER_CLASS; -import static org.tallison.tika.app.fx.Constants.JDBC_INSERT_SQL; -import static org.tallison.tika.app.fx.Constants.NO_DIGEST; -import static org.tallison.tika.app.fx.Constants.OPEN_SEARCH_PW; -import static org.tallison.tika.app.fx.Constants.OPEN_SEARCH_UPDATE_STRATEGY; -import static org.tallison.tika.app.fx.Constants.OPEN_SEARCH_URL; -import static org.tallison.tika.app.fx.Constants.OPEN_SEARCH_USER; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; -import java.nio.file.StandardOpenOption; -import java.util.List; -import java.util.Optional; - -import org.apache.commons.io.IOUtils; -import 
org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.tallison.tika.app.fx.Constants; -import org.tallison.tika.app.fx.ctx.AppContext; -import org.tallison.tika.app.fx.metadata.MetadataTuple; - -import org.apache.tika.utils.ProcessUtils; -import org.apache.tika.utils.StringUtils; - -/** - * This is an embarrassment of hardcoding. Need to figure out better - * solution... - *

- * This also requires knowledge of all fetchers/emitters in one class. This is, erm, - * less than entirely ideal. - *

- * This is also does not escape xml characters. So, bad, very, very bad. - */ -public class TikaConfigWriter { - private static Logger LOGGER = LogManager.getLogger(TikaConfigWriter.class); - - public void writeLog4j2() throws IOException { - String template = getTemplateLog4j2("log4j2-async.xml"); - template = - template.replace("{LOGS_PATH}", AppContext.LOGS_PATH.toAbsolutePath().toString()); - - if (!Files.isDirectory(AppContext.ASYNC_LOG4J2_PATH.getParent())) { - Files.createDirectories(AppContext.ASYNC_LOG4J2_PATH.getParent()); - } - Files.write(AppContext.ASYNC_LOG4J2_PATH, template.getBytes(StandardCharsets.UTF_8), - StandardOpenOption.CREATE); - - //not actually a template - String xml = getTemplateLog4j2("log4j2-async-cli.xml"); - Files.write(AppContext.CONFIG_PATH.resolve("log4j2-async-cli.xml"), - xml.getBytes(StandardCharsets.UTF_8), - StandardOpenOption.CREATE); - - } - - public Path writeConfig(BatchProcessConfig batchProcessConfig) throws IOException { - if (!Files.isDirectory(AppContext.CONFIG_PATH)) { - Files.createDirectories(AppContext.CONFIG_PATH); - } - Path tmp = Files.createTempFile(AppContext.CONFIG_PATH, "tika-config-", ".xml"); - StringBuilder sb = new StringBuilder(); - sb.append("\n"); - sb.append("\n"); - sb.append(getTemplate("parsers.xml")).append("\n"); - appendMetadataFilter(batchProcessConfig, sb); - appendPipesIterator(batchProcessConfig, sb); - appendFetcher(batchProcessConfig, sb); - appendEmitter(batchProcessConfig, sb); - appendAsync(batchProcessConfig, sb); - appendAutoDetectParserConfig(batchProcessConfig, sb); - sb.append(""); - Files.write(tmp, sb.toString().getBytes(StandardCharsets.UTF_8), StandardOpenOption.WRITE); - return tmp; - } - - private void appendAutoDetectParserConfig(BatchProcessConfig batchProcessConfig, - StringBuilder sb) throws IOException { - //good enough for now. 
we'll have to figure out - //a better option if we add more params - if (batchProcessConfig.getDigest().isEmpty()) { - return; - } - String digestString = batchProcessConfig.getDigest().get(); - if (digestString.equals(NO_DIGEST)) { - return; - } - String async = getTemplate("autoDetectParserConfig.xml"); - - async = async.replace("{DIGEST_STRING}", digestString); - sb.append(async).append("\n"); - } - - private void appendAsync(BatchProcessConfig bpc, StringBuilder sb) throws IOException { - String async = getTemplate("async.xml"); - async = async.replace("{JAVA_PATH}", - AppContext.getInstance().getJavaHome().resolve("java").toString()); - async = async.replace("{NUM_CLIENTS}", Integer.toString(bpc.getNumProcesses())); - async = async.replace("{XMX}", "-Xmx" + bpc.getMaxMemMb() + "m"); - async = async.replace("{ASYNC_LOG}", - AppContext.ASYNC_LOG4J2_PATH.toAbsolutePath().toString()); - async = async.replace("{TIMEOUT_MS}", Long.toString(bpc.getParseTimeoutSeconds() * 1000)); - async = async.replace("{STATUS_FILE}", - AppContext.BATCH_STATUS_PATH.toAbsolutePath().toString()); - async = async.replace("{EMIT_WITHIN_MS}", Long.toString(bpc.getEmitWithinMs())); - async = async.replace("{TOTAL_EMIT_THRESHOLD}", - Long.toString((long) bpc.getTotalEmitThesholdMb())); - async = async.replace("{PER_FILE_EMIT_THRESHOLD}", - Long.toString((long) bpc.getPerFileEmitThresholdMb())); - - async = async.replace("{CLASS_PATH}", buildClassPath(bpc)); - async = addReporters(bpc, async); - sb.append(async).append("\n"); - } - - private String addReporters(BatchProcessConfig bpc, String async) throws IOException { - //TODO -- add opensearch - if (bpc.getEmitter().isEmpty() || - (!bpc.getEmitter().get().getClazz().equals(JDBC_EMITTER_CLASS) && - !bpc.getEmitter().get().getClazz().equals(CSV_EMITTER_CLASS))) { - return async.replace("{JDBC_PIPES_REPORTER}", ""); - } - String jdbcPipesReporter = getTemplate("jdbc-pipes-reporter.xml"); - ConfigItem emitter = bpc.getEmitter().get(); - 
String connectionString = ""; - if (emitter.getClazz().equals(JDBC_EMITTER_CLASS)) { - connectionString = emitter.getAttributes().get(JDBC_CONNECTION_STRING); - } else if (emitter.getClazz().equals(CSV_EMITTER_CLASS)) { - connectionString = emitter.getAttributes().get(CSV_JDBC_CONNECTION_STRING); - } - jdbcPipesReporter = jdbcPipesReporter.replace("{CONNECTION_STRING}", connectionString); - return async.replace("{JDBC_PIPES_REPORTER}", jdbcPipesReporter); - } - - private String buildClassPath(BatchProcessConfig batchProcessConfig) { - StringBuilder sb = new StringBuilder(); - //load these mappings from a properties file or something - sb.append(ProcessUtils.escapeCommandLine( - AppContext.TIKA_APP_BIN_PATH.toAbsolutePath() + "/*")); - sb.append(File.pathSeparator); - sb.append(ProcessUtils.escapeCommandLine( - AppContext.TIKA_EXTRAS_BIN_PATH.toAbsolutePath() + "/*")); - sb.append(File.pathSeparator); - batchProcessConfig.appendPipesClasspath(sb); - return sb.toString(); - } - - private void appendEmitter(BatchProcessConfig batchProcessConfig, StringBuilder sb) - throws IOException { - Optional optionalEmitter = batchProcessConfig.getEmitter(); - if (optionalEmitter.isEmpty()) { - LOGGER.warn("emitter is empty?!"); - return; - } - ConfigItem emitter = optionalEmitter.get(); - switch (emitter.getClazz()) { - case Constants.FS_EMITTER_CLASS: - appendFSEmitter(emitter, sb); - break; - case Constants.OPEN_SEARCH_EMITTER_CLASS: - appendOpenSearchEmitter(emitter, sb); - break; - case Constants.CSV_EMITTER_CLASS: - appendJDBCEmitter(emitter, - emitter.getAttributes().get(CSV_JDBC_CONNECTION_STRING), - emitter.getAttributes().get(CSV_JDBC_INSERT_SQL), - sb); - break; - case Constants.JDBC_EMITTER_CLASS: - appendJDBCEmitter(emitter, - emitter.getAttributes().get(JDBC_CONNECTION_STRING), - emitter.getAttributes().get(JDBC_INSERT_SQL), - sb); - break; - default: - throw new RuntimeException("I regret I don't yet support " + - batchProcessConfig.getEmitter().get().getClazz()); 
- } - } - - - private void appendJDBCEmitter(ConfigItem emitter, String connectionString, - String insertString, StringBuilder sb) throws IOException { - String template = getTemplate("jdbc-pipes-emitter.xml"); - //TODO -- a lot better than this. LOL... - connectionString = connectionString.replaceAll("&", "&"); - template = template.replace("{CONNECTION_STRING}", connectionString); - //for now we assume the table was created via the dialog - template = template.replace("{CREATE_TABLE_SQL}", StringUtils.EMPTY); - - template = template.replace("{INSERT_SQL}", insertString); - - StringBuilder columns = new StringBuilder(); - //assume these exist - for (MetadataTuple t : emitter.getMetadataTuples().get()) { - columns.append(""); - } - template = template.replace("{COLUMNS_AND_TYPES}", columns.toString()); - - sb.append(template); - } - - private void appendOpenSearchEmitter(ConfigItem emitter, StringBuilder sb) throws IOException { - String template = getTemplate("opensearch-pipes-emitter.xml"); - - String userName = emitter.getAttributes().get(OPEN_SEARCH_USER); - String password = emitter.getAttributes().get(OPEN_SEARCH_PW); - if (StringUtils.isBlank(userName) && StringUtils.isBlank(password)) { - template = template.replace("{USER_NAME}", ""); - template = template.replace("{PASSWORD}", ""); - } else { - template = template.replace("{USER_NAME}", "" + userName + ""); - template = template.replace("{PASSWORD}", "" + password + ""); - } - - template = - template.replace("{OPENSEARCH_URL}", emitter.getAttributes().get(OPEN_SEARCH_URL)); - template = template.replace("{UPDATE_STRATEGY}", - emitter.getAttributes().get(OPEN_SEARCH_UPDATE_STRATEGY)); - sb.append(template); - } - - private void appendFSEmitter(ConfigItem fetcher, StringBuilder sb) throws IOException { - String template = getTemplate("fs-pipes-emitter.xml"); - template = template.replace("{BASE_PATH}", fetcher.getAttributes().get(BASE_PATH)); - sb.append(template).append("\n"); - } - - - private void 
appendFetcher(BatchProcessConfig batchProcessConfig, StringBuilder sb) - throws IOException { - Optional optionalFetcher = batchProcessConfig.getFetcher(); - if (optionalFetcher.isEmpty()) { - LOGGER.warn("fetcher is empty?!"); - return; - } - ConfigItem fetcher = optionalFetcher.get(); - switch (fetcher.getClazz()) { - case "org.apache.tika.pipes.fetcher.fs.FileSystemFetcher": - appendFSFetcher(fetcher, sb); - break; - default: - throw new RuntimeException("I regret I don't yet support " + fetcher.getClazz()); - } - } - - private void appendFSFetcher(ConfigItem fetcher, StringBuilder sb) throws IOException { - String template = getTemplate("fs-pipes-fetcher.xml"); - template = template.replace("{BASE_PATH}", fetcher.getAttributes().get(BASE_PATH)); - sb.append(template).append("\n"); - } - - private void appendPipesIterator(BatchProcessConfig batchProcessConfig, StringBuilder sb) - throws IOException { - Optional optionalPipesIterator = batchProcessConfig.getPipesIterator(); - if (optionalPipesIterator.isEmpty()) { - LOGGER.warn("pipesIterator is empty?!"); - return; - } - ConfigItem pipesIterator = optionalPipesIterator.get(); - switch (pipesIterator.getClazz()) { - case "org.apache.tika.pipes.pipesiterator.fs.FileSystemPipesIterator": - appendFSPipesIterator(pipesIterator, sb); - break; - default: - throw new RuntimeException( - "I regret I don't yet support " + pipesIterator.getClazz()); - } - } - - private void appendFSPipesIterator(ConfigItem pipesIterator, StringBuilder sb) - throws IOException { - String template = getTemplate("fs-pipes-iterator.xml"); - template = template.replace("{BASE_PATH}", pipesIterator.getAttributes().get(BASE_PATH)); - sb.append(template).append("\n"); - } - - private void appendMetadataFilter(BatchProcessConfig batchProcessConfig, - StringBuilder tikaConfigBuilder) throws IOException { - - StringBuilder sb = new StringBuilder(); - String template = getTemplate("metadata-filters.xml"); - Optional configItem = 
batchProcessConfig.getEmitter(); - if (configItem.isEmpty()) { - LOGGER.warn("emitter is empty?!"); - return; - } - ConfigItem emitter = configItem.get(); - Optional> metadataTuples = emitter.getMetadataTuples(); - if (metadataTuples.isEmpty() || metadataTuples.get().size() == 0) { - //add templated metadata filters - template = template.replace("{MAPPING_FILTER}", ""); - tikaConfigBuilder.append(template).append("\n"); - return; - } - - sb.append(""); - sb.append(" \n"); - sb.append(" true\n"); - sb.append(" \n"); - - metadataTuples.get().stream().forEach(e -> sb.append( - " \n")); - - sb.append(" "); - sb.append(" "); - sb.append("\n"); - - template = template.replace("{MAPPING_FILTER}", sb.toString()); - tikaConfigBuilder.append(template).append("\n"); - } - - private String getTemplate(String template) throws IOException { - try (InputStream is = this.getClass() - .getResourceAsStream("/templates/config/" + template)) { - return IOUtils.toString(is, StandardCharsets.UTF_8); - } - } - - private String getTemplateLog4j2(String template) throws IOException { - try (InputStream is = this.getClass() - .getResourceAsStream("/templates/log4j2/" + template)) { - return IOUtils.toString(is, StandardCharsets.UTF_8); - } - } -} diff --git a/tika-gui-app/src/test/java/org/tallison/tika/app/fx/config/TestTikaConfigWriter.java b/tika-gui-app/src/test/java/org/tallison/tika/app/fx/config/TestTikaConfigWriter.java new file mode 100644 index 0000000..0a783f9 --- /dev/null +++ b/tika-gui-app/src/test/java/org/tallison/tika/app/fx/config/TestTikaConfigWriter.java @@ -0,0 +1,36 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.tallison.tika.app.fx.config; + +import java.nio.file.Path; + +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.io.TempDir; +import org.tallison.tika.app.fx.batch.BatchProcessConfig; + +public class TestTikaConfigWriter { + + @Test + public void testBasic(@TempDir Path dir) throws Exception { + + TikaConfigWriter configWriter = new TikaConfigWriter(); + BatchProcessConfig batchProcessConfig = new BatchProcessConfig(); + Path tmp = configWriter.writeConfig(batchProcessConfig, dir); + + } + +} diff --git a/tika-gui-app/src/test/java/org/tallison/tika/app/fx/ctx/TestAppContextSerialization.java b/tika-gui-app/src/test/java/org/tallison/tika/app/fx/ctx/TestAppContextSerialization.java index db7fef3..b65e677 100644 --- a/tika-gui-app/src/test/java/org/tallison/tika/app/fx/ctx/TestAppContextSerialization.java +++ b/tika-gui-app/src/test/java/org/tallison/tika/app/fx/ctx/TestAppContextSerialization.java @@ -25,7 +25,7 @@ import org.junit.jupiter.api.Disabled; import org.junit.jupiter.api.Test; import org.tallison.tika.app.fx.Constants; -import org.tallison.tika.app.fx.tools.BatchProcess; +import org.tallison.tika.app.fx.batch.BatchProcess; import org.apache.tika.pipes.PipesResult;