Skip to content

Commit

Permalink
issue #103 (#121)
Browse files Browse the repository at this point in the history
* Swap in H2 instead of SQLite for the CSV writer.

* checkstyle

This closes #103
  • Loading branch information
tballison committed Mar 28, 2023
1 parent 77b1660 commit d3ef56e
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 26 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -228,10 +228,8 @@ private void appendAsync(BatchProcessConfig bpc, DomWriter writer, Element prope
private void appendPipesReporters(DomWriter writer, Element async, BatchProcessConfig bpc) {
Element compositePipesReporter = writer.createAndGetElement(async, "pipesReporter", "class",
"org.apache.tika.pipes.CompositePipesReporter");
Element params = writer.createAndGetElement(compositePipesReporter, "params");
Element pipesReporters = writer.createAndGetElement(params, "pipesReporters", "class",
"org.apache.tika.pipes.PipesReporter");
Element fsReporter = writer.createAndGetElement(pipesReporters, "pipesReporter", "class",
//Element params = writer.createAndGetElement(compositePipesReporter, "params");
Element fsReporter = writer.createAndGetElement(compositePipesReporter, "pipesReporter", "class",
"org.apache.tika.pipes.reporters.fs.FileSystemStatusReporter");
Element fsReporterParams = writer.createAndGetElement(fsReporter, "params");
writer.appendTextElement(fsReporterParams, "statusFile",
Expand All @@ -250,7 +248,7 @@ private void appendPipesReporters(DomWriter writer, Element async, BatchProcessC
Optional<String> connectionString = ((JDBCEmitterSpec) emitter).getConnectionString();

if (connectionString.isPresent()) {
Element jdbc = writer.createAndGetElement(pipesReporters, "pipesReporter", "class",
Element jdbc = writer.createAndGetElement(compositePipesReporter, "pipesReporter", "class",
"org.apache.tika.pipes.reporters.jdbc.JDBCPipesReporter");

Element jdbcParams = writer.createAndGetElement(jdbc, "params");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ public class CSVEmitterSpec extends JDBCEmitterSpec {
private Optional<String> csvFileName = Optional.empty();
private volatile boolean closed = false;

private Connection connection = null;

public CSVEmitterSpec(@JsonProperty("metadataTuples") List<MetadataTuple> metadataTuples) {
super(metadataTuples);
setTableName(CSV_DB_TABLE_NAME);
Expand All @@ -64,8 +66,8 @@ public ValidationResult initialize() throws IOException {
}
tmpDbDirectory = Optional.of(Files.createTempDirectory("tika-app-csv-tmp"));
LOGGER.debug("tmp db directory: {}", tmpDbDirectory.get().toAbsolutePath());
setConnectionString("jdbc:sqlite:" + tmpDbDirectory.get().toAbsolutePath() +
"/tika-gui-v2-tmp-csv-db.db");
setConnectionString("jdbc:h2:" + tmpDbDirectory.get().toAbsolutePath() +
"/tika-gui-v2-tmp-csv-db;AUTO_SERVER=TRUE");
try {
createTable();
} catch (SQLException e) {
Expand Down Expand Up @@ -104,11 +106,10 @@ private void createTable() throws SQLException {
LOGGER.warn("connection string is empty?!");
return;
}
try (Connection connection = DriverManager.getConnection(getConnectionString().get())) {
try (Statement st = connection.createStatement()) {
st.execute(dropTable);
st.execute(createTable.toString());
}
connection = DriverManager.getConnection(getConnectionString().get());
try (Statement st = connection.createStatement()) {
st.execute(dropTable);
st.execute(createTable.toString());
}
}

Expand Down Expand Up @@ -146,26 +147,25 @@ private void writeCSV() throws IOException {
LOGGER.debug("about to write " + csvPath.get().toAbsolutePath());
int rows = 0;
try (OutputStream os = Files.newOutputStream(csvPath.get());
CSVPrinter printer = new CSVPrinter(new OutputStreamWriter(os, UTF_8),
CSVFormat.EXCEL)) {
CSVPrinter printer = new CSVPrinter(new OutputStreamWriter(os, UTF_8), CSVFormat.EXCEL)) {
writeHeaders(printer);

try (Connection connection = DriverManager.getConnection(getConnectionString().get())) {
try (Statement st = connection.createStatement()) {
List<String> cells = new ArrayList<>();
Integer columnCount = null;
try (ResultSet rs = st.executeQuery(select)) {
while (rs.next()) {
if (columnCount == null) {
columnCount = rs.getMetaData().getColumnCount();
}
writeRow(rs, printer, cells, columnCount);
cells.clear();
rows++;

try (Statement st = connection.createStatement()) {
List<String> cells = new ArrayList<>();
Integer columnCount = null;
try (ResultSet rs = st.executeQuery(select)) {
while (rs.next()) {
if (columnCount == null) {
columnCount = rs.getMetaData().getColumnCount();
}
writeRow(rs, printer, cells, columnCount);
cells.clear();
rows++;
}
}
}

} catch (SQLException e) {
LOGGER.warn("Failed to write CSV", e);
} catch (IOException e) {
Expand Down Expand Up @@ -229,6 +229,11 @@ private String getSelect() {
}

private void cleanCSVTempResources() throws IOException {
try {
connection.close();
} catch (SQLException e) {
LOGGER.warn("problem closing db?!", e);
}
if (tmpDbDirectory.isEmpty()) {
LOGGER.warn("tmpdb has not been set ?!");
return;
Expand Down

0 comments on commit d3ef56e

Please sign in to comment.