Skip to content

Commit

Permalink
Proposed solution for issue 293 (OHDSI#293)
Browse files Browse the repository at this point in the history
  • Loading branch information
jan-at-the-hyve committed Feb 10, 2023
1 parent e8a3d01 commit 4199452
Show file tree
Hide file tree
Showing 4 changed files with 128 additions and 11 deletions.
9 changes: 5 additions & 4 deletions rabbit-core/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@

<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<apache-poi-version>4.1.2</apache-poi-version>
</properties>

<dependencies>
Expand Down Expand Up @@ -41,22 +42,22 @@
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi</artifactId>
<version>4.1.2</version>
<version>${apache-poi-version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml</artifactId>
<version>4.1.2</version>
<version>${apache-poi-version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-excelant</artifactId>
<version>4.1.2</version>
<version>${apache-poi-version}</version>
</dependency>
<dependency>
<groupId>org.apache.poi</groupId>
<artifactId>poi-ooxml-schemas</artifactId>
<version>4.1.2</version>
<version>4.1.2</version>
</dependency>
<!-- Note: Apache xmlbeans v4.x and v5.x is incompatible with Apache poi v4-->
<dependency>
Expand Down
7 changes: 7 additions & 0 deletions whiterabbit/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,13 @@
<artifactId>slf4j-simple</artifactId>
<version>1.7.30</version>
</dependency>
<!-- https://mvnrepository.com/artifact/commons-io/commons-io -->
<dependency>
<groupId>commons-io</groupId>
<artifactId>commons-io</artifactId>
<version>2.11.0</version>
</dependency>


<!-- https://mvnrepository.com/artifact/org.testcontainers/testcontainers -->
<dependency>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,9 @@
package org.ohdsi.whiteRabbit.scan;

import java.io.*;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.time.LocalDate;
Expand All @@ -29,11 +32,13 @@
import com.epam.parso.SasFileProperties;
import com.epam.parso.SasFileReader;
import com.epam.parso.impl.SasFileReaderImpl;
import org.apache.commons.lang.StringUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.commons.io.FileUtils;
import org.ohdsi.databases.DbType;
import org.ohdsi.databases.RichConnection;
import org.ohdsi.databases.RichConnection.QueryResult;
Expand Down Expand Up @@ -70,6 +75,15 @@ public class SourceDataScan {

private LocalDateTime startTimeStamp;

static final String poiTmpPath;

static {
try {
poiTmpPath = setUniqueTempDirStrategyForApachePoi();
} catch (IOException e) {
throw new RuntimeException(e);
}
}

public void setSampleSize(int sampleSize) {
// -1 if sample size is not restricted
Expand Down Expand Up @@ -117,6 +131,32 @@ public void process(DbSettings dbSettings, String outputFileName) {
generateReport(outputFileName);
}

public static String setUniqueTempDirStrategyForApachePoi() throws IOException {
// TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal
final String poiFilesDir = "poifiles";
Path defaultTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDir);
Path myTmpPath = defaultTmpPath;
if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) {
// problem: someone else (?) already created the default tmp path for poi, but we cannot use it: it's readonly for us.
// solution: create our own tmp directory, taking property "org.ohdsi.whiterabbit.poi.tmpdir" into account if set.
String poiFilesDirProperty = System.getProperty("org.ohdsi.whiterabbit.poi.tmpdir");
if (!StringUtils.isEmpty(poiFilesDirProperty)) {
// use what the user configured.
myTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDirProperty, UUID.randomUUID().toString());
}
else {
// avoid the poifiles directory entirely
myTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), UUID.randomUUID().toString());
}
File tmpDir = Files.createDirectories(myTmpPath).toFile();
org.apache.poi.util.TempFile.setTempFileCreationStrategy(new org.apache.poi.util.DefaultTempFileCreationStrategy(tmpDir));
}

// return the path that is actually being used
// not needed for production code, by handy for unit testing this
return myTmpPath.toFile().getAbsolutePath();
}

private void processDatabase(DbSettings dbSettings) {
// GBQ requires database. Put database value into domain var
if (dbSettings.dbType == DbType.BIGQUERY) {
Expand Down Expand Up @@ -735,7 +775,6 @@ public void processValue(String value) {
samplingReservoir.add(DateUtilities.parseDate(trimValue));
}
}

}

public List<Pair<String, Integer>> getSortedValuesWithoutSmallValues() {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
package org.ohdsi.whiterabbit.scan;

import org.apache.commons.io.FileUtils;
import org.junit.jupiter.api.Tag;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.io.TempDir;
Expand All @@ -20,10 +21,8 @@
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.nio.file.Paths;
import java.util.*;

import static org.junit.jupiter.api.Assertions.*;

Expand Down Expand Up @@ -92,9 +91,9 @@ void testProcess(@TempDir Path tempDir) throws IOException {
sourceDataScan.process(dbSettings, outFile.toString());
assertTrue(Files.exists(outFile));

//
//
// Verify the contents of the generated xlsx file
//
//
FileInputStream file = new FileInputStream(new File(outFile.toUri()));

ReadXlsxFileWithHeader sheet = new ReadXlsxFileWithHeader(file);
Expand Down Expand Up @@ -125,6 +124,62 @@ void testProcess(@TempDir Path tempDir) throws IOException {
expectRowNIsLike(242, data, "visit_occurrence", "visit_type_concept_id", "", "integer", "0", "55261");
}

@Test
void testApachePoiTmpfileProblem(@TempDir Path tempDir) throws IOException {
// intends to verify solution of this bug: https://github.com/OHDSI/WhiteRabbit/issues/293

/*
* This tests a fix that assumes that the bug referenced here occurs in a multi-user situation where the
* first user running the scan, and causing /tmp/poifiles to created, does so by creating it read-only
* for everyone else. This directory is not automatically cleaned up, so every following user on the same
* system running the scan encounters the problem that /tmp/poifiles already exists and is read-only,
* causing a crash when the Apacho poi library attemps to create the xslx file.
*
* The class SourceDataScan has been extended with a static method, called implicitly once through a static{}
* block, to create a TempDir strategy to create a
* unique directory for each instance/run of WhiteRabbit. This effectively solves the assumes error
* situation.
*
* This test does not execute a multi-user situation, but emulates it by leaving the tmp directory in a
* read-only state after the first scan, and then confirming that a second scan fails. After that,
* a new unique tmp dir is enforced by invoking SourceDataScan.setUniqueTempDirStrategyForApachePoi(),
* and a new scan now runs successfully.
*/
// TODO: if/when updating poi to 5.x or higher, use DefaultTempFileCreationStrategy.POIFILES instead of a string literal
final String poiFilesDir = "poifiles";
Path defaultTmpPath = Paths.get(FileUtils.getTempDirectory().getAbsolutePath(), poiFilesDir);

// attempt to resolve an already existing unworkable situation where the default tmp dir for poi files is readonly
if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) {
assertTrue(defaultTmpPath.toFile().setWritable(true),
String.format("This test cannot run properly if %s exists but is not writeable. Either remove it or make it writeable",
defaultTmpPath.toFile().getAbsolutePath()));
}

// process should pass without problem, and afterwards the default tmp dir should exist
testProcess(tempDir);
assertTrue(Files.exists(defaultTmpPath));

// provoke the problem situation. make the default tmp dir readonly, try to process again
assertTrue(defaultTmpPath.toFile().setReadOnly());
RuntimeException thrown = assertThrows(RuntimeException.class, () -> {
testProcess(tempDir);
});
assertTrue(thrown.getMessage().endsWith("Permission denied"));

// invoke the static method to set a new tmp dir, process again (should succeed) and verify that
// the new tmpdir is indeed different from the default
String myTmpDir = SourceDataScan.setUniqueTempDirStrategyForApachePoi();
testProcess(tempDir);
assertNotEquals(defaultTmpPath.toFile().getAbsolutePath(), myTmpDir);

// we might have left behind an unworkable situation; attempt to solve that
if (Files.exists(defaultTmpPath) && !Files.isWritable(defaultTmpPath)) {
assertTrue(defaultTmpPath.toFile().setWritable(true));
}

}

private boolean expectRowNIsLike(int n, List<Row> rows, String... expectedValues) {
assert expectedValues.length == 6;
testColumnValue(n, rows.get(n), "Table", expectedValues[0]);
Expand Down Expand Up @@ -160,4 +215,19 @@ private DbSettings getTestDbSettings() {

return dbSettings;
}

private boolean deleteDir(File file) {
if (Files.exists(file.toPath())) {
File[] contents = file.listFiles();
if (contents != null) {
for (File f : contents) {
if (!Files.isSymbolicLink(f.toPath())) {
deleteDir(f);
}
}
}
return file.delete();
}
return true;
}
}

0 comments on commit 4199452

Please sign in to comment.