diff --git a/dkpro-core-api-datasets-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/api/datasets/DatasetLoader.java b/dkpro-core-api-datasets-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/api/datasets/DatasetLoader.java index 4ae2138c07..c17183091e 100644 --- a/dkpro-core-api-datasets-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/api/datasets/DatasetLoader.java +++ b/dkpro-core-api-datasets-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/api/datasets/DatasetLoader.java @@ -24,6 +24,7 @@ import java.io.InputStream; import java.net.URL; import java.net.URLConnection; +import java.nio.file.Path; import java.security.DigestInputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -250,12 +251,20 @@ private void extract(File aArchive, ArchiveInputStream aArchiveStream, File aTar throw new IllegalStateException("Filename must not contain line break"); } - File out = new File(aTarget, name); + Path base = aTarget.toPath().toAbsolutePath().normalize(); + Path out = base.resolve(name).normalize(); + + if (!out.startsWith(base)) { + // Ignore attempts to write outside the base + continue; + } + if (entry.isDirectory()) { - FileUtils.forceMkdir(out); + FileUtils.forceMkdir(out.toFile()); } else { - FileUtils.copyInputStreamToFile(new CloseShieldInputStream(aArchiveStream), out); + FileUtils.copyInputStreamToFile(new CloseShieldInputStream(aArchiveStream), + out.toFile()); } } } diff --git a/dkpro-core-api-datasets-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/api/datasets/internal/actions/Explode.java b/dkpro-core-api-datasets-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/api/datasets/internal/actions/Explode.java index 37758aefe1..a07fff7ad3 100644 --- a/dkpro-core-api-datasets-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/api/datasets/internal/actions/Explode.java +++ b/dkpro-core-api-datasets-asl/src/main/java/de/tudarmstadt/ukp/dkpro/core/api/datasets/internal/actions/Explode.java @@ -39,6 +39,8 @@ import 
org.apache.commons.compress.compressors.CompressorStreamFactory; import org.apache.commons.io.FilenameUtils; import org.apache.commons.io.IOUtils; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import com.github.junrar.Archive; import com.github.junrar.exception.RarException; @@ -54,6 +56,8 @@ public class Explode extends Action_ImplBase { + private final Log LOG = LogFactory.getLog(getClass()); + @Override public void apply(ActionDescription aAction, DatasetDescription aDataset, ArtifactDescription aPack, Path aCachedFile) @@ -99,11 +103,11 @@ public void apply(ActionDescription aAction, DatasetDescription aDataset, } } - private void extract7z(ActionDescription aAction, Path aCachedFile, Path aTarget) + private void extract7z(ActionDescription aAction, Path aArchive, Path aTarget) throws IOException, RarException { // We always extract archives into a subfolder. Figure out the name of the folder. - String base = getBase(aCachedFile.getFileName().toString()); + Path base = aTarget.resolve(getPathWithoutFileExtension(aArchive)).toAbsolutePath(); Map cfg = aAction.getConfiguration(); int strip = cfg.containsKey("strip") ? 
(int) cfg.get("strip") : 0; @@ -111,7 +115,10 @@ private void extract7z(ActionDescription aAction, Path aCachedFile, Path aTarget AntFileFilter filter = new AntFileFilter(coerceToList(cfg.get("includes")), coerceToList(cfg.get("excludes"))); - try (SevenZFile archive = new SevenZFile(aCachedFile.toFile())) { + LOG.info("Extracting files of [" + aArchive.getFileName() + "] to [" + base + + "]"); + + try (SevenZFile archive = new SevenZFile(aArchive.toFile())) { SevenZArchiveEntry entry = archive.getNextEntry(); while (entry != null) { String name = stripLeadingFolders(entry.getName(), strip); @@ -122,7 +129,13 @@ private void extract7z(ActionDescription aAction, Path aCachedFile, Path aTarget } if (filter.accept(name)) { - Path out = aTarget.resolve(base).resolve(name); + Path out = base.resolve(name).normalize(); + if (!out.startsWith(base.normalize())) { + throw new IOException( + "Archive tries to generate file outside target folder: [" + name + + "]"); + } + if (entry.isDirectory()) { Files.createDirectories(out); } @@ -140,11 +153,11 @@ private void extract7z(ActionDescription aAction, Path aCachedFile, Path aTarget } } - private void extractRar(ActionDescription aAction, Path aCachedFile, Path aTarget) + private void extractRar(ActionDescription aAction, Path aArchive, Path aTarget) throws IOException, RarException { // We always extract archives into a subfolder. Figure out the name of the folder. - String base = getBase(aCachedFile.getFileName().toString()); + Path base = aTarget.resolve(getPathWithoutFileExtension(aArchive)).toAbsolutePath(); Map cfg = aAction.getConfiguration(); int strip = cfg.containsKey("strip") ? 
(int) cfg.get("strip") : 0; @@ -152,7 +165,10 @@ private void extractRar(ActionDescription aAction, Path aCachedFile, Path aTarge AntFileFilter filter = new AntFileFilter(coerceToList(cfg.get("includes")), coerceToList(cfg.get("excludes"))); - try (Archive archive = new Archive(new FileVolumeManager(aCachedFile.toFile()))) { + LOG.info("Extracting files of [" + aArchive.getFileName() + "] to [" + base + + "]"); + + try (Archive archive = new Archive(new FileVolumeManager(aArchive.toFile()))) { FileHeader fh = archive.nextFileHeader(); while (fh != null) { String name = stripLeadingFolders(fh.getFileNameString(), strip); @@ -163,7 +179,13 @@ private void extractRar(ActionDescription aAction, Path aCachedFile, Path aTarge } if (filter.accept(name)) { - Path out = aTarget.resolve(base).resolve(name); + Path out = base.resolve(name).normalize(); + if (!out.startsWith(base.normalize())) { + throw new IOException( + "Archive tries to generate file outside target folder: [" + name + + "]"); + } + if (fh.isDirectory()) { Files.createDirectories(out); } @@ -185,7 +207,7 @@ private void extract(ActionDescription aAction, Path aArchive, ArchiveInputStrea throws IOException { // We always extract archives into a subfolder. Figure out the name of the folder. - String base = getBase(aArchive.getFileName().toString()); + Path base = aTarget.resolve(getPathWithoutFileExtension(aArchive)).toAbsolutePath(); Map cfg = aAction.getConfiguration(); int strip = cfg.containsKey("strip") ? 
(int) cfg.get("strip") : 0; @@ -193,6 +215,9 @@ private void extract(ActionDescription aAction, Path aArchive, ArchiveInputStrea AntFileFilter filter = new AntFileFilter(coerceToList(cfg.get("includes")), coerceToList(cfg.get("excludes"))); + LOG.info("Extracting files of [" + aArchive.getFileName() + "] to [" + base + + "]"); + ArchiveEntry entry = null; while ((entry = aAStream.getNextEntry()) != null) { String name = stripLeadingFolders(entry.getName(), strip); @@ -203,7 +228,12 @@ private void extract(ActionDescription aAction, Path aArchive, ArchiveInputStrea } if (filter.accept(name)) { - Path out = aTarget.resolve(base).resolve(name); + Path out = base.resolve(name).normalize(); + if (!out.startsWith(base.normalize())) { + throw new IOException( + "Archive tries to generate file outside target folder: [" + name + "]"); + } + if (entry.isDirectory()) { Files.createDirectories(out); } @@ -217,6 +247,10 @@ private void extract(ActionDescription aAction, Path aArchive, ArchiveInputStrea private String stripLeadingFolders(String aName, int aLevels) { + if (aName == null) { + return null; + } + if (aLevels > 0) { Path p = Paths.get(aName); if (p.getNameCount() <= aLevels) { @@ -233,10 +267,16 @@ private String stripLeadingFolders(String aName, int aLevels) } } - public static String getBase(String aFilename) + /** + * Get the name of the archive without any extensions (e.g. in the case of multiple extensions + * such as .tar.gz). + */ + public static String getPathWithoutFileExtension(Path aFilename) { + + // We always extract archives into a subfolder. Figure out the name of the folder. - String base = aFilename; + String base = aFilename.getFileName().toString(); while (base.contains(".")) { base = FilenameUtils.removeExtension(base); }