Skip to content

Commit

Permalink
Percent encodes file pathnames in manifests. Closes #33
Browse files Browse the repository at this point in the history
  • Loading branch information
richardrodgers committed Feb 17, 2022
1 parent 8f18c5d commit 68d40ea
Show file tree
Hide file tree
Showing 3 changed files with 56 additions and 6 deletions.
12 changes: 10 additions & 2 deletions src/main/java/edu/mit/lib/bagit/Bag.java
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ public class Bag {
// coding constants
static final String DEFAULT_CS_ALGO = "SHA-512";
static final String BAGIT_VSN = "1.0";
static final String LIB_VSN = "1.0";
static final String LIB_VSN = "1.1";
static final String DFLT_FMT = "zip";
static final String TGZIP_FMT = "tgz";
static final String SPACER = " ";
Expand Down Expand Up @@ -526,7 +526,7 @@ public Map<String, String> manifest(String relPath) throws IOException {
String line;
while((line = reader.readLine()) != null) {
String[] parts = line.split("\\s+", 2);
mfMap.put(parts[1], parts[0]);
mfMap.put(decodePath(parts[1]), parts[0]);
}
}
return mfMap;
Expand Down Expand Up @@ -652,6 +652,14 @@ static byte[] filterBytes(String data, Charset encoding, AtomicBoolean bomOut) {
}
}

/**
 * Percent-encodes the characters that cannot appear literally in a path
 * written to a line-oriented tag file: {@code %} becomes {@code %25},
 * line feed becomes {@code %0A} and carriage return becomes {@code %0D}.
 * All other characters are copied through unchanged.
 *
 * @param path the raw path text
 * @return the path with the three reserved characters percent-encoded
 */
static String encodePath(String path) {
    var encoded = new StringBuilder(path.length());
    for (int i = 0; i < path.length(); i++) {
        char ch = path.charAt(i);
        switch (ch) {
            case '%':
                encoded.append("%25");
                break;
            case '\n':
                encoded.append("%0A");
                break;
            case '\r':
                encoded.append("%0D");
                break;
            default:
                encoded.append(ch);
        }
    }
    return encoded.toString();
}

/**
 * Reverses {@link #encodePath(String)}: {@code %0A} becomes line feed,
 * {@code %0D} becomes carriage return and {@code %25} becomes {@code %}.
 *
 * @param path the percent-encoded path text as read from a tag file
 * @return the decoded path
 */
static String decodePath(String path) {
    // %25 must be decoded last. Decoding it first turns an encoded literal
    // such as "%250A" (the name "%0A") into "%0A", which the following step
    // would then wrongly convert into a line feed. In encoder output every
    // literal '%' is followed by "25", so the %0A/%0D passes can only match
    // genuinely encoded line terminators.
    return path.replace("%0A", "\n").replace("%0D", "\r").replace("%25", "%");
}

private static final char[] HEX_CHARS = "0123456789abcdef".toCharArray();
static String toHex(byte[] data) {
if ((data == null) || (data.length == 0)) {
Expand Down
10 changes: 6 additions & 4 deletions src/main/java/edu/mit/lib/bagit/Filler.java
Original file line number Diff line number Diff line change
Expand Up @@ -404,7 +404,7 @@ public Filler payloadRef(String relPath, InputStream in, URI uri) throws IOExcep
var destFile = destDir.resolve("foo");
long size = digestCopy(in, destFile, DATA_PATH + relPath, manWriters);
var sizeStr = (size > 0L) ? Long.toString(size) : "-";
refWriter.writeLine(uri.toString() + " " + sizeStr + " " + DATA_PATH + relPath);
refWriter.writeLine(uri.toString() + " " + sizeStr + " " + DATA_PATH + encodePath(relPath));
Files.delete(destFile);
Files.delete(destDir);
return this;
Expand Down Expand Up @@ -434,11 +434,11 @@ public Filler payloadRefUnsafe(String relPath, long size, URI uri, Map<String, S
throw new IOException("checksums do not match bags");
}
for (String alg : manWriters.keySet()) {
manWriters.get(alg).writeLine(checksums.get(alg) + " " + relPath);
manWriters.get(alg).writeLine(checksums.get(alg) + " " + encodePath(relPath));
}
var sizeStr = (size > 0L) ? Long.toString(size) : "-";
FlatWriter refWriter = getWriter(REF_FILE);
refWriter.writeLine(uri.toString() + " " + sizeStr + " " + DATA_PATH + relPath);
refWriter.writeLine(uri.toString() + " " + sizeStr + " " + DATA_PATH + encodePath(relPath));
return this;
}

Expand Down Expand Up @@ -619,10 +619,12 @@ public void writeProperty(String key, String value) throws IOException {
}

// Writes a single line of text, followed by lineSeparator, through filterBytes and write.
// When 'record' is set, the raw line is also kept in 'lines' and the written form is
// passed through encodePath first.
// NOTE(review): this span is rendered from a diff, so the pre-change and post-change
// write(...) statements both appear below; only the encLine variant appears to belong
// to the new revision.
public void writeLine(String line) throws IOException {
var encLine = line;
if (record) {
// the recorded copy keeps the unencoded text
lines.add(line);
// NOTE(review): encodePath is applied to the whole line, not just a path portion.
// A caller that already passes an encoded path would presumably be encoded twice
// when 'record' is true -- TODO confirm against the writers that set 'record'.
encLine = encodePath(line);
}
// pre-change statement (appears to be the line removed by this commit): writes the raw line
write(filterBytes(line + lineSeparator, encoding, bomOut));
// post-change statement: writes the (possibly encoded) line
write(filterBytes(encLine + lineSeparator, encoding, bomOut));
}

public List<String> getLines() {
Expand Down
40 changes: 40 additions & 0 deletions src/test/java/edu/mit/lib/bagit/BagTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.time.temporal.ChronoUnit;
import java.util.Scanner;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

Expand Down Expand Up @@ -853,6 +854,45 @@ public void validAndInvalidBagUTF16() throws IOException {
assertTrue(!bag.isValid());
}

/**
 * Fills a directory bag with two payload files whose names contain '%',
 * then checks that the files exist under the payload directory with their
 * literal names, that the SHA-512 manifest lists them with '%' encoded as
 * "%25", and that the loaded bag reports itself complete and valid.
 */
@Test
public void percentPathNamePayloadBag() throws IOException {
    Path bagDir = tempFolder.newFolder("bag37").toPath();
    new Filler(bagDir)
        .payload("first%1.pdf", payload1)
        .payload("second/second%2.pdf", payload2)
        .toDirectory();

    // payload files are stored under their literal (unencoded) names
    Path dataDir = bagDir.resolve(DATA_DIR);
    assertTrue(Files.isDirectory(dataDir));
    Path firstPayload = dataDir.resolve("first%1.pdf");
    assertTrue(Files.exists(firstPayload));
    Path secondPayload = dataDir.resolve("second/second%2.pdf");
    assertTrue(Files.exists(secondPayload));

    // manifest entries carry the percent-encoded names
    String firstEntry = readTextLine(bagDir.resolve("manifest-sha512.txt"), 0);
    assertTrue(firstEntry.endsWith("first%251.pdf"));
    String secondEntry = readTextLine(bagDir.resolve("manifest-sha512.txt"), 1);
    assertTrue(secondEntry.endsWith("second%252.pdf"));

    // the bag must still load as complete and valid
    Bag loaded = load(bagDir);
    assertTrue(loaded.isComplete());
    assertTrue(loaded.isValid());
}

/**
 * Fills a directory bag with two payload references whose names contain
 * '%', then checks that the first two lines of fetch.txt end with those
 * names with '%' encoded as "%25".
 */
@Test
public void percentPathNameFetchBag() throws IOException, URISyntaxException {
    Path bagDir = tempFolder.newFolder("bag38").toPath();
    URI firstUri = new URI("http://www.example.com/foo");
    URI secondUri = new URI("http://www.example.com/foo");
    new Filler(bagDir)
        .payloadRef("first%1.pdf", payload1, firstUri)
        .payloadRef("second/second%2.pdf", payload2, secondUri)
        .toDirectory();

    // fetch entries carry the percent-encoded names
    String firstEntry = readTextLine(bagDir.resolve("fetch.txt"), 0);
    assertTrue(firstEntry.endsWith("first%251.pdf"));
    String secondEntry = readTextLine(bagDir.resolve("fetch.txt"), 1);
    assertTrue(secondEntry.endsWith("second%252.pdf"));
}

/**
 * Reads the whole file and returns the line at the given zero-based index.
 *
 * @param file the text file to read
 * @param lineNum zero-based index of the wanted line
 * @return the text of that line
 * @throws IOException if the file cannot be read
 */
private String readTextLine(Path file, int lineNum) throws IOException {
    return Files.readAllLines(file).get(lineNum);
}

private String findSeparator(Path file) throws IOException {
try (Scanner scanner = new Scanner(file)) {
// it's one or the other
Expand Down

0 comments on commit 68d40ea

Please sign in to comment.