Skip to content

Commit

Permalink
8260617: Merge ZipFile encoding check with the initial hash calculation
Browse files Browse the repository at this point in the history
Reviewed-by: lancea
  • Loading branch information
cl4es committed Feb 3, 2021
1 parent ae2c5f0 commit c8de943
Show file tree
Hide file tree
Showing 2 changed files with 109 additions and 125 deletions.
99 changes: 31 additions & 68 deletions src/java.base/share/classes/java/util/zip/ZipCoder.java
Expand Up @@ -54,14 +54,6 @@ public static ZipCoder get(Charset charset) {
return new ZipCoder(charset);
}

// Checks that the nlen bytes of a starting at pos can be decoded, by running
// them through toString and discarding the result.
//
// Any exception raised while decoding is reported as a ZipException carrying
// the "bad entry name" message; the original exception is attached as its
// cause so the underlying decoding failure is not lost.
void checkEncoding(byte[] a, int pos, int nlen) throws ZipException {
    try {
        toString(a, pos, nlen);
    } catch (Exception e) {
        // ZipException has no (message, cause) constructor, so chain the
        // cause explicitly before throwing.
        ZipException ze = new ZipException("invalid CEN header (bad entry name)");
        ze.initCause(e);
        throw ze;
    }
}

String toString(byte[] ba, int off, int length) {
try {
return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();
Expand Down Expand Up @@ -98,10 +90,6 @@ static String toStringUTF8(byte[] ba, int len) {
return UTF8.toString(ba, 0, len);
}

// Decodes len bytes of ba, beginning at index off, by delegating to
// UTF8.toString and returning its result unchanged.
static String toStringUTF8(byte[] ba, int off, int len) {
    final String decoded = UTF8.toString(ba, off, len);
    return decoded;
}

// This implementation always answers false.
// NOTE(review): presumably the UTF-8 specific coder overrides this to
// return true - confirm against the subclass.
boolean isUTF8() {
    final boolean utf8 = false;
    return utf8;
}
Expand All @@ -110,15 +98,33 @@ boolean isUTF8() {
// we first decoded the byte sequence to a String, then appended '/' if no
// trailing slash was found, then called String.hashCode(). This
// normalization ensures we can simplify and speed up lookups.
int normalizedHash(byte[] a, int off, int len) {
//
// Does encoding error checking and hashing in a single pass for efficiency.
// On an error, this function will throw CharacterCodingException while the
// UTF8ZipCoder override will throw IllegalArgumentException, so we declare
// throws Exception to keep things simple.
int checkedHash(byte[] a, int off, int len) throws Exception {
if (len == 0) {
return 0;
}
return normalizedHashDecode(0, a, off, off + len);

int h = 0;
// cb will be a newly allocated CharBuffer with pos == 0,
// arrayOffset == 0, backed by an array.
CharBuffer cb = decoder().decode(ByteBuffer.wrap(a, off, len));
int limit = cb.limit();
char[] decoded = cb.array();
for (int i = 0; i < limit; i++) {
h = 31 * h + decoded[i];
}
if (limit > 0 && decoded[limit - 1] != '/') {
h = 31 * h + '/';
}
return h;
}

// Matching normalized hash code function for Strings
static int normalizedHash(String name) {
// Hash function equivalent of checkedHash for String inputs
static int hash(String name) {
int hsh = name.hashCode();
int len = name.length();
if (len > 0 && name.charAt(len - 1) != '/') {
Expand All @@ -133,29 +139,6 @@ boolean hasTrailingSlash(byte[] a, int end) {
Arrays.mismatch(a, end - slashBytes.length, end, slashBytes, 0, slashBytes.length) == -1;
}

// Slow path for normalizedHash: decodes the bytes in [off, end) to chars and
// folds them into the running hash h, then folds in a trailing '/' when the
// decoded text does not already end with one. Used for non-UTF8 charsets and
// when the UTF8 implementation abandons its ASCII fast-path, which is why
// {@code h} may arrive as a partially calculated hash code.
//
// If the bytes cannot be decoded, h is returned exactly as it was passed in.
int normalizedHashDecode(int h, byte[] a, int off, int end) {
    final CharBuffer chars;
    try {
        // decode() hands back a newly allocated, array-backed CharBuffer
        // with pos == 0 and arrayOffset == 0.
        chars = decoder().decode(ByteBuffer.wrap(a, off, end - off));
    } catch (CharacterCodingException ignored) {
        // Deliberately ignored - return the hash code generated so far.
        return h;
    }

    final int count = chars.limit();
    final char[] buf = chars.array();
    int hash = h;
    int i = 0;
    while (i < count) {
        hash = 31 * hash + buf[i++];
    }
    // Normalize: hash as though the name always ended with a slash.
    if (count > 0 && buf[count - 1] != '/') {
        hash = 31 * hash + '/';
    }
    return hash;
}

private byte[] slashBytes;
private final Charset cs;
protected CharsetDecoder dec;
Expand Down Expand Up @@ -211,25 +194,6 @@ boolean isUTF8() {
return true;
}

// Checks the len bytes of a starting at pos by scanning for the first
// negative (non-ASCII) byte and, if one is found, decoding the remainder of
// the range with ZipCoder.toStringUTF8. A range with no negative bytes is
// accepted without decoding anything.
//
// Any exception raised during the scan or the decode is reported as a
// ZipException carrying the "bad entry name" message; the original exception
// is attached as its cause so the underlying failure is not lost.
@Override
void checkEncoding(byte[] a, int pos, int len) throws ZipException {
    try {
        int end = pos + len;
        while (pos < end) {
            // ASCII fast-path: When checking that a range of bytes is
            // valid UTF-8, we can avoid some allocation by skipping
            // past bytes in the 0-127 range
            if (a[pos] < 0) {
                // Decode everything from the first non-ASCII byte onward;
                // the result is discarded, only a failure matters.
                ZipCoder.toStringUTF8(a, pos, end - pos);
                break;
            }
            pos++;
        }
    } catch (Exception e) {
        // ZipException has no (message, cause) constructor, so chain the
        // cause explicitly before throwing.
        ZipException ze = new ZipException("invalid CEN header (bad entry name)");
        ze.initCause(e);
        throw ze;
    }
}

@Override
String toString(byte[] ba, int off, int length) {
return JLA.newStringUTF8NoRepl(ba, off, length);
Expand All @@ -241,7 +205,7 @@ byte[] getBytes(String s) {
}

@Override
int normalizedHash(byte[] a, int off, int len) {
int checkedHash(byte[] a, int off, int len) throws Exception {
if (len == 0) {
return 0;
}
Expand All @@ -250,18 +214,17 @@ int normalizedHash(byte[] a, int off, int len) {
int h = 0;
while (off < end) {
byte b = a[off];
if (b < 0) {
if (b >= 0) {
// ASCII, keep going
h = 31 * h + b;
off++;
} else {
// Non-ASCII, fall back to decoding a String
// We avoid using decoder() here since the UTF8ZipCoder is
// shared and that decoder is not thread safe.
// We also avoid the JLA.newStringUTF8NoRepl variant at
// this point to avoid throwing exceptions eagerly when
// opening ZipFiles (exceptions are expected when accessing
// malformed entries.)
return normalizedHash(new String(a, end - len, len, UTF_8.INSTANCE));
} else {
h = 31 * h + b;
off++;
// We use the JLA.newStringUTF8NoRepl variant to throw
// exceptions eagerly when opening ZipFiles
return hash(JLA.newStringUTF8NoRepl(a, end - len, len));
}
}

Expand Down

0 comments on commit c8de943

Please sign in to comment.