8260617: Merge ZipFile encoding check with the initial hash calculation

Reviewed-by: lancea
openjdk · Feb 3, 2021 · c8de943 · c8de943
1 parent ae2c5f0
commit c8de943
Show file tree

Hide file tree

Showing 2 changed files with 109 additions and 125 deletions.
diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java b/src/java.base/share/classes/java/util/zip/ZipCoder.java
@@ -54,14 +54,6 @@ public static ZipCoder get(Charset charset) {
         return new ZipCoder(charset);
     }
 
-    void checkEncoding(byte[] a, int pos, int nlen) throws ZipException {
-        try {
-            toString(a, pos, nlen);
-        } catch(Exception e) {
-            throw new ZipException("invalid CEN header (bad entry name)");
-        }
-    }
-
     String toString(byte[] ba, int off, int length) {
         try {
             return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();
@@ -98,10 +90,6 @@ static String toStringUTF8(byte[] ba, int len) {
         return UTF8.toString(ba, 0, len);
     }
 
-    static String toStringUTF8(byte[] ba, int off, int len) {
-        return UTF8.toString(ba, off, len);
-    }
-
     boolean isUTF8() {
         return false;
     }
@@ -110,15 +98,33 @@ boolean isUTF8() {
     // we first decoded the byte sequence to a String, then appended '/' if no
     // trailing slash was found, then called String.hashCode(). This
     // normalization ensures we can simplify and speed up lookups.
-    int normalizedHash(byte[] a, int off, int len) {
+    //
+    // Does encoding error checking and hashing in a single pass for efficiency.
+    // On an error, this function will throw CharacterCodingException while the
+    // UTF8ZipCoder override will throw IllegalArgumentException, so we declare
+    // throws Exception to keep things simple.
+    int checkedHash(byte[] a, int off, int len) throws Exception {
         if (len == 0) {
             return 0;
         }
-        return normalizedHashDecode(0, a, off, off + len);
+
+        int h = 0;
+        // cb will be a newly allocated CharBuffer with pos == 0,
+        // arrayOffset == 0, backed by an array.
+        CharBuffer cb = decoder().decode(ByteBuffer.wrap(a, off, len));
+        int limit = cb.limit();
+        char[] decoded = cb.array();
+        for (int i = 0; i < limit; i++) {
+            h = 31 * h + decoded[i];
+        }
+        if (limit > 0 && decoded[limit - 1] != '/') {
+            h = 31 * h + '/';
+        }
+        return h;
     }
 
-    // Matching normalized hash code function for Strings
-    static int normalizedHash(String name) {
+    // Hash function equivalent of checkedHash for String inputs
+    static int hash(String name) {
         int hsh = name.hashCode();
         int len = name.length();
         if (len > 0 && name.charAt(len - 1) != '/') {
@@ -133,29 +139,6 @@ boolean hasTrailingSlash(byte[] a, int end) {
             Arrays.mismatch(a, end - slashBytes.length, end, slashBytes, 0, slashBytes.length) == -1;
     }
 
-    // Implements normalizedHash by decoding byte[] to char[] and then computing
-    // the hash. This is a slow-path used for non-UTF8 charsets and also when
-    // aborting the ASCII fast-path in the UTF8 implementation, so {@code h}
-    // might be a partially calculated hash code
-    int normalizedHashDecode(int h, byte[] a, int off, int end) {
-        try {
-            // cb will be a newly allocated CharBuffer with pos == 0,
-            // arrayOffset == 0, backed by an array.
-            CharBuffer cb = decoder().decode(ByteBuffer.wrap(a, off, end - off));
-            int limit = cb.limit();
-            char[] decoded = cb.array();
-            for (int i = 0; i < limit; i++) {
-                h = 31 * h + decoded[i];
-            }
-            if (limit > 0 && decoded[limit - 1] != '/') {
-                h = 31 * h + '/';
-            }
-        } catch (CharacterCodingException cce) {
-            // Ignore - return the hash code generated so far.
-        }
-        return h;
-    }
-
     private byte[] slashBytes;
     private final Charset cs;
     protected CharsetDecoder dec;
@@ -211,25 +194,6 @@ boolean isUTF8() {
             return true;
         }
 
-        @Override
-        void checkEncoding(byte[] a, int pos, int len) throws ZipException {
-            try {
-                int end = pos + len;
-                while (pos < end) {
-                    // ASCII fast-path: When checking that a range of bytes is
-                    // valid UTF-8, we can avoid some allocation by skipping
-                    // past bytes in the 0-127 range
-                    if (a[pos] < 0) {
-                        ZipCoder.toStringUTF8(a, pos, end - pos);
-                        break;
-                    }
-                    pos++;
-                }
-            } catch(Exception e) {
-                throw new ZipException("invalid CEN header (bad entry name)");
-            }
-        }
-
         @Override
         String toString(byte[] ba, int off, int length) {
             return JLA.newStringUTF8NoRepl(ba, off, length);
@@ -241,7 +205,7 @@ byte[] getBytes(String s) {
         }
 
         @Override
-        int normalizedHash(byte[] a, int off, int len) {
+        int checkedHash(byte[] a, int off, int len) throws Exception {
             if (len == 0) {
                 return 0;
             }
@@ -250,18 +214,17 @@ int normalizedHash(byte[] a, int off, int len) {
             int h = 0;
             while (off < end) {
                 byte b = a[off];
-                if (b < 0) {
+                if (b >= 0) {
+                    // ASCII, keep going
+                    h = 31 * h + b;
+                    off++;
+                } else {
                     // Non-ASCII, fall back to decoding a String
                     // We avoid using decoder() here since the UTF8ZipCoder is
                     // shared and that decoder is not thread safe.
-                    // We also avoid the JLA.newStringUTF8NoRepl variant at
-                    // this point to avoid throwing exceptions eagerly when
-                    // opening ZipFiles (exceptions are expected when accessing
-                    // malformed entries.)
-                    return normalizedHash(new String(a, end - len, len, UTF_8.INSTANCE));
-                } else {
-                    h = 31 * h + b;
-                    off++;
+                    // We use the JLA.newStringUTF8NoRepl variant so that
+                    // malformed input throws eagerly when opening ZipFiles.
+                    return hash(JLA.newStringUTF8NoRepl(a, end - len, len));
                 }
             }