From d0518e7c71a5c3009b4db0efba441355178537ff Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Tue, 11 Mar 2025 11:08:42 +0530 Subject: [PATCH 01/15] 8347712: IllegalStateException on multithreaded ZipFile access with non-UTF8 charset --- .../share/classes/java/util/zip/ZipCoder.java | 10 +- .../share/classes/java/util/zip/ZipFile.java | 135 +++++++++++------- .../zip/ZipFile/ZipFileSharedSourceTest.java | 134 +++++++++++++++++ 3 files changed, 227 insertions(+), 52 deletions(-) create mode 100644 test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java b/src/java.base/share/classes/java/util/zip/ZipCoder.java index 8d4a05389eede..1546748771227 100644 --- a/src/java.base/share/classes/java/util/zip/ZipCoder.java +++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 2009, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 2009, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -38,7 +38,13 @@ import sun.nio.cs.UTF_8; /** - * Utility class for ZIP file entry name and comment decoding and encoding + * Utility class for ZIP file entry name and comment decoding and encoding. + *

+ * The {@linkplain #UTF8 UTF-8 ZipCoder instance}, which gets returned by {@link #get(Charset)} + * for {@linkplain java.nio.charset.StandardCharsets#UTF_8 UTF-8 Charset} is thread-safe + * and can be used by concurrently by multiple threads. All other {@code ZipCoder} instances + * are not thread-safe and external synchronization is required by callers, if those + * instances are to be used concurrently by multiple threads. */ class ZipCoder { diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java index f736a092099e1..dc25618146651 100644 --- a/src/java.base/share/classes/java/util/zip/ZipFile.java +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java @@ -1,5 +1,5 @@ /* - * Copyright (c) 1995, 2024, Oracle and/or its affiliates. All rights reserved. + * Copyright (c) 1995, 2025, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it @@ -82,6 +82,9 @@ public class ZipFile implements ZipConstants, Closeable { private final String filePath; // ZIP file path private final String fileName; // name of the file + // the ZipCoder instance is derived from the Charset passed to the ZipFile constructor + // and will be used for decoding the non-UTF-8 entry names and the ZIP file comment. + private final ZipCoder entryNameCommentCoder; private volatile boolean closeRequested; // The "resource" used by this ZIP file that needs to be @@ -198,7 +201,8 @@ public ZipFile(File file, int mode, Charset charset) throws IOException this.fileName = file.getName(); long t0 = System.nanoTime(); - this.res = new CleanableResource(this, ZipCoder.get(charset), file, mode); + this.res = new CleanableResource(this, charset, file, mode); + this.entryNameCommentCoder = ZipCoder.get(charset); PerfCounter.getZipFileOpenTime().addElapsedTimeFrom(t0); PerfCounter.getZipFileCount().increment(); @@ -265,7 +269,7 @@ public String getComment() { // If there is a problem decoding the byte array which represents // the ZIP file comment, return null; try { - return res.zsrc.zc.toString(res.zsrc.comment); + return entryNameCommentCoder.toString(res.zsrc.comment); } catch (IllegalArgumentException iae) { return null; } @@ -288,7 +292,7 @@ public ZipEntry getEntry(String name) { // Look up the name and CEN header position of the entry. // The resolved name may include a trailing slash. // See Source::getEntryPos for details. - EntryPos pos = res.zsrc.getEntryPos(name, true); + EntryPos pos = res.zsrc.getEntryPos(name, true, entryNameCommentCoder); if (pos != null) { entry = getZipEntry(pos.name, pos.pos); } @@ -328,7 +332,7 @@ public InputStream getInputStream(ZipEntry entry) throws IOException { if (Objects.equals(lastEntryName, entry.name)) { pos = lastEntryPos; } else { - EntryPos entryPos = zsrc.getEntryPos(entry.name, false); + EntryPos entryPos = zsrc.getEntryPos(entry.name, false, entryNameCommentCoder); if (entryPos != null) { pos = entryPos.pos; } else { @@ -363,6 +367,16 @@ public InputStream getInputStream(ZipEntry entry) throws IOException { } } + /** + * {@return true if the ZIP entry corresponding to the given {@code pos} uses UTF-8 + * for entry name and comment field encoding, false otherwise} + * @param cen the CEN + * @param pos the entry position + */ + private static boolean useUTF8Coder(final byte[] cen, final int pos) { + return (CENFLG(cen, pos) & USE_UTF8) != 0; + } + private static class InflaterCleanupAction implements Runnable { private final Inflater inf; private final CleanableResource res; @@ -561,7 +575,7 @@ public Stream stream() { private String getEntryName(int pos) { byte[] cen = res.zsrc.cen; int nlen = CENNAM(cen, pos); - ZipCoder zc = res.zsrc.zipCoderForPos(pos); + ZipCoder zc = useUTF8Coder(cen, pos) ? ZipCoder.UTF8 : entryNameCommentCoder; return zc.toString(cen, pos + CENHDR, nlen); } @@ -632,7 +646,7 @@ private ZipEntry getZipEntry(String name, int pos) { } if (clen != 0) { int start = pos + CENHDR + nlen + elen; - ZipCoder zc = res.zsrc.zipCoderForPos(pos); + ZipCoder zc = useUTF8Coder(cen, pos) ? ZipCoder.UTF8 : entryNameCommentCoder; e.comment = zc.toString(cen, start, clen); } lastEntryName = e.name; @@ -664,11 +678,11 @@ private static class CleanableResource implements Runnable { Source zsrc; - CleanableResource(ZipFile zf, ZipCoder zc, File file, int mode) throws IOException { + CleanableResource(ZipFile zf, Charset entryNameCharset, File file, int mode) throws IOException { this.cleanable = CleanerFactory.cleaner().register(zf, this); this.istreams = Collections.newSetFromMap(new WeakHashMap<>()); this.inflaterCache = new ArrayDeque<>(); - this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, zc); + this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, entryNameCharset); } void clean() { @@ -1109,6 +1123,10 @@ public void setExternalFileAttributes(ZipEntry ze, int externalFileAttributes) { // Represents the resolved name and position of a CEN record static record EntryPos(String name, int pos) {} + // Implementation note: This class must be thread-safe. + // Each instance of Source could be shared between multiple different instances of ZipFile. + // Although ZipFile isn't thread-safe, the fact that separate instances of ZipFile could + // still concurrently use the same Source instance, mandates that Source must be thread-safe. private static class Source { // While this is only used from ZipFile, defining it there would cause // a bootstrap cycle that would leave this initialized as null @@ -1121,7 +1139,6 @@ private static class Source { private static final int MAX_CEN_SIZE = ArraysSupport.SOFT_MAX_ARRAY_LENGTH; private final Key key; // the key in files - private final @Stable ZipCoder zc; // ZIP coder used to decode/encode private int refs = 1; @@ -1157,8 +1174,9 @@ private static class Source { private int[] entries; // array of hashed cen entry // Checks the entry at offset pos in the CEN, calculates the Entry values as per above, - // then returns the length of the entry name. - private int checkAndAddEntry(int pos, int index) + // then returns the length of the entry name. Uses the given zipCoder for processing the + // entry name and the entry comment (if any). + private int checkAndAddEntry(final int pos, final int index, final ZipCoder zipCoder) throws ZipException { byte[] cen = this.cen; @@ -1196,21 +1214,20 @@ private int checkAndAddEntry(int pos, int index) } try { - ZipCoder zcp = zipCoderForPos(pos); - int hash = zcp.checkedHash(cen, entryPos, nlen); + int hash = zipCoder.checkedHash(cen, entryPos, nlen); int hsh = (hash & 0x7fffffff) % tablelen; int next = table[hsh]; table[hsh] = index; // Record the CEN offset and the name hash in our hash cell. - entries[index++] = hash; - entries[index++] = next; - entries[index ] = pos; + entries[index] = hash; + entries[index + 1] = next; + entries[index + 2] = pos; // Validate comment if it exists. // If the bytes representing the comment cannot be converted to // a String via zcp.toString, an Exception will be thrown if (clen > 0) { int start = entryPos + nlen + elen; - zcp.toString(cen, start, clen); + zipCoder.toString(cen, start, clen); } } catch (Exception e) { zerror("invalid CEN header (bad entry name or comment)"); @@ -1389,34 +1406,42 @@ private static boolean isZip64ExtBlockSizeValid(int blockSize, long csize, private int tablelen; // number of hash heads /** - * A class representing a key to a ZIP file. A key is based - * on the file key if available, or the path value if the - * file key is not available. The key is also based on the - * file's last modified time to allow for cases where a ZIP - * file is re-opened after it has been modified. + * A class representing a key to the Source of a ZipFile. + * The Key is composed of: + * - The BasicFileAttributes.fileKey() if available, or the Path of the ZIP file + * if the fileKey() is not available. + * - The ZIP file's last modified time (to allow for cases + * where a ZIP file is re-opened after it has been modified). + * - The Charset, that was provided when constructing a ZipFile instance, + * for reading non-UTF-8 entry names and comments. + * The unique combination of these components identifies a Source of a ZipFile. */ private static class Key { - final BasicFileAttributes attrs; - File file; - final boolean utf8; + private final BasicFileAttributes attrs; + private final File file; + // the Charset to be used for processing non-UTF-8 entry names in the ZIP file + // and the ZIP file comment + private final Charset entryNameCommentCharset; - public Key(File file, BasicFileAttributes attrs, ZipCoder zc) { + public Key(File file, BasicFileAttributes attrs, Charset entryNameCommentCharset) { this.attrs = attrs; this.file = file; - this.utf8 = zc.isUTF8(); + this.entryNameCommentCharset = entryNameCommentCharset; } + @Override public int hashCode() { - long t = utf8 ? 0 : Long.MAX_VALUE; + long t = entryNameCommentCharset.hashCode(); t += attrs.lastModifiedTime().toMillis(); Object fk = attrs.fileKey(); return Long.hashCode(t) + (fk != null ? fk.hashCode() : file.hashCode()); } + @Override public boolean equals(Object obj) { if (obj instanceof Key key) { - if (key.utf8 != utf8) { + if (!entryNameCommentCharset.equals(key.entryNameCommentCharset)) { return false; } if (!attrs.lastModifiedTime().equals(key.attrs.lastModifiedTime())) { @@ -1440,12 +1465,12 @@ public boolean equals(Object obj) { private static final java.nio.file.FileSystem builtInFS = DefaultFileSystemProvider.theFileSystem(); - static Source get(File file, boolean toDelete, ZipCoder zc) throws IOException { + static Source get(File file, boolean toDelete, Charset entryNameCharset) throws IOException { final Key key; try { key = new Key(file, Files.readAttributes(builtInFS.getPath(file.getPath()), - BasicFileAttributes.class), zc); + BasicFileAttributes.class), entryNameCharset); } catch (InvalidPathException ipe) { throw new IOException(ipe); } @@ -1457,7 +1482,7 @@ static Source get(File file, boolean toDelete, ZipCoder zc) throws IOException { return src; } } - src = new Source(key, toDelete, zc); + src = new Source(key, toDelete); synchronized (files) { Source prev = files.putIfAbsent(key, src); @@ -1479,8 +1504,7 @@ static void release(Source src) throws IOException { } } - private Source(Key key, boolean toDelete, ZipCoder zc) throws IOException { - this.zc = zc; + private Source(Key key, boolean toDelete) throws IOException { this.key = key; if (toDelete) { if (OperatingSystem.isWindows()) { @@ -1705,6 +1729,7 @@ private void initCEN(int knownTotal) throws IOException { int idx = 0; // Index into the entries array int pos = 0; manifestNum = 0; + ZipCoder zipCoder = null; int limit = cen.length - CENHDR; while (pos <= limit) { if (idx >= entriesLength) { @@ -1716,8 +1741,20 @@ private void initCEN(int knownTotal) throws IOException { } int entryPos = pos + CENHDR; + final ZipCoder entryZipCoder; + if (useUTF8Coder(cen, pos)) { + // entry name explicitly wants UTF-8 Charset + entryZipCoder = ZipCoder.UTF8; + } else { + // use the ZipCoder corresponding to the Charset that + // was provided when constructing the ZipFile + if (zipCoder == null) { + zipCoder = ZipCoder.get(key.entryNameCommentCharset); + } + entryZipCoder = zipCoder; + } // Checks the entry and adds values to entries[idx ... idx+2] - int nlen = checkAndAddEntry(pos, idx); + int nlen = checkAndAddEntry(pos, idx, entryZipCoder); idx += 3; // Adds name to metanames. @@ -1741,7 +1778,7 @@ private void initCEN(int knownTotal) throws IOException { try { // Compute hash code of name from "META-INF/versions/{version)/{name} int prefixLen = META_INF_VERSIONS_LEN + DecimalDigits.stringSize(version); - int hashCode = zipCoderForPos(pos).checkedHash(cen, + int hashCode = entryZipCoder.checkedHash(cen, entryPos + prefixLen, nlen - prefixLen); // Register version for this hash code @@ -1788,7 +1825,8 @@ private static void zerror(String msg) throws ZipException { * Returns the resolved name and position of the ZIP cen entry corresponding * to the specified entry name, or {@code null} if not found. */ - private EntryPos getEntryPos(String name, boolean addSlash) { + private EntryPos getEntryPos(final String name, final boolean addSlash, + final ZipCoder entryNameCommentCoder) { if (total == 0) { return null; } @@ -1807,8 +1845,15 @@ private EntryPos getEntryPos(String name, boolean addSlash) { int noff = pos + CENHDR; int nlen = CENNAM(cen, pos); - ZipCoder zc = zipCoderForPos(pos); - + final ZipCoder zc; + if (useUTF8Coder(cen, pos)) { + // entry name explicitly wants UTF-8 Charset + zc = ZipCoder.UTF8; + } else { + // use the ZipCoder corresponding to the Charset that + // was provided when constructing the ZipFile + zc = entryNameCommentCoder; + } // Compare the lookup name with the name encoded in the CEN switch (zc.compare(name, cen, noff, nlen, addSlash)) { case ZipCoder.EXACT_MATCH: @@ -1834,16 +1879,6 @@ private EntryPos getEntryPos(String name, boolean addSlash) { return null; } - private ZipCoder zipCoderForPos(int pos) { - if (zc.isUTF8()) { - return zc; - } - if ((CENFLG(cen, pos) & USE_UTF8) != 0) { - return ZipCoder.UTF8; - } - return zc; - } - /** * Returns true if the bytes represent a non-directory name * beginning with "META-INF/", disregarding ASCII case. diff --git a/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java b/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java new file mode 100644 index 0000000000000..9df378b9296ef --- /dev/null +++ b/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java @@ -0,0 +1,134 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import java.io.OutputStream; +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.ArrayList; +import java.util.List; +import java.util.concurrent.Callable; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; +import java.util.concurrent.Future; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import java.util.zip.ZipOutputStream; + +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import static java.nio.charset.StandardCharsets.US_ASCII; + +/* + * @test + * @bug 8347712 + * @summary verify that different instances of java.util.zip.ZipFile do not share + * the same instance of (non-thread-safe) java.nio.charset.CharsetEncoder/CharsetDecoder + * @run junit ZipFileSharedSourceTest + */ +public class ZipFileSharedSourceTest { + + static Path createZipFile(final Charset entryNameCharset) throws Exception { + final Path zipFilePath = Files.createTempFile(Path.of("."), "8347712", ".zip"); + try (OutputStream os = Files.newOutputStream(zipFilePath); + ZipOutputStream zos = new ZipOutputStream(os, entryNameCharset)) { + final int numEntries = 10240; + for (int i = 1; i <= numEntries; i++) { + final ZipEntry entry = new ZipEntry("entry-" + i); + zos.putNextEntry(entry); + zos.write("foo bar".getBytes(US_ASCII)); + zos.closeEntry(); + } + } + return zipFilePath; + } + + static List charsets() { + return List.of( + Arguments.of(StandardCharsets.UTF_8), + Arguments.of(StandardCharsets.ISO_8859_1), + Arguments.of(US_ASCII) + ); + } + + /** + * Creates multiple instances of java.util.zip.ZipFile with the given {@code entryNameCharset} + * for the same underlying ZIP file. Then each instance of the ZipFile is iterated + * over its entries, concurrently in a separate thread. Verifies that such access + * to independent ZipFile instances, doesn't lead to unexpected failures contributed + * by concurrent thread execution. + */ + @ParameterizedTest + @MethodSource("charsets") + void testMultipleZipFileInstances(final Charset entryNameCommentCharset) throws Exception { + final Path zipFilePath = createZipFile(entryNameCommentCharset); + final int numTasks = 200; + final CountDownLatch startLatch = new CountDownLatch(numTasks); + final List> results = new ArrayList<>(); + try (final ExecutorService executor = + Executors.newThreadPerTaskExecutor(Thread.ofPlatform().factory())) { + for (int i = 0; i < numTasks; i++) { + final var task = new ZipEntryIteratingTask(zipFilePath, entryNameCommentCharset, + startLatch); + results.add(executor.submit(task)); + } + System.out.println(numTasks + " tasks submitted, waiting for them to complete"); + for (final Future f : results) { + f.get(); + } + } + System.out.println("All " + numTasks + " tasks completed successfully"); + } + + private static final class ZipEntryIteratingTask implements Callable { + private final Path file; + private final Charset entryNameCharset; + private final CountDownLatch startLatch; + + private ZipEntryIteratingTask(final Path file, final Charset entryNameCharset, + final CountDownLatch startLatch) { + this.file = file; + this.entryNameCharset = entryNameCharset; + this.startLatch = startLatch; + } + + @Override + public Void call() throws Exception { + // let other tasks know we are ready to run + this.startLatch.countDown(); + // wait for other tasks to be ready to run + this.startLatch.await(); + // create a new instance of ZipFile and iterate over the entries + try (final ZipFile zf = new ZipFile(this.file.toFile(), this.entryNameCharset)) { + final var entries = zf.entries(); + while (entries.hasMoreElements()) { + entries.nextElement(); + } + } + return null; + } + } +} From b9734fa4042a72b8ed66f5c0cfeb8a5549845ee1 Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Tue, 11 Mar 2025 20:12:03 +0530 Subject: [PATCH 02/15] tiny typo fix in newly introduced documentation --- src/java.base/share/classes/java/util/zip/ZipCoder.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java b/src/java.base/share/classes/java/util/zip/ZipCoder.java index 1546748771227..4b4ce46c70275 100644 --- a/src/java.base/share/classes/java/util/zip/ZipCoder.java +++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java @@ -42,7 +42,7 @@ *

* The {@linkplain #UTF8 UTF-8 ZipCoder instance}, which gets returned by {@link #get(Charset)} * for {@linkplain java.nio.charset.StandardCharsets#UTF_8 UTF-8 Charset} is thread-safe - * and can be used by concurrently by multiple threads. All other {@code ZipCoder} instances + * and can be used concurrently by multiple threads. All other {@code ZipCoder} instances * are not thread-safe and external synchronization is required by callers, if those * instances are to be used concurrently by multiple threads. */ From b6486e7a4d2d18b29277cdc79288c5b6732630c9 Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Wed, 12 Mar 2025 11:55:27 +0530 Subject: [PATCH 03/15] rename field --- .../share/classes/java/util/zip/ZipFile.java | 49 +++++++++++-------- 1 file changed, 28 insertions(+), 21 deletions(-) diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java index dc25618146651..e180a33e6f9d1 100644 --- a/src/java.base/share/classes/java/util/zip/ZipFile.java +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java @@ -84,7 +84,7 @@ public class ZipFile implements ZipConstants, Closeable { private final String fileName; // name of the file // the ZipCoder instance is derived from the Charset passed to the ZipFile constructor // and will be used for decoding the non-UTF-8 entry names and the ZIP file comment. - private final ZipCoder entryNameCommentCoder; + private final ZipCoder zipCoder; private volatile boolean closeRequested; // The "resource" used by this ZIP file that needs to be @@ -202,7 +202,7 @@ public ZipFile(File file, int mode, Charset charset) throws IOException long t0 = System.nanoTime(); this.res = new CleanableResource(this, charset, file, mode); - this.entryNameCommentCoder = ZipCoder.get(charset); + this.zipCoder = ZipCoder.get(charset); PerfCounter.getZipFileOpenTime().addElapsedTimeFrom(t0); PerfCounter.getZipFileCount().increment(); @@ -269,7 +269,7 @@ public String getComment() { // If there is a problem decoding the byte array which represents // the ZIP file comment, return null; try { - return entryNameCommentCoder.toString(res.zsrc.comment); + return zipCoder.toString(res.zsrc.comment); } catch (IllegalArgumentException iae) { return null; } @@ -292,7 +292,7 @@ public ZipEntry getEntry(String name) { // Look up the name and CEN header position of the entry. // The resolved name may include a trailing slash. // See Source::getEntryPos for details. - EntryPos pos = res.zsrc.getEntryPos(name, true, entryNameCommentCoder); + EntryPos pos = res.zsrc.getEntryPos(name, true, zipCoder); if (pos != null) { entry = getZipEntry(pos.name, pos.pos); } @@ -332,7 +332,7 @@ public InputStream getInputStream(ZipEntry entry) throws IOException { if (Objects.equals(lastEntryName, entry.name)) { pos = lastEntryPos; } else { - EntryPos entryPos = zsrc.getEntryPos(entry.name, false, entryNameCommentCoder); + EntryPos entryPos = zsrc.getEntryPos(entry.name, false, zipCoder); if (entryPos != null) { pos = entryPos.pos; } else { @@ -575,7 +575,7 @@ public Stream stream() { private String getEntryName(int pos) { byte[] cen = res.zsrc.cen; int nlen = CENNAM(cen, pos); - ZipCoder zc = useUTF8Coder(cen, pos) ? ZipCoder.UTF8 : entryNameCommentCoder; + ZipCoder zc = useUTF8Coder(cen, pos) ? ZipCoder.UTF8 : zipCoder; return zc.toString(cen, pos + CENHDR, nlen); } @@ -646,7 +646,7 @@ private ZipEntry getZipEntry(String name, int pos) { } if (clen != 0) { int start = pos + CENHDR + nlen + elen; - ZipCoder zc = useUTF8Coder(cen, pos) ? ZipCoder.UTF8 : entryNameCommentCoder; + ZipCoder zc = useUTF8Coder(cen, pos) ? ZipCoder.UTF8 : zipCoder; e.comment = zc.toString(cen, start, clen); } lastEntryName = e.name; @@ -678,11 +678,11 @@ private static class CleanableResource implements Runnable { Source zsrc; - CleanableResource(ZipFile zf, Charset entryNameCharset, File file, int mode) throws IOException { + CleanableResource(ZipFile zf, Charset charset, File file, int mode) throws IOException { this.cleanable = CleanerFactory.cleaner().register(zf, this); this.istreams = Collections.newSetFromMap(new WeakHashMap<>()); this.inflaterCache = new ArrayDeque<>(); - this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, entryNameCharset); + this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, charset); } void clean() { @@ -1421,17 +1421,24 @@ private static class Key { private final File file; // the Charset to be used for processing non-UTF-8 entry names in the ZIP file // and the ZIP file comment - private final Charset entryNameCommentCharset; - - public Key(File file, BasicFileAttributes attrs, Charset entryNameCommentCharset) { + private final Charset charset; + + /** + * Constructs a {@code Key} to a {@code Source} of a {@code ZipFile} + * + * @param file the ZIP file + * @param attrs the attributes of the ZIP file + * @param charset the Charset that was provided when constructing the ZipFile instance + */ + public Key(File file, BasicFileAttributes attrs, Charset charset) { this.attrs = attrs; this.file = file; - this.entryNameCommentCharset = entryNameCommentCharset; + this.charset = charset; } @Override public int hashCode() { - long t = entryNameCommentCharset.hashCode(); + long t = charset.hashCode(); t += attrs.lastModifiedTime().toMillis(); Object fk = attrs.fileKey(); return Long.hashCode(t) + @@ -1441,7 +1448,7 @@ public int hashCode() { @Override public boolean equals(Object obj) { if (obj instanceof Key key) { - if (!entryNameCommentCharset.equals(key.entryNameCommentCharset)) { + if (!charset.equals(key.charset)) { return false; } if (!attrs.lastModifiedTime().equals(key.attrs.lastModifiedTime())) { @@ -1465,12 +1472,12 @@ public boolean equals(Object obj) { private static final java.nio.file.FileSystem builtInFS = DefaultFileSystemProvider.theFileSystem(); - static Source get(File file, boolean toDelete, Charset entryNameCharset) throws IOException { + static Source get(File file, boolean toDelete, Charset charset) throws IOException { final Key key; try { key = new Key(file, Files.readAttributes(builtInFS.getPath(file.getPath()), - BasicFileAttributes.class), entryNameCharset); + BasicFileAttributes.class), charset); } catch (InvalidPathException ipe) { throw new IOException(ipe); } @@ -1749,7 +1756,7 @@ private void initCEN(int knownTotal) throws IOException { // use the ZipCoder corresponding to the Charset that // was provided when constructing the ZipFile if (zipCoder == null) { - zipCoder = ZipCoder.get(key.entryNameCommentCharset); + zipCoder = ZipCoder.get(key.charset); } entryZipCoder = zipCoder; } @@ -1823,10 +1830,10 @@ private static void zerror(String msg) throws ZipException { /* * Returns the resolved name and position of the ZIP cen entry corresponding - * to the specified entry name, or {@code null} if not found. + * to the specified entry name, or {@code null} if not found. */ private EntryPos getEntryPos(final String name, final boolean addSlash, - final ZipCoder entryNameCommentCoder) { + final ZipCoder zipCoder) { if (total == 0) { return null; } @@ -1852,7 +1859,7 @@ private EntryPos getEntryPos(final String name, final boolean addSlash, } else { // use the ZipCoder corresponding to the Charset that // was provided when constructing the ZipFile - zc = entryNameCommentCoder; + zc = zipCoder; } // Compare the lookup name with the name encoded in the CEN switch (zc.compare(name, cen, noff, nlen, addSlash)) { From 935b04ed00522aa92105292baa0693c55b1356ae Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Wed, 12 Mar 2025 12:14:03 +0530 Subject: [PATCH 04/15] add code comment --- src/java.base/share/classes/java/util/zip/ZipFile.java | 1 + 1 file changed, 1 insertion(+) diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java index e180a33e6f9d1..5d0e56b7e8d4e 100644 --- a/src/java.base/share/classes/java/util/zip/ZipFile.java +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java @@ -1748,6 +1748,7 @@ private void initCEN(int knownTotal) throws IOException { } int entryPos = pos + CENHDR; + // the ZipCoder for any non-UTF8 entries final ZipCoder entryZipCoder; if (useUTF8Coder(cen, pos)) { // entry name explicitly wants UTF-8 Charset From c2f29aea7353bbd2e37cb75e68402f4c1a54d89f Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Wed, 19 Mar 2025 19:35:30 +0530 Subject: [PATCH 05/15] Eirik's inputs - replace useUTF8Coder() with zipCoderFor() --- .../share/classes/java/util/zip/ZipCoder.java | 7 ++ .../share/classes/java/util/zip/ZipFile.java | 78 +++++++++---------- 2 files changed, 46 insertions(+), 39 deletions(-) diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java b/src/java.base/share/classes/java/util/zip/ZipCoder.java index 4b4ce46c70275..7b5db1c7dd62b 100644 --- a/src/java.base/share/classes/java/util/zip/ZipCoder.java +++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java @@ -180,6 +180,13 @@ protected CharsetDecoder decoder() { return dec; } + /** + * {@return the {@link Charset} used this {@code ZipCoder}} + */ + final Charset charset() { + return this.cs; + } + private CharsetEncoder encoder() { if (enc == null) { enc = cs.newEncoder() diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java index 5d0e56b7e8d4e..a4d14741f2feb 100644 --- a/src/java.base/share/classes/java/util/zip/ZipFile.java +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java @@ -201,8 +201,8 @@ public ZipFile(File file, int mode, Charset charset) throws IOException this.fileName = file.getName(); long t0 = System.nanoTime(); - this.res = new CleanableResource(this, charset, file, mode); this.zipCoder = ZipCoder.get(charset); + this.res = new CleanableResource(this, zipCoder, file, mode); PerfCounter.getZipFileOpenTime().addElapsedTimeFrom(t0); PerfCounter.getZipFileCount().increment(); @@ -368,13 +368,32 @@ public InputStream getInputStream(ZipEntry entry) throws IOException { } /** - * {@return true if the ZIP entry corresponding to the given {@code pos} uses UTF-8 - * for entry name and comment field encoding, false otherwise} + * Determines and returns a {@link ZipCoder} to use for decoding + * name and comment fields of the ZIP entry identified by the {@code pos} + * in the ZIP file's {@code cen}. + *

+ * A ZIP entry's name and comment fields may be encoded using UTF-8, in + * which case this method returns a UTF-8 capable {@code ZipCoder}. If the + * entry doesn't require UTF-8, then this method returns the {@code fallback} + * {@code ZipCoder}. + * * @param cen the CEN - * @param pos the entry position + * @param pos the ZIP entry's position in CEN + * @param fallback the fallback ZipCoder to return if the entry doesn't require UTF-8 */ - private static boolean useUTF8Coder(final byte[] cen, final int pos) { - return (CENFLG(cen, pos) & USE_UTF8) != 0; + private static ZipCoder zipCoderFor(final byte[] cen, final int pos, final ZipCoder fallback) { + if (fallback.isUTF8()) { + // the fallback ZipCoder is capable of handling UTF-8, + // so no need to parse the entry flags to determine if + // the entry has UTF-8 flag. + return fallback; + } + if ((CENFLG(cen, pos) & USE_UTF8) != 0) { + // entry requires a UTF-8 ZipCoder + return ZipCoder.UTF8; + } + // entry doesn't require a UTF-8 ZipCoder + return fallback; } private static class InflaterCleanupAction implements Runnable { @@ -575,7 +594,7 @@ public Stream stream() { private String getEntryName(int pos) { byte[] cen = res.zsrc.cen; int nlen = CENNAM(cen, pos); - ZipCoder zc = useUTF8Coder(cen, pos) ? ZipCoder.UTF8 : zipCoder; + ZipCoder zc = zipCoderFor(cen, pos, zipCoder); return zc.toString(cen, pos + CENHDR, nlen); } @@ -646,7 +665,7 @@ private ZipEntry getZipEntry(String name, int pos) { } if (clen != 0) { int start = pos + CENHDR + nlen + elen; - ZipCoder zc = useUTF8Coder(cen, pos) ? ZipCoder.UTF8 : zipCoder; + ZipCoder zc = zipCoderFor(cen, pos, zipCoder); e.comment = zc.toString(cen, start, clen); } lastEntryName = e.name; @@ -678,11 +697,12 @@ private static class CleanableResource implements Runnable { Source zsrc; - CleanableResource(ZipFile zf, Charset charset, File file, int mode) throws IOException { + CleanableResource(ZipFile zf, ZipCoder zipCoder, File file, int mode) throws IOException { + assert zipCoder != null : "null ZipCoder"; this.cleanable = CleanerFactory.cleaner().register(zf, this); this.istreams = Collections.newSetFromMap(new WeakHashMap<>()); this.inflaterCache = new ArrayDeque<>(); - this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, charset); + this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, zipCoder); } void clean() { @@ -1472,12 +1492,12 @@ public boolean equals(Object obj) { private static final java.nio.file.FileSystem builtInFS = DefaultFileSystemProvider.theFileSystem(); - static Source get(File file, boolean toDelete, Charset charset) throws IOException { + static Source get(File file, boolean toDelete, ZipCoder zipCoder) throws IOException { final Key key; try { key = new Key(file, Files.readAttributes(builtInFS.getPath(file.getPath()), - BasicFileAttributes.class), charset); + BasicFileAttributes.class), zipCoder.charset()); } catch (InvalidPathException ipe) { throw new IOException(ipe); } @@ -1489,7 +1509,7 @@ static Source get(File file, boolean toDelete, Charset charset) throws IOExcepti return src; } } - src = new Source(key, toDelete); + src = new Source(key, toDelete, zipCoder); synchronized (files) { Source prev = files.putIfAbsent(key, src); @@ -1511,7 +1531,7 @@ static void release(Source src) throws IOException { } } - private Source(Key key, boolean toDelete) throws IOException { + private Source(Key key, boolean toDelete, ZipCoder zipCoder) throws IOException { this.key = key; if (toDelete) { if (OperatingSystem.isWindows()) { @@ -1525,7 +1545,7 @@ private Source(Key key, boolean toDelete) throws IOException { this.zfile = new RandomAccessFile(key.file, "r"); } try { - initCEN(-1); + initCEN(-1, zipCoder); byte[] buf = new byte[4]; readFullyAt(buf, 0, 4, 0); this.startsWithLoc = (LOCSIG(buf) == LOCSIG); @@ -1680,7 +1700,7 @@ private End findEND() throws IOException { } // Reads ZIP file central directory. - private void initCEN(int knownTotal) throws IOException { + private void initCEN(final int knownTotal, final ZipCoder zipCoder) throws IOException { // Prefer locals for better performance during startup byte[] cen; if (knownTotal == -1) { @@ -1736,31 +1756,19 @@ private void initCEN(int knownTotal) throws IOException { int idx = 0; // Index into the entries array int pos = 0; manifestNum = 0; - ZipCoder zipCoder = null; int limit = cen.length - CENHDR; while (pos <= limit) { if (idx >= entriesLength) { // This will only happen if the ZIP file has an incorrect // ENDTOT field, which usually means it contains more than // 65535 entries. - initCEN(countCENHeaders(cen)); + initCEN(countCENHeaders(cen), zipCoder); return; } int entryPos = pos + CENHDR; // the ZipCoder for any non-UTF8 entries - final ZipCoder entryZipCoder; - if (useUTF8Coder(cen, pos)) { - // entry name explicitly wants UTF-8 Charset - entryZipCoder = ZipCoder.UTF8; - } else { - // use the ZipCoder corresponding to the Charset that - // was provided when constructing the ZipFile - if (zipCoder == null) { - zipCoder = ZipCoder.get(key.charset); - } - entryZipCoder = zipCoder; - } + final ZipCoder entryZipCoder = zipCoderFor(cen, pos, zipCoder); // Checks the entry and adds values to entries[idx ... idx+2] int nlen = checkAndAddEntry(pos, idx, entryZipCoder); idx += 3; @@ -1853,15 +1861,7 @@ private EntryPos getEntryPos(final String name, final boolean addSlash, int noff = pos + CENHDR; int nlen = CENNAM(cen, pos); - final ZipCoder zc; - if (useUTF8Coder(cen, pos)) { - // entry name explicitly wants UTF-8 Charset - zc = ZipCoder.UTF8; - } else { - // use the ZipCoder corresponding to the Charset that - // was provided when constructing the ZipFile - zc = zipCoder; - } + final ZipCoder zc = zipCoderFor(cen, pos, zipCoder); // Compare the lookup name with the name encoded in the CEN switch (zc.compare(name, cen, noff, nlen, addSlash)) { case ZipCoder.EXACT_MATCH: From bca4d6bb649c9dbf6d5b33278d591eb788658b85 Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Wed, 19 Mar 2025 19:47:45 +0530 Subject: [PATCH 06/15] Eirik's suggestion - update test to even call ZipFile.getEntry() --- test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java b/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java index 9df378b9296ef..594a46afda1c6 100644 --- a/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java +++ b/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java @@ -125,7 +125,9 @@ public Void call() throws Exception { try (final ZipFile zf = new ZipFile(this.file.toFile(), this.entryNameCharset)) { final var entries = zf.entries(); while (entries.hasMoreElements()) { - entries.nextElement(); + final ZipEntry ze = entries.nextElement(); + // additionally exercise the ZipFile.getEntry() method + zf.getEntry(ze.getName()); } } return null; From 325739229062293c87f9deee621032946b25caca Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Wed, 19 Mar 2025 19:50:30 +0530 Subject: [PATCH 07/15] rename entryNameCharset to charset in the test --- .../zip/ZipFile/ZipFileSharedSourceTest.java | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java b/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java index 594a46afda1c6..50b4320cfca75 100644 --- a/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java +++ b/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java @@ -51,10 +51,10 @@ */ public class ZipFileSharedSourceTest { - static Path createZipFile(final Charset entryNameCharset) throws Exception { + static Path createZipFile(final Charset charset) throws Exception { final Path zipFilePath = Files.createTempFile(Path.of("."), "8347712", ".zip"); try (OutputStream os = Files.newOutputStream(zipFilePath); - ZipOutputStream zos = new ZipOutputStream(os, entryNameCharset)) { + ZipOutputStream zos = new ZipOutputStream(os, charset)) { final int numEntries = 10240; for (int i = 1; i <= numEntries; i++) { final ZipEntry entry = new ZipEntry("entry-" + i); @@ -75,7 +75,7 @@ static List charsets() { } /** - * Creates multiple instances of java.util.zip.ZipFile with the given {@code entryNameCharset} + * Creates multiple instances of java.util.zip.ZipFile with the given {@code charset} * for the same underlying ZIP file. Then each instance of the ZipFile is iterated * over its entries, concurrently in a separate thread. Verifies that such access * to independent ZipFile instances, doesn't lead to unexpected failures contributed @@ -83,15 +83,15 @@ static List charsets() { */ @ParameterizedTest @MethodSource("charsets") - void testMultipleZipFileInstances(final Charset entryNameCommentCharset) throws Exception { - final Path zipFilePath = createZipFile(entryNameCommentCharset); + void testMultipleZipFileInstances(final Charset charset) throws Exception { + final Path zipFilePath = createZipFile(charset); final int numTasks = 200; final CountDownLatch startLatch = new CountDownLatch(numTasks); final List> results = new ArrayList<>(); try (final ExecutorService executor = Executors.newThreadPerTaskExecutor(Thread.ofPlatform().factory())) { for (int i = 0; i < numTasks; i++) { - final var task = new ZipEntryIteratingTask(zipFilePath, entryNameCommentCharset, + final var task = new ZipEntryIteratingTask(zipFilePath, charset, startLatch); results.add(executor.submit(task)); } @@ -105,13 +105,13 @@ void testMultipleZipFileInstances(final Charset entryNameCommentCharset) throws private static final class ZipEntryIteratingTask implements Callable { private final Path file; - private final Charset entryNameCharset; + private final Charset charset; private final CountDownLatch startLatch; - private ZipEntryIteratingTask(final Path file, final Charset entryNameCharset, + private ZipEntryIteratingTask(final Path file, final Charset charset, final CountDownLatch startLatch) { this.file = file; - this.entryNameCharset = entryNameCharset; + this.charset = charset; this.startLatch = startLatch; } @@ -122,7 +122,7 @@ public Void call() throws Exception { // wait for other tasks to be ready to run this.startLatch.await(); // create a new instance of ZipFile and iterate over the entries - try (final ZipFile zf = new ZipFile(this.file.toFile(), this.entryNameCharset)) { + try (final ZipFile zf = new ZipFile(this.file.toFile(), this.charset)) { final var entries = zf.entries(); while (entries.hasMoreElements()) { final ZipEntry ze = entries.nextElement(); From 83ac59fcd292c21bbff4e0ac243a6bf07b4b21dc Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Wed, 19 Mar 2025 19:58:16 +0530 Subject: [PATCH 08/15] Eirik's suggestion - update test method comment --- .../util/zip/ZipFile/ZipFileSharedSourceTest.java | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java b/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java index 50b4320cfca75..d4d057ede7bf5 100644 --- a/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java +++ b/test/jdk/java/util/zip/ZipFile/ZipFileSharedSourceTest.java @@ -75,11 +75,12 @@ static List charsets() { } /** - * Creates multiple instances of java.util.zip.ZipFile with the given {@code charset} - * for the same underlying ZIP file. Then each instance of the ZipFile is iterated - * over its entries, concurrently in a separate thread. Verifies that such access - * to independent ZipFile instances, doesn't lead to unexpected failures contributed - * by concurrent thread execution. + * In this test, multiple concurrent threads each create an instance of java.util.zip.ZipFile + * with the given {@code charset} for the same underlying ZIP file. Each of the threads + * then iterate over the entries of their ZipFile instance. The test verifies that such access, + * where each thread is accessing an independent ZipFile instance corresponding to the same + * underlying ZIP file, doesn't lead to unexpected failures contributed by concurrent + * threads. */ @ParameterizedTest @MethodSource("charsets") From 88d4c77fc1f0e84ffd0f0146d30c49e52a4588a8 Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Sun, 23 Mar 2025 14:40:40 +0530 Subject: [PATCH 09/15] Alan's suggestion - trim the javadoc of (internal) ZipCoder class --- src/java.base/share/classes/java/util/zip/ZipCoder.java | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java b/src/java.base/share/classes/java/util/zip/ZipCoder.java index 7b5db1c7dd62b..a59cbc1f480ab 100644 --- a/src/java.base/share/classes/java/util/zip/ZipCoder.java +++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java @@ -40,11 +40,8 @@ /** * Utility class for ZIP file entry name and comment decoding and encoding. *

- * The {@linkplain #UTF8 UTF-8 ZipCoder instance}, which gets returned by {@link #get(Charset)} - * for {@linkplain java.nio.charset.StandardCharsets#UTF_8 UTF-8 Charset} is thread-safe - * and can be used concurrently by multiple threads. All other {@code ZipCoder} instances - * are not thread-safe and external synchronization is required by callers, if those - * instances are to be used concurrently by multiple threads. + * The {@code ZipCoder} for UTF-8 charset is thread safe, {@code ZipCoder} + * for other charsets require external synchronization. */ class ZipCoder { From 306d40e10c942365a37486d047dd62825b70cf62 Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Sun, 23 Mar 2025 14:42:24 +0530 Subject: [PATCH 10/15] Alan's suggestion - change code comment about Source class being thread safe --- src/java.base/share/classes/java/util/zip/ZipFile.java | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java index a4d14741f2feb..ad9fa0771a4c0 100644 --- a/src/java.base/share/classes/java/util/zip/ZipFile.java +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java @@ -1143,10 +1143,7 @@ public void setExternalFileAttributes(ZipEntry ze, int externalFileAttributes) { // Represents the resolved name and position of a CEN record static record EntryPos(String name, int pos) {} - // Implementation note: This class must be thread-safe. - // Each instance of Source could be shared between multiple different instances of ZipFile. - // Although ZipFile isn't thread-safe, the fact that separate instances of ZipFile could - // still concurrently use the same Source instance, mandates that Source must be thread-safe. + // Implementation note: This class is be thread safe. private static class Source { // While this is only used from ZipFile, defining it there would cause // a bootstrap cycle that would leave this initialized as null From 4e3f8e70ae32669f7b97a8ebae83cc3e22639b8b Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Sun, 23 Mar 2025 14:44:26 +0530 Subject: [PATCH 11/15] improve code comment for ZipFile.zipCoder --- src/java.base/share/classes/java/util/zip/ZipFile.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java index ad9fa0771a4c0..311d25142b75d 100644 --- a/src/java.base/share/classes/java/util/zip/ZipFile.java +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java @@ -82,8 +82,7 @@ public class ZipFile implements ZipConstants, Closeable { private final String filePath; // ZIP file path private final String fileName; // name of the file - // the ZipCoder instance is derived from the Charset passed to the ZipFile constructor - // and will be used for decoding the non-UTF-8 entry names and the ZIP file comment. + // ZipCoder for entry names and comments when not using UTF-8 private final ZipCoder zipCoder; private volatile boolean closeRequested; From 9d9de1aededaeb413c367b541969e276fc3c7d1c Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Wed, 30 Apr 2025 17:01:01 +0530 Subject: [PATCH 12/15] 8355975: introduce a test for 8355975 --- .../util/zip/ZipFile/ZipFileCharsetTest.java | 125 ++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 test/jdk/java/util/zip/ZipFile/ZipFileCharsetTest.java diff --git a/test/jdk/java/util/zip/ZipFile/ZipFileCharsetTest.java b/test/jdk/java/util/zip/ZipFile/ZipFileCharsetTest.java new file mode 100644 index 0000000000000..2b29726d344db --- /dev/null +++ b/test/jdk/java/util/zip/ZipFile/ZipFileCharsetTest.java @@ -0,0 +1,125 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ + +import java.io.IOException; +import java.nio.charset.Charset; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.zip.ZipEntry; +import java.util.zip.ZipFile; +import java.util.zip.ZipOutputStream; + +import org.junit.jupiter.api.Test; +import static java.nio.charset.StandardCharsets.ISO_8859_1; +import static java.nio.charset.StandardCharsets.US_ASCII; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assumptions.assumeTrue; + +/* + * @test + * @bug 8355975 + * @summary verify that the internal ZIP structure caching in java.util.zip.ZipFile + * uses the correct Charset when parsing the ZIP structure of a ZIP file + * @run junit ZipFileCharsetTest + */ +public class ZipFileCharsetTest { + + private static final String ISO_8859_15_NAME = "ISO-8859-15"; + + /** + * The internal implementation of java.util.zip.ZipFile maintains a cache + * of the ZIP structure of each ZIP file that's currently open. This cache + * helps prevent repeat parsing of the ZIP structure of the same underlying + * ZIP file, every time a ZipFile instance is created for the same ZIP file. + * The cache uses an internal key to map a ZIP file to the corresponding + * ZIP structure that's cached. + * A ZipFile can be constructed by passing a Charset which will be used to + * decode the entry names (and comment) in a ZIP file. + * The test here verifies that when multiple ZipFile instances are + * constructed using different Charsets but the same underlying ZIP file, + * then the internal caching implementation of ZipFile doesn't end up using + * a wrong Charset for parsing the ZIP structure of the ZIP file. + */ + @Test + void testCachedZipFileSource() throws Exception { + // ISO-8859-15 is not a standard charset in Java. We skip this test + // when it is unavailable + assumeTrue(Charset.availableCharsets().containsKey(ISO_8859_15_NAME), + "skipping test since " + ISO_8859_15_NAME + " charset isn't available"); + + // We choose the byte 0xA4 for entry name in the ZIP file. + // 0xA4 is "Euro sign" in ISO-8859-15 charset and + // "Currency sign (generic)" in ISO-8859-1 charset. + final byte[] entryNameBytes = new byte[]{(byte) 0xA4}; // intentional cast + final Charset euroSignCharset = Charset.forName(ISO_8859_15_NAME); + final Charset currencySignCharset = ISO_8859_1; + + final String euroSign = new String(entryNameBytes, euroSignCharset); + final String currencySign = new String(entryNameBytes, currencySignCharset); + + // create a ZIP file whose entry name is encoded using ISO-8859-15 charset + final Path zip = createZIP("euro", euroSignCharset, entryNameBytes); + + // Construct a ZipFile instance using the (incorrect) charset ISO-8859-1. + // While that ZipFile instance is still open (and the ZIP file structure + // still cached), construct another instance for the same ZIP file, using + // the (correct) charset ISO-8859-15. + try (ZipFile incorrect = new ZipFile(zip.toFile(), currencySignCharset); + ZipFile correct = new ZipFile(zip.toFile(), euroSignCharset)) { + + // correct encoding should resolve the entry name to euro sign + // and the entry should be thus be located + assertNotNull(correct.getEntry(euroSign), "euro sign entry missing in " + correct); + // correct encoding should not be able to find an entry name + // with the currency sign + assertNull(correct.getEntry(currencySign), "currency sign entry unexpectedly found in " + + correct); + + // incorrect encoding should resolve the entry name to currency sign + // and the entry should be thus be located by the currency sign name + assertNotNull(incorrect.getEntry(currencySign), "currency sign entry missing in " + + incorrect); + // incorrect encoding should not be able to find an entry name + // with the euro sign + assertNull(incorrect.getEntry(euroSign), "euro sign entry unexpectedly found in " + + incorrect); + } + } + + /** + * Creates and return ZIP file whose entry names are encoded using the given {@code charset} + */ + private static Path createZIP(final String fileNamePrefix, final Charset charset, + final byte[] entryNameBytes) throws IOException { + final Path zip = Files.createTempFile(Path.of("."), fileNamePrefix, ".zip"); + // create a ZIP file whose entry name(s) use the given charset + try (ZipOutputStream zos = new ZipOutputStream(Files.newOutputStream(zip), charset)) { + zos.putNextEntry(new ZipEntry(new String(entryNameBytes, charset))); + final byte[] entryContent = "doesnotmatter".getBytes(US_ASCII); + zos.write(entryContent); + zos.closeEntry(); + } + return zip; + } +} From e7d969f8528832103f6a00a109f1116bc4835467 Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Wed, 30 Apr 2025 17:12:45 +0530 Subject: [PATCH 13/15] fix comment typo --- src/java.base/share/classes/java/util/zip/ZipFile.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java index 311d25142b75d..00acba7f48061 100644 --- a/src/java.base/share/classes/java/util/zip/ZipFile.java +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java @@ -1142,7 +1142,7 @@ public void setExternalFileAttributes(ZipEntry ze, int externalFileAttributes) { // Represents the resolved name and position of a CEN record static record EntryPos(String name, int pos) {} - // Implementation note: This class is be thread safe. + // Implementation note: This class is thread safe. private static class Source { // While this is only used from ZipFile, defining it there would cause // a bootstrap cycle that would leave this initialized as null From 9a29b9606059e87c39d36e831ccbcd63bbae9e2e Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Wed, 30 Apr 2025 17:18:20 +0530 Subject: [PATCH 14/15] Eirik's review about code comments --- src/java.base/share/classes/java/util/zip/ZipCoder.java | 2 +- src/java.base/share/classes/java/util/zip/ZipFile.java | 8 +++----- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/src/java.base/share/classes/java/util/zip/ZipCoder.java b/src/java.base/share/classes/java/util/zip/ZipCoder.java index a59cbc1f480ab..2702b90f4f6fd 100644 --- a/src/java.base/share/classes/java/util/zip/ZipCoder.java +++ b/src/java.base/share/classes/java/util/zip/ZipCoder.java @@ -178,7 +178,7 @@ protected CharsetDecoder decoder() { } /** - * {@return the {@link Charset} used this {@code ZipCoder}} + * {@return the {@link Charset} used by this {@code ZipCoder}} */ final Charset charset() { return this.cs; diff --git a/src/java.base/share/classes/java/util/zip/ZipFile.java b/src/java.base/share/classes/java/util/zip/ZipFile.java index 00acba7f48061..b7d6f1ff71426 100644 --- a/src/java.base/share/classes/java/util/zip/ZipFile.java +++ b/src/java.base/share/classes/java/util/zip/ZipFile.java @@ -82,7 +82,7 @@ public class ZipFile implements ZipConstants, Closeable { private final String filePath; // ZIP file path private final String fileName; // name of the file - // ZipCoder for entry names and comments when not using UTF-8 + // Used when decoding entry names and comments private final ZipCoder zipCoder; private volatile boolean closeRequested; @@ -1428,15 +1428,13 @@ private static boolean isZip64ExtBlockSizeValid(int blockSize, long csize, * if the fileKey() is not available. * - The ZIP file's last modified time (to allow for cases * where a ZIP file is re-opened after it has been modified). - * - The Charset, that was provided when constructing a ZipFile instance, - * for reading non-UTF-8 entry names and comments. + * - The Charset that was provided when constructing the ZipFile instance. * The unique combination of these components identifies a Source of a ZipFile. */ private static class Key { private final BasicFileAttributes attrs; private final File file; - // the Charset to be used for processing non-UTF-8 entry names in the ZIP file - // and the ZIP file comment + // the Charset that was provided when constructing the ZipFile instance private final Charset charset; /** From 71cb9781862725130daccbff0b912bbcd410b5ec Mon Sep 17 00:00:00 2001 From: Jaikiran Pai Date: Wed, 30 Apr 2025 19:27:32 +0530 Subject: [PATCH 15/15] Lance's review - update code comment in the test --- test/jdk/java/util/zip/ZipFile/ZipFileCharsetTest.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/jdk/java/util/zip/ZipFile/ZipFileCharsetTest.java b/test/jdk/java/util/zip/ZipFile/ZipFileCharsetTest.java index 2b29726d344db..e97088247f444 100644 --- a/test/jdk/java/util/zip/ZipFile/ZipFileCharsetTest.java +++ b/test/jdk/java/util/zip/ZipFile/ZipFileCharsetTest.java @@ -56,7 +56,7 @@ public class ZipFileCharsetTest { * ZIP structure that's cached. * A ZipFile can be constructed by passing a Charset which will be used to * decode the entry names (and comment) in a ZIP file. - * The test here verifies that when multiple ZipFile instances are + * The test verifies that when multiple ZipFile instances are * constructed using different Charsets but the same underlying ZIP file, * then the internal caching implementation of ZipFile doesn't end up using * a wrong Charset for parsing the ZIP structure of the ZIP file.