Skip to content

Commit 2c4e8d2

Browse files
jaikiranEirik Bjørsnøs
andcommitted
8347712: IllegalStateException on multithreaded ZipFile access with non-UTF8 charset
8355975: ZipFile uses incorrect Charset if another instance for the same ZIP file was constructed with a different Charset Co-authored-by: Eirik Bjørsnøs <eirbjo@openjdk.org> Reviewed-by: eirbjo, lancea, redestad, alanb
1 parent 530d14a commit 2c4e8d2

File tree

4 files changed

+366
-57
lines changed

4 files changed

+366
-57
lines changed

src/java.base/share/classes/java/util/zip/ZipCoder.java

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2009, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2009, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -38,7 +38,10 @@
3838
import sun.nio.cs.UTF_8;
3939

4040
/**
41-
* Utility class for ZIP file entry name and comment decoding and encoding
41+
* Utility class for ZIP file entry name and comment decoding and encoding.
42+
* <p>
43+
* The {@code ZipCoder} for UTF-8 charset is thread safe, {@code ZipCoder}
44+
* for other charsets require external synchronization.
4245
*/
4346
class ZipCoder {
4447

@@ -174,6 +177,13 @@ protected CharsetDecoder decoder() {
174177
return dec;
175178
}
176179

180+
/**
181+
* {@return the {@link Charset} used by this {@code ZipCoder}}
182+
*/
183+
final Charset charset() {
184+
return this.cs;
185+
}
186+
177187
private CharsetEncoder encoder() {
178188
if (enc == null) {
179189
enc = cs.newEncoder()

src/java.base/share/classes/java/util/zip/ZipFile.java

Lines changed: 92 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 1995, 2024, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 1995, 2025, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -82,6 +82,8 @@ public class ZipFile implements ZipConstants, Closeable {
8282

8383
private final String filePath; // ZIP file path
8484
private final String fileName; // name of the file
85+
// Used when decoding entry names and comments
86+
private final ZipCoder zipCoder;
8587
private volatile boolean closeRequested;
8688

8789
// The "resource" used by this ZIP file that needs to be
@@ -198,7 +200,8 @@ public ZipFile(File file, int mode, Charset charset) throws IOException
198200
this.fileName = file.getName();
199201
long t0 = System.nanoTime();
200202

201-
this.res = new CleanableResource(this, ZipCoder.get(charset), file, mode);
203+
this.zipCoder = ZipCoder.get(charset);
204+
this.res = new CleanableResource(this, zipCoder, file, mode);
202205

203206
PerfCounter.getZipFileOpenTime().addElapsedTimeFrom(t0);
204207
PerfCounter.getZipFileCount().increment();
@@ -265,7 +268,7 @@ public String getComment() {
265268
// If there is a problem decoding the byte array which represents
266269
// the ZIP file comment, return null;
267270
try {
268-
return res.zsrc.zc.toString(res.zsrc.comment);
271+
return zipCoder.toString(res.zsrc.comment);
269272
} catch (IllegalArgumentException iae) {
270273
return null;
271274
}
@@ -288,7 +291,7 @@ public ZipEntry getEntry(String name) {
288291
// Look up the name and CEN header position of the entry.
289292
// The resolved name may include a trailing slash.
290293
// See Source::getEntryPos for details.
291-
EntryPos pos = res.zsrc.getEntryPos(name, true);
294+
EntryPos pos = res.zsrc.getEntryPos(name, true, zipCoder);
292295
if (pos != null) {
293296
entry = getZipEntry(pos.name, pos.pos);
294297
}
@@ -328,7 +331,7 @@ public InputStream getInputStream(ZipEntry entry) throws IOException {
328331
if (Objects.equals(lastEntryName, entry.name)) {
329332
pos = lastEntryPos;
330333
} else {
331-
EntryPos entryPos = zsrc.getEntryPos(entry.name, false);
334+
EntryPos entryPos = zsrc.getEntryPos(entry.name, false, zipCoder);
332335
if (entryPos != null) {
333336
pos = entryPos.pos;
334337
} else {
@@ -363,6 +366,35 @@ public InputStream getInputStream(ZipEntry entry) throws IOException {
363366
}
364367
}
365368

369+
/**
370+
* Determines and returns a {@link ZipCoder} to use for decoding
371+
* name and comment fields of the ZIP entry identified by the {@code pos}
372+
* in the ZIP file's {@code cen}.
373+
* <p>
374+
* A ZIP entry's name and comment fields may be encoded using UTF-8, in
375+
* which case this method returns a UTF-8 capable {@code ZipCoder}. If the
376+
* entry doesn't require UTF-8, then this method returns the {@code fallback}
377+
* {@code ZipCoder}.
378+
*
379+
* @param cen the CEN
380+
* @param pos the ZIP entry's position in CEN
381+
* @param fallback the fallback ZipCoder to return if the entry doesn't require UTF-8
382+
*/
383+
private static ZipCoder zipCoderFor(final byte[] cen, final int pos, final ZipCoder fallback) {
384+
if (fallback.isUTF8()) {
385+
// the fallback ZipCoder is capable of handling UTF-8,
386+
// so no need to parse the entry flags to determine if
387+
// the entry has UTF-8 flag.
388+
return fallback;
389+
}
390+
if ((CENFLG(cen, pos) & USE_UTF8) != 0) {
391+
// entry requires a UTF-8 ZipCoder
392+
return ZipCoder.UTF8;
393+
}
394+
// entry doesn't require a UTF-8 ZipCoder
395+
return fallback;
396+
}
397+
366398
private static class InflaterCleanupAction implements Runnable {
367399
private final Inflater inf;
368400
private final CleanableResource res;
@@ -561,7 +593,7 @@ public Stream<? extends ZipEntry> stream() {
561593
private String getEntryName(int pos) {
562594
byte[] cen = res.zsrc.cen;
563595
int nlen = CENNAM(cen, pos);
564-
ZipCoder zc = res.zsrc.zipCoderForPos(pos);
596+
ZipCoder zc = zipCoderFor(cen, pos, zipCoder);
565597
return zc.toString(cen, pos + CENHDR, nlen);
566598
}
567599

@@ -632,7 +664,7 @@ private ZipEntry getZipEntry(String name, int pos) {
632664
}
633665
if (clen != 0) {
634666
int start = pos + CENHDR + nlen + elen;
635-
ZipCoder zc = res.zsrc.zipCoderForPos(pos);
667+
ZipCoder zc = zipCoderFor(cen, pos, zipCoder);
636668
e.comment = zc.toString(cen, start, clen);
637669
}
638670
lastEntryName = e.name;
@@ -664,11 +696,12 @@ private static class CleanableResource implements Runnable {
664696

665697
Source zsrc;
666698

667-
CleanableResource(ZipFile zf, ZipCoder zc, File file, int mode) throws IOException {
699+
CleanableResource(ZipFile zf, ZipCoder zipCoder, File file, int mode) throws IOException {
700+
assert zipCoder != null : "null ZipCoder";
668701
this.cleanable = CleanerFactory.cleaner().register(zf, this);
669702
this.istreams = Collections.newSetFromMap(new WeakHashMap<>());
670703
this.inflaterCache = new ArrayDeque<>();
671-
this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, zc);
704+
this.zsrc = Source.get(file, (mode & OPEN_DELETE) != 0, zipCoder);
672705
}
673706

674707
void clean() {
@@ -1109,6 +1142,7 @@ public void setExternalFileAttributes(ZipEntry ze, int externalFileAttributes) {
11091142
// Represents the resolved name and position of a CEN record
11101143
static record EntryPos(String name, int pos) {}
11111144

1145+
// Implementation note: This class is thread safe.
11121146
private static class Source {
11131147
// While this is only used from ZipFile, defining it there would cause
11141148
// a bootstrap cycle that would leave this initialized as null
@@ -1121,7 +1155,6 @@ private static class Source {
11211155
private static final int MAX_CEN_SIZE = ArraysSupport.SOFT_MAX_ARRAY_LENGTH;
11221156

11231157
private final Key key; // the key in files
1124-
private final @Stable ZipCoder zc; // ZIP coder used to decode/encode
11251158

11261159
private int refs = 1;
11271160

@@ -1157,8 +1190,9 @@ private static class Source {
11571190
private int[] entries; // array of hashed cen entry
11581191

11591192
// Checks the entry at offset pos in the CEN, calculates the Entry values as per above,
1160-
// then returns the length of the entry name.
1161-
private int checkAndAddEntry(int pos, int index)
1193+
// then returns the length of the entry name. Uses the given zipCoder for processing the
1194+
// entry name and the entry comment (if any).
1195+
private int checkAndAddEntry(final int pos, final int index, final ZipCoder zipCoder)
11621196
throws ZipException
11631197
{
11641198
byte[] cen = this.cen;
@@ -1196,21 +1230,20 @@ private int checkAndAddEntry(int pos, int index)
11961230
}
11971231

11981232
try {
1199-
ZipCoder zcp = zipCoderForPos(pos);
1200-
int hash = zcp.checkedHash(cen, entryPos, nlen);
1233+
int hash = zipCoder.checkedHash(cen, entryPos, nlen);
12011234
int hsh = (hash & 0x7fffffff) % tablelen;
12021235
int next = table[hsh];
12031236
table[hsh] = index;
12041237
// Record the CEN offset and the name hash in our hash cell.
1205-
entries[index++] = hash;
1206-
entries[index++] = next;
1207-
entries[index ] = pos;
1238+
entries[index] = hash;
1239+
entries[index + 1] = next;
1240+
entries[index + 2] = pos;
12081241
// Validate comment if it exists.
12091242
// If the bytes representing the comment cannot be converted to
12101243
// a String via zcp.toString, an Exception will be thrown
12111244
if (clen > 0) {
12121245
int start = entryPos + nlen + elen;
1213-
zcp.toString(cen, start, clen);
1246+
zipCoder.toString(cen, start, clen);
12141247
}
12151248
} catch (Exception e) {
12161249
zerror("invalid CEN header (bad entry name or comment)");
@@ -1389,34 +1422,47 @@ private static boolean isZip64ExtBlockSizeValid(int blockSize, long csize,
13891422
private int tablelen; // number of hash heads
13901423

13911424
/**
1392-
* A class representing a key to a ZIP file. A key is based
1393-
* on the file key if available, or the path value if the
1394-
* file key is not available. The key is also based on the
1395-
* file's last modified time to allow for cases where a ZIP
1396-
* file is re-opened after it has been modified.
1425+
* A class representing a key to the Source of a ZipFile.
1426+
* The Key is composed of:
1427+
* - The BasicFileAttributes.fileKey() if available, or the Path of the ZIP file
1428+
* if the fileKey() is not available.
1429+
* - The ZIP file's last modified time (to allow for cases
1430+
* where a ZIP file is re-opened after it has been modified).
1431+
* - The Charset that was provided when constructing the ZipFile instance.
1432+
* The unique combination of these components identifies a Source of a ZipFile.
13971433
*/
13981434
private static class Key {
1399-
final BasicFileAttributes attrs;
1400-
File file;
1401-
final boolean utf8;
1402-
1403-
public Key(File file, BasicFileAttributes attrs, ZipCoder zc) {
1435+
private final BasicFileAttributes attrs;
1436+
private final File file;
1437+
// the Charset that was provided when constructing the ZipFile instance
1438+
private final Charset charset;
1439+
1440+
/**
1441+
* Constructs a {@code Key} to a {@code Source} of a {@code ZipFile}
1442+
*
1443+
* @param file the ZIP file
1444+
* @param attrs the attributes of the ZIP file
1445+
* @param charset the Charset that was provided when constructing the ZipFile instance
1446+
*/
1447+
public Key(File file, BasicFileAttributes attrs, Charset charset) {
14041448
this.attrs = attrs;
14051449
this.file = file;
1406-
this.utf8 = zc.isUTF8();
1450+
this.charset = charset;
14071451
}
14081452

1453+
@Override
14091454
public int hashCode() {
1410-
long t = utf8 ? 0 : Long.MAX_VALUE;
1455+
long t = charset.hashCode();
14111456
t += attrs.lastModifiedTime().toMillis();
14121457
Object fk = attrs.fileKey();
14131458
return Long.hashCode(t) +
14141459
(fk != null ? fk.hashCode() : file.hashCode());
14151460
}
14161461

1462+
@Override
14171463
public boolean equals(Object obj) {
14181464
if (obj instanceof Key key) {
1419-
if (key.utf8 != utf8) {
1465+
if (!charset.equals(key.charset)) {
14201466
return false;
14211467
}
14221468
if (!attrs.lastModifiedTime().equals(key.attrs.lastModifiedTime())) {
@@ -1440,12 +1486,12 @@ public boolean equals(Object obj) {
14401486
private static final java.nio.file.FileSystem builtInFS =
14411487
DefaultFileSystemProvider.theFileSystem();
14421488

1443-
static Source get(File file, boolean toDelete, ZipCoder zc) throws IOException {
1489+
static Source get(File file, boolean toDelete, ZipCoder zipCoder) throws IOException {
14441490
final Key key;
14451491
try {
14461492
key = new Key(file,
14471493
Files.readAttributes(builtInFS.getPath(file.getPath()),
1448-
BasicFileAttributes.class), zc);
1494+
BasicFileAttributes.class), zipCoder.charset());
14491495
} catch (InvalidPathException ipe) {
14501496
throw new IOException(ipe);
14511497
}
@@ -1457,7 +1503,7 @@ static Source get(File file, boolean toDelete, ZipCoder zc) throws IOException {
14571503
return src;
14581504
}
14591505
}
1460-
src = new Source(key, toDelete, zc);
1506+
src = new Source(key, toDelete, zipCoder);
14611507

14621508
synchronized (files) {
14631509
Source prev = files.putIfAbsent(key, src);
@@ -1479,8 +1525,7 @@ static void release(Source src) throws IOException {
14791525
}
14801526
}
14811527

1482-
private Source(Key key, boolean toDelete, ZipCoder zc) throws IOException {
1483-
this.zc = zc;
1528+
private Source(Key key, boolean toDelete, ZipCoder zipCoder) throws IOException {
14841529
this.key = key;
14851530
if (toDelete) {
14861531
if (OperatingSystem.isWindows()) {
@@ -1494,7 +1539,7 @@ private Source(Key key, boolean toDelete, ZipCoder zc) throws IOException {
14941539
this.zfile = new RandomAccessFile(key.file, "r");
14951540
}
14961541
try {
1497-
initCEN(-1);
1542+
initCEN(-1, zipCoder);
14981543
byte[] buf = new byte[4];
14991544
readFullyAt(buf, 0, 4, 0);
15001545
this.startsWithLoc = (LOCSIG(buf) == LOCSIG);
@@ -1649,7 +1694,7 @@ private End findEND() throws IOException {
16491694
}
16501695

16511696
// Reads ZIP file central directory.
1652-
private void initCEN(int knownTotal) throws IOException {
1697+
private void initCEN(final int knownTotal, final ZipCoder zipCoder) throws IOException {
16531698
// Prefer locals for better performance during startup
16541699
byte[] cen;
16551700
if (knownTotal == -1) {
@@ -1711,13 +1756,15 @@ private void initCEN(int knownTotal) throws IOException {
17111756
// This will only happen if the ZIP file has an incorrect
17121757
// ENDTOT field, which usually means it contains more than
17131758
// 65535 entries.
1714-
initCEN(countCENHeaders(cen));
1759+
initCEN(countCENHeaders(cen), zipCoder);
17151760
return;
17161761
}
17171762

17181763
int entryPos = pos + CENHDR;
1764+
// the ZipCoder for any non-UTF8 entries
1765+
final ZipCoder entryZipCoder = zipCoderFor(cen, pos, zipCoder);
17191766
// Checks the entry and adds values to entries[idx ... idx+2]
1720-
int nlen = checkAndAddEntry(pos, idx);
1767+
int nlen = checkAndAddEntry(pos, idx, entryZipCoder);
17211768
idx += 3;
17221769

17231770
// Adds name to metanames.
@@ -1741,7 +1788,7 @@ private void initCEN(int knownTotal) throws IOException {
17411788
try {
17421789
// Compute hash code of name from "META-INF/versions/{version)/{name}
17431790
int prefixLen = META_INF_VERSIONS_LEN + DecimalDigits.stringSize(version);
1744-
int hashCode = zipCoderForPos(pos).checkedHash(cen,
1791+
int hashCode = entryZipCoder.checkedHash(cen,
17451792
entryPos + prefixLen,
17461793
nlen - prefixLen);
17471794
// Register version for this hash code
@@ -1786,9 +1833,10 @@ private static void zerror(String msg) throws ZipException {
17861833

17871834
/*
17881835
* Returns the resolved name and position of the ZIP cen entry corresponding
1789-
* to the specified entry name, or {@code null} if not found.
1836+
* to the specified entry name, or {@code null} if not found.
17901837
*/
1791-
private EntryPos getEntryPos(String name, boolean addSlash) {
1838+
private EntryPos getEntryPos(final String name, final boolean addSlash,
1839+
final ZipCoder zipCoder) {
17921840
if (total == 0) {
17931841
return null;
17941842
}
@@ -1807,8 +1855,7 @@ private EntryPos getEntryPos(String name, boolean addSlash) {
18071855
int noff = pos + CENHDR;
18081856
int nlen = CENNAM(cen, pos);
18091857

1810-
ZipCoder zc = zipCoderForPos(pos);
1811-
1858+
final ZipCoder zc = zipCoderFor(cen, pos, zipCoder);
18121859
// Compare the lookup name with the name encoded in the CEN
18131860
switch (zc.compare(name, cen, noff, nlen, addSlash)) {
18141861
case ZipCoder.EXACT_MATCH:
@@ -1834,16 +1881,6 @@ private EntryPos getEntryPos(String name, boolean addSlash) {
18341881
return null;
18351882
}
18361883

1837-
private ZipCoder zipCoderForPos(int pos) {
1838-
if (zc.isUTF8()) {
1839-
return zc;
1840-
}
1841-
if ((CENFLG(cen, pos) & USE_UTF8) != 0) {
1842-
return ZipCoder.UTF8;
1843-
}
1844-
return zc;
1845-
}
1846-
18471884
/**
18481885
* Returns true if the bytes represent a non-directory name
18491886
* beginning with "META-INF/", disregarding ASCII case.

0 commit comments

Comments
 (0)