Skip to content

Commit 78f71b4

Browse files
eirbjocl4es
authored andcommitted
8301873: Avoid string decoding in ZipFile.Source.getEntryPos
Reviewed-by: redestad, lancea
1 parent f82385e commit 78f71b4

File tree

4 files changed

+375
-25
lines changed

4 files changed

+375
-25
lines changed

src/java.base/share/classes/java/util/zip/ZipCoder.java

+88-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2009, 2021, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2009, 2023, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -55,6 +55,30 @@ public static ZipCoder get(Charset charset) {
5555
return new ZipCoder(charset);
5656
}
5757

58+
/**
59+
* This enum represents the three possible return values for
60+
* {@link #compare(String, byte[], int, int, boolean)} when
61+
* this method compares a lookup name to a string encoded in the
62+
* CEN byte array.
63+
*/
64+
enum Comparison {
65+
/**
66+
* The lookup string is exactly equal
67+
* to the encoded string.
68+
*/
69+
EXACT_MATCH,
70+
/**
71+
* The lookup string and the encoded string differs only
72+
* by the encoded string having a trailing '/' character.
73+
*/
74+
DIRECTORY_MATCH,
75+
/**
76+
* The lookup string and the encoded string do not match.
77+
* (They are neither an exact match or a directory match.)
78+
*/
79+
NO_MATCH
80+
}
81+
5882
String toString(byte[] ba, int off, int length) {
5983
try {
6084
return decoder().decode(ByteBuffer.wrap(ba, off, length)).toString();
@@ -184,6 +208,52 @@ private byte[] slashBytes() {
184208
return slashBytes;
185209
}
186210

211+
/**
212+
* This method is used by ZipFile.Source.getEntryPos when comparing the
213+
* name being looked up to candidate names encoded in the CEN byte
214+
* array.
215+
*
216+
* Since ZipCode.getEntry supports looking up a "dir/" entry by
217+
* the name "dir", this method can optionally distinguish an
218+
* exact match from a partial "directory match" (where names only
219+
* differ by the encoded name having an additional trailing '/')
220+
*
221+
* The return values of this method are as follows:
222+
*
223+
* If the lookup name is exactly equal to the encoded string, return
224+
* {@link Comparison#EXACT_MATCH}.
225+
*
226+
* If the parameter {@code matchDirectory} is {@code true} and the
227+
* two strings differ only by the encoded string having an extra
228+
* trailing '/' character, then return {@link Comparison#DIRECTORY_MATCH}.
229+
*
230+
* Otherwise, return {@link Comparison#NO_MATCH}
231+
*
232+
* While a general implementation will need to decode bytes into a
233+
* String for comparison, this can be avoided if the String coder
234+
* and this ZipCoder are known to encode strings to the same bytes.
235+
*
236+
* @param str The lookup string to compare with the encoded string.
237+
* @param b The byte array holding the encoded string
238+
* @param off The offset into the array where the encoded string starts
239+
* @param len The length of the encoded string in bytes
240+
* @param matchDirectory If {@code true} and the strings do not match exactly,
241+
* a directory match will also be tested
242+
*
243+
*/
244+
Comparison compare(String str, byte[] b, int off, int len, boolean matchDirectory) {
245+
String decoded = toString(b, off, len);
246+
if (decoded.startsWith(str)) {
247+
if (decoded.length() == str.length()) {
248+
return Comparison.EXACT_MATCH;
249+
} else if (matchDirectory
250+
&& decoded.length() == str.length() + 1
251+
&& decoded.endsWith("/") ) {
252+
return Comparison.DIRECTORY_MATCH;
253+
}
254+
}
255+
return Comparison.NO_MATCH;
256+
}
187257
static final class UTF8ZipCoder extends ZipCoder {
188258

189259
private UTF8ZipCoder(Charset utf8) {
@@ -232,5 +302,22 @@ int checkedHash(byte[] a, int off, int len) throws Exception {
232302
boolean hasTrailingSlash(byte[] a, int end) {
233303
return end > 0 && a[end - 1] == '/';
234304
}
305+
306+
@Override
307+
Comparison compare(String str, byte[] b, int off, int len, boolean matchDirectory) {
308+
try {
309+
byte[] encoded = JLA.getBytesNoRepl(str, UTF_8.INSTANCE);
310+
int mismatch = Arrays.mismatch(encoded, 0, encoded.length, b, off, off+len);
311+
if (mismatch == -1) {
312+
return Comparison.EXACT_MATCH;
313+
} else if (matchDirectory && len == mismatch + 1 && hasTrailingSlash(b, off + len)) {
314+
return Comparison.DIRECTORY_MATCH;
315+
} else {
316+
return Comparison.NO_MATCH;
317+
}
318+
} catch (CharacterCodingException e) {
319+
return Comparison.NO_MATCH;
320+
}
321+
}
235322
}
236323
}

src/java.base/share/classes/java/util/zip/ZipFile.java

+24-23
Original file line numberDiff line numberDiff line change
@@ -1633,40 +1633,41 @@ private int getEntryPos(String name, boolean addSlash) {
16331633
int hsh = ZipCoder.hash(name);
16341634
int idx = table[(hsh & 0x7fffffff) % tablelen];
16351635

1636+
int dirPos = -1; // Position of secondary match "name/"
1637+
16361638
// Search down the target hash chain for a entry whose
16371639
// 32 bit hash matches the hashed name.
16381640
while (idx != ZIP_ENDCHAIN) {
16391641
if (getEntryHash(idx) == hsh) {
1640-
// The CEN name must match the specified one
1642+
16411643
int pos = getEntryPos(idx);
1644+
int noff = pos + CENHDR;
1645+
int nlen = CENNAM(cen, pos);
16421646

1643-
try {
1644-
ZipCoder zc = zipCoderForPos(pos);
1645-
String entry = zc.toString(cen, pos + CENHDR, CENNAM(cen, pos));
1646-
1647-
// If addSlash is true we'll test for name+/ in addition to
1648-
// name, unless name is the empty string or already ends with a
1649-
// slash
1650-
int entryLen = entry.length();
1651-
int nameLen = name.length();
1652-
if (entryLen == nameLen && entry.equals(name)) {
1653-
// Found our match
1647+
ZipCoder zc = zipCoderForPos(pos);
1648+
1649+
// Compare the lookup name with the name encoded in the CEN
1650+
switch (zc.compare(name, cen, noff, nlen, addSlash)) {
1651+
case EXACT_MATCH:
1652+
// We found an exact match for "name"
16541653
return pos;
1655-
}
1656-
// If addSlash is true we'll now test for name+/ providing
1657-
if (addSlash && nameLen + 1 == entryLen
1658-
&& entry.startsWith(name) &&
1659-
entry.charAt(entryLen - 1) == '/') {
1660-
// Found the entry "name+/", now find the CEN entry pos
1661-
int exactPos = getEntryPos(name, false);
1662-
return exactPos == -1 ? pos : exactPos;
1663-
}
1664-
} catch (IllegalArgumentException iae) {
1665-
// Ignore
1654+
case DIRECTORY_MATCH:
1655+
// We found the directory "name/"
1656+
// Track its position, then continue the search for "name"
1657+
dirPos = pos;
1658+
break;
1659+
case NO_MATCH:
1660+
// Hash collision, continue searching
16661661
}
16671662
}
16681663
idx = getEntryNext(idx);
16691664
}
1665+
// Reaching this point means we did not find "name".
1666+
// Return the position of "name/" if we found it
1667+
if (dirPos != -1) {
1668+
return dirPos;
1669+
}
1670+
// No entry found
16701671
return -1;
16711672
}
16721673

Original file line numberDiff line numberDiff line change
@@ -0,0 +1,183 @@
1+
/*
2+
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
3+
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4+
*
5+
* This code is free software; you can redistribute it and/or modify it
6+
* under the terms of the GNU General Public License version 2 only, as
7+
* published by the Free Software Foundation.
8+
*
9+
* This code is distributed in the hope that it will be useful, but WITHOUT
10+
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11+
* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12+
* version 2 for more details (a copy is included in the LICENSE file that
13+
* accompanied this code).
14+
*
15+
* You should have received a copy of the GNU General Public License version
16+
* 2 along with this work; if not, write to the Free Software Foundation,
17+
* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18+
*
19+
* Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20+
* or visit www.oracle.com if you need additional information or have any
21+
* questions.
22+
*
23+
*/
24+
25+
import org.testng.annotations.BeforeTest;
26+
import org.testng.annotations.Test;
27+
28+
import java.io.ByteArrayOutputStream;
29+
import java.io.FileOutputStream;
30+
import java.io.IOException;
31+
import java.nio.ByteBuffer;
32+
import java.nio.ByteOrder;
33+
import java.nio.channels.FileChannel;
34+
import java.nio.file.Path;
35+
import java.util.Arrays;
36+
import java.util.zip.ZipEntry;
37+
import java.util.zip.ZipException;
38+
import java.util.zip.ZipFile;
39+
import java.util.zip.ZipOutputStream;
40+
41+
import static org.testng.Assert.assertEquals;
42+
import static org.testng.Assert.expectThrows;
43+
44+
/**
45+
* @test
46+
* @summary Validate that opening ZIP files files with invalid UTF-8
47+
* byte sequences in the name or comment fields fails with ZipException
48+
* @run testng/othervm InvalidBytesInEntryNameOrComment
49+
*/
50+
public class InvalidBytesInEntryNameOrComment {
51+
52+
// Offsets for navigating the CEN fields
53+
private static final int EOC_OFF = 6; // Offset from EOF to find CEN offset
54+
private static final int CEN_HDR = 45; // Size of a CEN header
55+
private static final int NLEN = 28; // Name length
56+
private static final int ELEN = 30; // Extra length
57+
private static final int CLEN = 32; // Comment length
58+
59+
// Example invalid UTF-8 byte sequence
60+
private static final byte[] INVALID_UTF8_BYTE_SEQUENCE = {(byte) 0xF0, (byte) 0xA4, (byte) 0xAD};
61+
62+
// Expected ZipException regex
63+
private static final String BAD_ENTRY_NAME_OR_COMMENT = "invalid CEN header (bad entry name or comment)";
64+
65+
// ZIP file with invalid name field
66+
private Path invalidName;
67+
68+
// ZIP file with invalid comment field
69+
private Path invalidComment;
70+
71+
@BeforeTest
72+
public void setup() throws IOException {
73+
// Create a ZIP file with valid name and comment fields
74+
byte[] templateZip = templateZIP();
75+
76+
// Create a ZIP with a CEN name field containing an invalid byte sequence
77+
invalidName = invalidName("invalid-name.zip", templateZip);
78+
79+
// Create a ZIP with a CEN comment field containing an invalid byte sequence
80+
invalidComment = invalidComment("invalid-comment.zip", templateZip);
81+
}
82+
83+
/**
84+
* Opening a ZipFile with an invalid UTF-8 byte sequence in
85+
* the name field of a CEN file header should throw a
86+
* ZipException with "bad entry name or comment"
87+
*/
88+
@Test
89+
public void shouldRejectInvalidName() throws IOException {
90+
ZipException ex = expectThrows(ZipException.class, () -> {
91+
new ZipFile(invalidName.toFile());
92+
});
93+
assertEquals(ex.getMessage(), BAD_ENTRY_NAME_OR_COMMENT);
94+
}
95+
96+
/**
97+
* Opening a ZipFile with an invalid UTF-8 byte sequence in
98+
* the comment field of a CEN file header should throw a
99+
* ZipException with "bad entry name or comment"
100+
*/
101+
@Test
102+
public void shouldRejectInvalidComment() throws IOException {
103+
ZipException ex = expectThrows(ZipException.class, () -> {
104+
new ZipFile(invalidComment.toFile());
105+
});
106+
assertEquals(ex.getMessage(), BAD_ENTRY_NAME_OR_COMMENT);
107+
}
108+
109+
/**
110+
* Make a valid ZIP file used as a template for invalid files
111+
*/
112+
private byte[] templateZIP() throws IOException {
113+
ByteArrayOutputStream bout = new ByteArrayOutputStream();
114+
try (ZipOutputStream zo = new ZipOutputStream(bout)) {
115+
ZipEntry commentEntry = new ZipEntry("file");
116+
commentEntry.setComment("Comment");
117+
zo.putNextEntry(commentEntry);
118+
}
119+
return bout.toByteArray();
120+
}
121+
122+
/**
123+
* Make a ZIP with invalid bytes in the CEN name field
124+
*/
125+
private Path invalidName(String name, byte[] template) throws IOException {
126+
ByteBuffer buffer = copyTemplate(template);
127+
int off = cenStart(buffer);
128+
// Name field starts here
129+
int noff = off + CEN_HDR;
130+
131+
// Write invalid bytes
132+
buffer.put(noff, INVALID_UTF8_BYTE_SEQUENCE, 0, INVALID_UTF8_BYTE_SEQUENCE.length);
133+
return writeFile(name, buffer);
134+
135+
}
136+
137+
/**
138+
* Make a copy of the ZIP template and wrap it in a little-endian
139+
* ByteBuffer
140+
*/
141+
private ByteBuffer copyTemplate(byte[] template) {
142+
return ByteBuffer.wrap(Arrays.copyOf(template, template.length))
143+
.order(ByteOrder.LITTLE_ENDIAN);
144+
}
145+
146+
/**
147+
* Make a ZIP with invalid bytes in the CEN comment field
148+
*/
149+
private Path invalidComment(String name, byte[] template) throws IOException {
150+
ByteBuffer buffer = copyTemplate(template);
151+
int off = cenStart(buffer);
152+
// Need to skip past the length of the name and extra fields
153+
int nlen = buffer.getShort(off + NLEN);
154+
int elen = buffer.getShort(off + ELEN);
155+
156+
// Comment field starts here
157+
int coff = off + CEN_HDR + nlen + elen;
158+
159+
// Write invalid bytes
160+
buffer.put(coff, INVALID_UTF8_BYTE_SEQUENCE, 0, INVALID_UTF8_BYTE_SEQUENCE.length);
161+
return writeFile(name, buffer);
162+
}
163+
164+
165+
/**
166+
* Finds the offset of the start of the CEN directory
167+
*/
168+
private int cenStart(ByteBuffer buffer) {
169+
return buffer.getInt(buffer.capacity() - EOC_OFF);
170+
}
171+
172+
/**
173+
* Utility to write a ByteBuffer to disk
174+
*/
175+
private Path writeFile(String name, ByteBuffer buffer) throws IOException {
176+
Path zip = Path.of(name);
177+
try (FileChannel ch = new FileOutputStream(zip.toFile()).getChannel()) {
178+
buffer.rewind();
179+
ch.write(buffer);
180+
}
181+
return zip;
182+
}
183+
}

0 commit comments

Comments
 (0)