Skip to content

Commit

Permalink
8329623: NegativeArraySizeException encoding large String to UTF-8
Browse files Browse the repository at this point in the history
Reviewed-by: naoto, rgiulietti
  • Loading branch information
Roger Riggs committed Apr 8, 2024
1 parent dd930c5 commit 212a253
Show file tree
Hide file tree
Showing 2 changed files with 78 additions and 2 deletions.
49 changes: 48 additions & 1 deletion src/java.base/share/classes/java/lang/String.java
Original file line number Diff line number Diff line change
Expand Up @@ -1335,7 +1335,13 @@ private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
int dp = 0;
int sp = 0;
int sl = val.length >> 1;
byte[] dst = new byte[sl * 3];
// A UTF-8 encoding can be up to 3 times the string length (in chars).
// If that estimate overflows a 32-bit int, precompute the exact size instead.
long allocLen = (sl * 3 < 0) ? computeSizeUTF8_UTF16(val, doReplace) : sl * 3;
if (allocLen > (long)Integer.MAX_VALUE) {
throw new OutOfMemoryError("Required length exceeds implementation limit");
}
byte[] dst = new byte[(int) allocLen];
while (sp < sl) {
// ascii fast loop;
char c = StringUTF16.getChar(val, sp);
Expand Down Expand Up @@ -1385,6 +1391,47 @@ private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
return Arrays.copyOf(dst, dp);
}

/**
 * {@return the exact number of bytes needed to UTF_8 encode this UTF16 string}
 * Only the size is computed; nothing is written. The count is kept in a
 * {@code long}, so the result may be larger than {@code Integer.MAX_VALUE}.
 *
 * @param val UTF16 encoded byte array
 * @param doReplace true to count one byte for each unmappable character
 *                  (an unpaired surrogate); false to hand the index of the
 *                  offending char to {@code throwUnmappable}
 */
private static long computeSizeUTF8_UTF16(byte[] val, boolean doReplace) {
    final int charCount = val.length >> 1;   // two bytes per UTF16 char
    long size = 0L;
    int index = 0;

    while (index < charCount) {
        final char ch = StringUTF16.getChar(val, index);
        index++;

        if (ch < 0x80) {
            // 1 byte, 7 bits
            size += 1;
            continue;
        }
        if (ch < 0x800) {
            // 2 bytes, 11 bits
            size += 2;
            continue;
        }
        if (!Character.isSurrogate(ch)) {
            // 3 bytes, 16 bits
            size += 3;
            continue;
        }

        // Surrogate: it is mappable only when it is a high surrogate
        // immediately followed by a low surrogate.
        final boolean paired = Character.isHighSurrogate(ch)
                && index < charCount
                && Character.isLowSurrogate(StringUTF16.getChar(val, index));
        if (paired) {
            size += 4;
            index++;    // the pair consumed 2 chars
        } else if (doReplace) {
            size += 1;  // one byte counted for the replacement
        } else {
            // index was already advanced past the offending char
            throwUnmappable(index - 1);
        }
    }
    return size;
}

/**
* Constructs a new {@code String} by decoding the specified array of bytes
* using the specified {@linkplain java.nio.charset.Charset charset}. The
Expand Down
31 changes: 30 additions & 1 deletion test/jdk/java/lang/String/CompactString/MaxSizeUTF16String.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2023, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2023, 2024, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -118,4 +118,33 @@ public void testMaxCharArray() {
}
}
}

/*
 * Test that UTF-8 encoding of a too-large string throws OutOfMemoryError
 * (not NegativeArraySizeException).
 */
@Test
public void testMaxUTF8_UTF16Encode() {
    // A single char that needs 3 bytes in UTF-8; verified right below.
    String s = "\uFFFF";
    final byte[] bytes1 = s.getBytes(StandardCharsets.UTF_8);
    assertEquals(3, bytes1.length, "UTF_8 encoded length of 0xffff");

    // min: a repeat count whose encoding still fits in an int-sized array.
    int min = Integer.MAX_VALUE / bytes1.length - 1;
    int max = min + 3;

    // String of size min can be UTF_8 encoded.
    System.out.println("testing size: " + min);
    String s1 = s.repeat(min);
    byte[] bytes = s1.getBytes(StandardCharsets.UTF_8);
    int remaining = Integer.MAX_VALUE - bytes.length;
    // This fails when fewer than bytes1.length bytes of headroom are left,
    // so the failure message must report the remainder as too small.
    assertTrue(remaining >= bytes1.length, "remainder too small: " + remaining);

    // Strings of size min+1...min+2, throw OOME
    // The resulting byte array would exceed implementation limits
    for (int count = min + 1; count < max; count++) {
        System.out.println("testing size: " + count);
        final String s2 = s.repeat(count);
        OutOfMemoryError ex = assertThrows(OutOfMemoryError.class, () -> s2.getBytes(StandardCharsets.UTF_8));
        // Print the trace so the test output shows where the OOME was raised.
        ex.printStackTrace();
    }
}
}

1 comment on commit 212a253

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.