From 937a8681eb2f9d9f770ac7439c9fb47c633c36b7 Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Tue, 28 Oct 2025 14:16:12 +0100 Subject: [PATCH 01/17] 8369564: Provide a MemorySegment API to read strings with known lengths --- .../java/lang/foreign/MemorySegment.java | 45 +++++++++++++-- .../foreign/AbstractMemorySegmentImpl.java | 9 +++ .../jdk/internal/foreign/StringSupport.java | 45 +++++++++++++++ test/jdk/java/foreign/TestStringEncoding.java | 55 +++++++++++++++---- 4 files changed, 138 insertions(+), 16 deletions(-) diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 196f44d1abedc..333b370a06d10 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1296,12 +1296,7 @@ MemorySegment reinterpret(long newSize, * over the decoding process is required. *

* Getting a string from a segment with a known byte offset and - * known byte length can be done like so: - * {@snippet lang=java : - * byte[] bytes = new byte[length]; - * MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, length); - * return new String(bytes, charset); - * } + * known byte length can be done using {@link #getString(long, int, Charset)}. * * @param offset offset in bytes (relative to this segment address) at which this * access operation will occur @@ -1328,6 +1323,44 @@ MemorySegment reinterpret(long newSize, */ String getString(long offset, Charset charset); + /** + * Reads a string using the given byte length from this segment at the given offset, + * using the provided charset. + *

+ * This method always replaces malformed-input and unmappable-character + * sequences with this charset's default replacement string. The {@link + * java.nio.charset.CharsetDecoder} class should be used when more control + * over the decoding process is required. + * + * @param offset offset in bytes (relative to this segment address) at which this + * access operation will occur + * @param length byte length to be used for string conversion (not including any + * null termination) + * @param charset the charset used to {@linkplain Charset#newDecoder() decode} the + * string bytes + * @return a Java string constructed from the bytes read from the given starting + * address reading the given length of characters + * @throws IllegalArgumentException if the size of the string is greater than the + * largest string supported by the platform + * @throws IndexOutOfBoundsException if {@code offset < 0} + * @throws IndexOutOfBoundsException if {@code offset > byteSize() - (B + N)}, where: + *

+ * @throws IllegalStateException if the {@linkplain #scope() scope} associated with + * this segment is not {@linkplain Scope#isAlive() alive} + * @throws WrongThreadException if this method is called from a thread {@code T}, + * such that {@code isAccessibleBy(T) == false} + * @throws IllegalArgumentException if {@code charset} is not a + * {@linkplain StandardCharsets standard charset} + * @throws IllegalArgumentException if {@code length < 0} + */ + String getString(long offset, int length, Charset charset); + /** * Writes the given string into this segment at the given offset, converting it to * a null-terminated byte sequence using the {@linkplain StandardCharsets#UTF_8 UTF-8} diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index d7636032c2823..238bd529d4a17 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -535,6 +535,15 @@ public boolean equals(Object o) { unsafeGetOffset() == that.unsafeGetOffset(); } + @Override + public String getString(long offset, int length, Charset charset) { + if (length < 0) { + throw new IllegalArgumentException(); + } + Objects.requireNonNull(charset); + return StringSupport.read(this, offset, length, charset); + } + @Override public int hashCode() { return Objects.hash( diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index bb6cb2d391544..9c78473fe8b42 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -58,6 +58,15 @@ public static String read(AbstractMemorySegmentImpl segment, long offset, Charse }; } + @ForceInline + public static String read(AbstractMemorySegmentImpl 
segment, long offset, int len, Charset charset) { + return switch (CharsetKind.of(charset)) { + case SINGLE_BYTE -> readByte(segment, offset, len, charset); + case DOUBLE_BYTE -> readShort(segment, offset, len, charset); + case QUAD_BYTE -> readInt(segment, offset, len, charset); + }; + } + @ForceInline public static void write(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) { switch (CharsetKind.of(charset)) { @@ -80,6 +89,18 @@ private static String readByte(AbstractMemorySegmentImpl segment, long offset, C } } + @ForceInline + private static String readByte(AbstractMemorySegmentImpl segment, long offset, int len, Charset charset) { + final byte[] bytes = new byte[len]; + MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); + try { + return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); + } catch (CharacterCodingException _) { + // use replacement characters for malformed input + return new String(bytes, charset); + } + } + @ForceInline private static void writeByte(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) { int bytes = copyBytes(string, segment, charset, offset); @@ -99,6 +120,18 @@ private static String readShort(AbstractMemorySegmentImpl segment, long offset, } } + @ForceInline + private static String readShort(AbstractMemorySegmentImpl segment, long offset, int len, Charset charset) { + byte[] bytes = new byte[len]; + MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); + try { + return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); + } catch (CharacterCodingException _) { + // use replacement characters for malformed input + return new String(bytes, charset); + } + } + @ForceInline private static void writeShort(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) { int bytes = copyBytes(string, segment, charset, offset); @@ -118,6 +151,18 @@ private static String readInt(AbstractMemorySegmentImpl segment, long 
offset, Ch } } + @ForceInline + private static String readInt(AbstractMemorySegmentImpl segment, long offset, int len, Charset charset) { + byte[] bytes = new byte[len]; + MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); + try { + return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); + } catch (CharacterCodingException _) { + // use replacement characters for malformed input + return new String(bytes, charset); + } + } + @ForceInline private static void writeInt(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) { int bytes = copyBytes(string, segment, charset, offset); diff --git a/test/jdk/java/foreign/TestStringEncoding.java b/test/jdk/java/foreign/TestStringEncoding.java index 94732943b9d36..ef8de10a1860c 100644 --- a/test/jdk/java/foreign/TestStringEncoding.java +++ b/test/jdk/java/foreign/TestStringEncoding.java @@ -37,6 +37,7 @@ import java.util.Arrays; import java.util.List; import java.util.Random; +import java.util.Set; import java.util.function.UnaryOperator; import jdk.internal.foreign.AbstractMemorySegmentImpl; @@ -76,17 +77,8 @@ public void testStrings(String testString) { try (arena) { MemorySegment text = arena.allocateFrom(testString, charset); - int terminatorSize = "\0".getBytes(charset).length; - if (charset == StandardCharsets.UTF_16) { - terminatorSize -= 2; // drop BOM - } - // Note that the JDK's UTF_32 encoder doesn't add a BOM. - // This is legal under the Unicode standard, and means the byte order is BE. 
- // See: https://unicode.org/faq/utf_bom.html#gen7 - int expectedByteLength = - testString.getBytes(charset).length + - terminatorSize; + testString.getBytes(charset).length + terminatorSize(charset); assertEquals(text.byteSize(), expectedByteLength); @@ -102,6 +94,37 @@ public void testStrings(String testString) { } } + @Test(dataProvider = "strings") + public void testStringsLength(String testString) { + Set excluded = Set.of("yen", "snowman", "rainbow"); + // This test only works for certain strings where the last character is not special + if (!testString.isEmpty() && excluded.stream().noneMatch(testString::startsWith)) { + for (Charset charset : Charset.availableCharsets().values()) { + if (isStandard(charset)) { + for (Arena arena : arenas()) { + try (arena) { + MemorySegment text = arena.allocateFrom(testString, charset); + + String roundTrip = text.getString(0, + (int) text.byteSize() - terminatorSize(charset) * 2, charset); + if (charset.newEncoder().canEncode(testString)) { + assertEquals(roundTrip, testString.substring(0, testString.length() - 1)); + } + } + } + } + } + } + } + + @Test + public void testStringsLengthNegative() { + try (Arena arena = Arena.ofConfined()) { + var segment = arena.allocateFrom("abc"); + assertThrows(IllegalArgumentException.class, () -> segment.getString(0, -1, StandardCharsets.UTF_8)); + } + } + @Test(dataProvider = "strings") public void testStringsHeap(String testString) { for (Charset charset : singleByteCharsets()) { @@ -540,4 +563,16 @@ public static Object[][] charsetsAndSegments() { } return values.toArray(Object[][]::new); } + + static int terminatorSize(Charset charset) { + int terminatorSize = "\0".getBytes(charset).length; + if (charset == StandardCharsets.UTF_16) { + terminatorSize -= 2; // drop BOM + } + // Note that the JDK's UTF_32 encoder doesn't add a BOM. + // This is legal under the Unicode standard, and means the byte order is BE. 
+ // See: https://unicode.org/faq/utf_bom.html#gen7 + return terminatorSize; + } + } From cd6db90b03ea2f7bbcbcc3ed0f2b8a27fd6cffce Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Wed, 29 Oct 2025 17:33:05 +0100 Subject: [PATCH 02/17] Consolidate duplicate code in read methods --- .../foreign/AbstractMemorySegmentImpl.java | 2 +- .../jdk/internal/foreign/StringSupport.java | 78 +++---------------- 2 files changed, 13 insertions(+), 67 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index 238bd529d4a17..469d95deec180 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -541,7 +541,7 @@ public String getString(long offset, int length, Charset charset) { throw new IllegalArgumentException(); } Objects.requireNonNull(charset); - return StringSupport.read(this, offset, length, charset); + return StringSupport.read(this, offset, charset, length); } @Override diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index 9c78473fe8b42..3e045584e5d3b 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -59,12 +59,15 @@ public static String read(AbstractMemorySegmentImpl segment, long offset, Charse } @ForceInline - public static String read(AbstractMemorySegmentImpl segment, long offset, int len, Charset charset) { - return switch (CharsetKind.of(charset)) { - case SINGLE_BYTE -> readByte(segment, offset, len, charset); - case DOUBLE_BYTE -> readShort(segment, offset, len, charset); - case QUAD_BYTE -> readInt(segment, offset, len, charset); - }; + public static String read(AbstractMemorySegmentImpl 
segment, long offset, Charset charset, int len) { + final byte[] bytes = new byte[len]; + MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); + try { + return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); + } catch (CharacterCodingException _) { + // use replacement characters for malformed input + return new String(bytes, charset); + } } @ForceInline @@ -79,26 +82,7 @@ public static void write(AbstractMemorySegmentImpl segment, long offset, Charset @ForceInline private static String readByte(AbstractMemorySegmentImpl segment, long offset, Charset charset) { final int len = strlenByte(segment, offset, segment.byteSize()); - final byte[] bytes = new byte[len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); - try { - return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); - } catch (CharacterCodingException _) { - // use replacement characters for malformed input - return new String(bytes, charset); - } - } - - @ForceInline - private static String readByte(AbstractMemorySegmentImpl segment, long offset, int len, Charset charset) { - final byte[] bytes = new byte[len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); - try { - return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); - } catch (CharacterCodingException _) { - // use replacement characters for malformed input - return new String(bytes, charset); - } + return read(segment, offset, charset, len); } @ForceInline @@ -110,26 +94,7 @@ private static void writeByte(AbstractMemorySegmentImpl segment, long offset, Ch @ForceInline private static String readShort(AbstractMemorySegmentImpl segment, long offset, Charset charset) { int len = strlenShort(segment, offset, segment.byteSize()); - byte[] bytes = new byte[len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); - try { - return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); - } catch (CharacterCodingException _) { - // use replacement characters for 
malformed input - return new String(bytes, charset); - } - } - - @ForceInline - private static String readShort(AbstractMemorySegmentImpl segment, long offset, int len, Charset charset) { - byte[] bytes = new byte[len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); - try { - return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); - } catch (CharacterCodingException _) { - // use replacement characters for malformed input - return new String(bytes, charset); - } + return read(segment, offset, charset, len); } @ForceInline @@ -141,26 +106,7 @@ private static void writeShort(AbstractMemorySegmentImpl segment, long offset, C @ForceInline private static String readInt(AbstractMemorySegmentImpl segment, long offset, Charset charset) { int len = strlenInt(segment, offset, segment.byteSize()); - byte[] bytes = new byte[len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); - try { - return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); - } catch (CharacterCodingException _) { - // use replacement characters for malformed input - return new String(bytes, charset); - } - } - - @ForceInline - private static String readInt(AbstractMemorySegmentImpl segment, long offset, int len, Charset charset) { - byte[] bytes = new byte[len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); - try { - return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); - } catch (CharacterCodingException _) { - // use replacement characters for malformed input - return new String(bytes, charset); - } + return read(segment, offset, charset, len); } @ForceInline From 43a719ebb4e99bba7de68718e77a824337b7eb3e Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Thu, 30 Oct 2025 17:48:03 +0100 Subject: [PATCH 03/17] Update length to code units instead of bytes --- .../java/lang/foreign/MemorySegment.java | 20 +++++-------- .../java/lang/foreign/SegmentAllocator.java | 2 +- .../foreign/AbstractMemorySegmentImpl.java | 2 +- 
.../jdk/internal/foreign/StringSupport.java | 28 +++++++++++-------- test/jdk/java/foreign/TestStringEncoding.java | 19 ++++++------- 5 files changed, 35 insertions(+), 36 deletions(-) diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 333b370a06d10..3467d91dcbca5 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1296,7 +1296,7 @@ MemorySegment reinterpret(long newSize, * over the decoding process is required. *

* Getting a string from a segment with a known byte offset and - * known byte length can be done using {@link #getString(long, int, Charset)}. + * known byte length can be done using {@link #getString(long, Charset, int)}. * * @param offset offset in bytes (relative to this segment address) at which this * access operation will occur @@ -1324,7 +1324,7 @@ MemorySegment reinterpret(long newSize, String getString(long offset, Charset charset); /** - * Reads a string using the given byte length from this segment at the given offset, + * Reads a string using the given length from this segment at the given offset, * using the provided charset. *

* This method always replaces malformed-input and unmappable-character @@ -1334,23 +1334,17 @@ MemorySegment reinterpret(long newSize, * * @param offset offset in bytes (relative to this segment address) at which this * access operation will occur - * @param length byte length to be used for string conversion (not including any - * null termination) * @param charset the charset used to {@linkplain Charset#newDecoder() decode} the * string bytes + * @param length length to be used for string conversion, in code units for + * the provided charset * @return a Java string constructed from the bytes read from the given starting * address reading the given length of characters * @throws IllegalArgumentException if the size of the string is greater than the * largest string supported by the platform * @throws IndexOutOfBoundsException if {@code offset < 0} - * @throws IndexOutOfBoundsException if {@code offset > byteSize() - (B + N)}, where: - *

    - *
  • {@code B} is the size, in bytes, of the string encoded using the - * provided charset (e.g. {@code str.getBytes(charset).length});
  • - *
  • {@code N} is the size (in bytes) of the terminator char according - * to the provided charset. For instance, this is 1 for - * {@link StandardCharsets#US_ASCII} and 2 for {@link StandardCharsets#UTF_16}.
  • - *
+ * @throws IndexOutOfBoundsException if {@code offset > byteSize() - (length * N)}, + * where {@code N} is the size, in bytes, of a code unit in the provided charset * @throws IllegalStateException if the {@linkplain #scope() scope} associated with * this segment is not {@linkplain Scope#isAlive() alive} * @throws WrongThreadException if this method is called from a thread {@code T}, @@ -1359,7 +1353,7 @@ MemorySegment reinterpret(long newSize, * {@linkplain StandardCharsets standard charset} * @throws IllegalArgumentException if {@code length < 0} */ - String getString(long offset, int length, Charset charset); + String getString(long offset, Charset charset, int length); /** * Writes the given string into this segment at the given offset, converting it to diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java index 1297406dcf194..94290f6992be9 100644 --- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java +++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java @@ -134,7 +134,7 @@ default MemorySegment allocateFrom(String str) { default MemorySegment allocateFrom(String str, Charset charset) { Objects.requireNonNull(charset); Objects.requireNonNull(str); - int termCharSize = StringSupport.CharsetKind.of(charset).terminatorCharSize(); + int termCharSize = StringSupport.CharsetKind.of(charset).codeUnitSize(); MemorySegment segment; int length; if (StringSupport.bytesCompatible(str, charset)) { diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index 469d95deec180..9f087346bcfb1 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -536,7 +536,7 @@ public boolean equals(Object o) { } 
@Override - public String getString(long offset, int length, Charset charset) { + public String getString(long offset, Charset charset, int length) { if (length < 0) { throw new IllegalArgumentException(); } diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index 3e045584e5d3b..b44e728fae75b 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -59,9 +59,15 @@ public static String read(AbstractMemorySegmentImpl segment, long offset, Charse } @ForceInline - public static String read(AbstractMemorySegmentImpl segment, long offset, Charset charset, int len) { - final byte[] bytes = new byte[len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); + public static String read(AbstractMemorySegmentImpl segment, long offset, Charset charset, int length) { + int lengthBytes = length * CharsetKind.of(charset).codeUnitSize(); + return readBytes(segment, offset, charset, lengthBytes); + } + + @ForceInline + public static String readBytes(AbstractMemorySegmentImpl segment, long offset, Charset charset, int lengthBytes) { + final byte[] bytes = new byte[lengthBytes]; + MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, lengthBytes); try { return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); } catch (CharacterCodingException _) { @@ -82,7 +88,7 @@ public static void write(AbstractMemorySegmentImpl segment, long offset, Charset @ForceInline private static String readByte(AbstractMemorySegmentImpl segment, long offset, Charset charset) { final int len = strlenByte(segment, offset, segment.byteSize()); - return read(segment, offset, charset, len); + return readBytes(segment, offset, charset, len); } @ForceInline @@ -94,7 +100,7 @@ private static void writeByte(AbstractMemorySegmentImpl segment, long offset, Ch @ForceInline private static String 
readShort(AbstractMemorySegmentImpl segment, long offset, Charset charset) { int len = strlenShort(segment, offset, segment.byteSize()); - return read(segment, offset, charset, len); + return readBytes(segment, offset, charset, len); } @ForceInline @@ -106,7 +112,7 @@ private static void writeShort(AbstractMemorySegmentImpl segment, long offset, C @ForceInline private static String readInt(AbstractMemorySegmentImpl segment, long offset, Charset charset) { int len = strlenInt(segment, offset, segment.byteSize()); - return read(segment, offset, charset, len); + return readBytes(segment, offset, charset, len); } @ForceInline @@ -306,14 +312,14 @@ public enum CharsetKind { DOUBLE_BYTE(2), QUAD_BYTE(4); - final int terminatorCharSize; + final int codeUnitSize; - CharsetKind(int terminatorCharSize) { - this.terminatorCharSize = terminatorCharSize; + CharsetKind(int codeUnitSize) { + this.codeUnitSize = codeUnitSize; } - public int terminatorCharSize() { - return terminatorCharSize; + public int codeUnitSize() { + return codeUnitSize; } public static CharsetKind of(Charset charset) { diff --git a/test/jdk/java/foreign/TestStringEncoding.java b/test/jdk/java/foreign/TestStringEncoding.java index ef8de10a1860c..c8bafb1b8d861 100644 --- a/test/jdk/java/foreign/TestStringEncoding.java +++ b/test/jdk/java/foreign/TestStringEncoding.java @@ -78,7 +78,7 @@ public void testStrings(String testString) { MemorySegment text = arena.allocateFrom(testString, charset); int expectedByteLength = - testString.getBytes(charset).length + terminatorSize(charset); + testString.getBytes(charset).length + codeUnitSize(charset); assertEquals(text.byteSize(), expectedByteLength); @@ -104,11 +104,10 @@ public void testStringsLength(String testString) { for (Arena arena : arenas()) { try (arena) { MemorySegment text = arena.allocateFrom(testString, charset); - - String roundTrip = text.getString(0, - (int) text.byteSize() - terminatorSize(charset) * 2, charset); + int length = 
testString.getBytes(charset).length / codeUnitSize(charset); + String roundTrip = text.getString(0, charset, length); if (charset.newEncoder().canEncode(testString)) { - assertEquals(roundTrip, testString.substring(0, testString.length() - 1)); + assertEquals(roundTrip, testString); } } } @@ -121,7 +120,7 @@ public void testStringsLength(String testString) { public void testStringsLengthNegative() { try (Arena arena = Arena.ofConfined()) { var segment = arena.allocateFrom("abc"); - assertThrows(IllegalArgumentException.class, () -> segment.getString(0, -1, StandardCharsets.UTF_8)); + assertThrows(IllegalArgumentException.class, () -> segment.getString(0, StandardCharsets.UTF_8, -1)); } } @@ -564,15 +563,15 @@ public static Object[][] charsetsAndSegments() { return values.toArray(Object[][]::new); } - static int terminatorSize(Charset charset) { - int terminatorSize = "\0".getBytes(charset).length; + static int codeUnitSize(Charset charset) { + int codeUnitSize = "\0".getBytes(charset).length; if (charset == StandardCharsets.UTF_16) { - terminatorSize -= 2; // drop BOM + codeUnitSize -= 2; // drop BOM } // Note that the JDK's UTF_32 encoder doesn't add a BOM. // This is legal under the Unicode standard, and means the byte order is BE. 
// See: https://unicode.org/faq/utf_bom.html#gen7 - return terminatorSize; + return codeUnitSize; } } From 53b064f266ca07aca4c1ec286fe5978988db38ee Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Tue, 4 Nov 2025 13:51:11 +0100 Subject: [PATCH 04/17] Add benchmarks, and demo setStringWithoutNullTerminator --- .../java/lang/foreign/MemorySegment.java | 37 +++++++ .../foreign/AbstractMemorySegmentImpl.java | 8 ++ .../java/lang/foreign/FromJavaStringTest.java | 100 ++++++++++++++++++ .../java/lang/foreign/ToJavaStringTest.java | 20 +++- 4 files changed, 164 insertions(+), 1 deletion(-) create mode 100644 test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 3467d91dcbca5..5857a19b064df 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1422,6 +1422,43 @@ MemorySegment reinterpret(long newSize, */ void setString(long offset, String str, Charset charset); + /** + * Writes the given string into this segment at the given offset, converting it to a + * byte sequence using the provided charset. + *

+ * This method always replaces malformed-input and unmappable-character + * sequences with this charset's default replacement string. The {@link + * java.nio.charset.CharsetDecoder} class should be used when more control + * over the decoding process is required. + *

+ * If the given string contains any {@code '\0'} characters, they will be + * copied as well. This means that, depending on the method used to read + * the string, such as {@link MemorySegment#getString(long)}, the string + * will appear truncated when read again. + * + * @param offset offset in bytes (relative to this segment address) at which this + * access operation will occur, the final address of this write + * operation can be expressed as {@code address() + offset} + * @param str the Java string to be written into this segment + * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the + * string bytes. The {@code charset} must be a + * {@linkplain StandardCharsets standard charset} + * @return the number of bytes written. + * @throws IndexOutOfBoundsException if {@code offset < 0} + * @throws IndexOutOfBoundsException if {@code offset > byteSize() - B}, where: + * {@code B} is the size, in bytes, of the string encoded using the + * provided charset (e.g. {@code str.getBytes(charset).length}). + * @throws IllegalStateException if the {@linkplain #scope() scope} associated with + * this segment is not {@linkplain Scope#isAlive() alive} + * @throws WrongThreadException if this method is called from a thread {@code T}, + * such that {@code isAccessibleBy(T) == false} + * @throws IllegalArgumentException if {@code charset} is not a + * {@linkplain StandardCharsets standard charset} + * @throws IllegalArgumentException if this segment is + * {@linkplain #isReadOnly() read-only} + */ + int setStringWithoutNullTerminator(long offset, String str, Charset charset); + /** * Creates a memory segment that is backed by the same region of memory that backs * the given {@link Buffer} instance. 
The segment starts relative to the buffer's diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index 9f087346bcfb1..9b9869bd03c4d 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -960,4 +960,12 @@ public void setString(long offset, String str, Charset charset) { Objects.requireNonNull(str); StringSupport.write(this, offset, charset, str); } + + @ForceInline + @Override + public int setStringWithoutNullTerminator(long offset, String str, Charset charset) { + Objects.requireNonNull(charset); + Objects.requireNonNull(str); + return StringSupport.copyBytes(str, this, charset, offset); + } } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java new file mode 100644 index 0000000000000..b37e21ab0ac82 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). 
+ * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.java.lang.foreign; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.nio.charset.StandardCharsets.UTF_8; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork( + value = 3, + jvmArgs = {"--enable-native-access=ALL-UNNAMED", "-Djava.library.path=micro/native"}) +public class FromJavaStringTest { + + private String str; + private MemorySegment strSegment; + private int lengthBytes; + + @Param({"5", "20", "100", "200", "451"}) + int size; + + static { + System.loadLibrary("ToJavaString"); + } + + @Setup + public void setup() { + var arena = Arena.ofAuto(); + while (LOREM.length() < size) { + LOREM += LOREM; + } + str = LOREM.substring(0, size); + strSegment = arena.allocateFrom(str); + lengthBytes = 
str.getBytes(UTF_8).length; + } + + @Benchmark + public void panama_setString() { + strSegment.setString(0, str, UTF_8); + } + + @Benchmark + public void panama_setStringWithoutNullTerminator() { + strSegment.setStringWithoutNullTerminator(0, str, UTF_8); + } + + @Benchmark + public void panama_copyStringWithoutNullTerminator() { + byte[] bytes = str.getBytes(UTF_8); + MemorySegment.copy(bytes, 0, strSegment, JAVA_BYTE, 0, bytes.length); + } + + static String LOREM = + """ + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et + dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip + ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu + fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt + mollit anim id est laborum. + """; +} diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java index 901f4c7097f72..afeff2fcbc77e 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java @@ -22,6 +22,9 @@ */ package org.openjdk.bench.java.lang.foreign; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.nio.charset.StandardCharsets.UTF_8; + import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -47,6 +50,7 @@ public class ToJavaStringTest { private MemorySegment strSegment; + private int lengthBytes; @Param({"5", "20", "100", "200", "451"}) int size; @@ -61,7 +65,9 @@ public void setup() { while (LOREM.length() < size) { LOREM += LOREM; } - strSegment = arena.allocateFrom(LOREM.substring(0, size)); + var s = LOREM.substring(0, size); + strSegment = arena.allocateFrom(s); + 
lengthBytes = s.getBytes(UTF_8).length; } @Benchmark @@ -69,6 +75,18 @@ public String panama_readString() { return strSegment.getString(0); } + @Benchmark + public String panama_readStringLength() { + return strSegment.getString(0, UTF_8, lengthBytes); + } + + @Benchmark + public String panama_copyLength() { + byte[] bytes = new byte[lengthBytes]; + MemorySegment.copy(strSegment, JAVA_BYTE, 0, bytes, 0, lengthBytes); + return new String(bytes, UTF_8); + } + @Benchmark public String jni_readString() { return readString(strSegment.address()); From b729b5516cb9e7a93c3e6e83d4748bf09dab596c Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Mon, 17 Nov 2025 16:28:51 +0100 Subject: [PATCH 05/17] Remove setStringWithoutNullTerminator demo --- .../java/lang/foreign/MemorySegment.java | 37 ------- .../foreign/AbstractMemorySegmentImpl.java | 8 -- .../java/lang/foreign/FromJavaStringTest.java | 100 ------------------ .../java/lang/foreign/ToJavaStringTest.java | 18 ++-- 4 files changed, 9 insertions(+), 154 deletions(-) delete mode 100644 test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 5857a19b064df..3467d91dcbca5 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1422,43 +1422,6 @@ MemorySegment reinterpret(long newSize, */ void setString(long offset, String str, Charset charset); - /** - * Writes the given string into this segment at the given offset, converting it to a - * byte sequence using the provided charset. - *

- * This method always replaces malformed-input and unmappable-character - * sequences with this charset's default replacement string. The {@link - * java.nio.charset.CharsetDecoder} class should be used when more control - * over the decoding process is required. - *

- * If the given string contains any {@code '\0'} characters, they will be - * copied as well. This means that, depending on the method used to read - * the string, such as {@link MemorySegment#getString(long)}, the string - * will appear truncated when read again. - * - * @param offset offset in bytes (relative to this segment address) at which this - * access operation will occur, the final address of this write - * operation can be expressed as {@code address() + offset} - * @param str the Java string to be written into this segment - * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the - * string bytes. The {@code charset} must be a - * {@linkplain StandardCharsets standard charset} - * @return the number of bytes written. - * @throws IndexOutOfBoundsException if {@code offset < 0} - * @throws IndexOutOfBoundsException if {@code offset > byteSize() - B}, where: - * {@code B} is the size, in bytes, of the string encoded using the - * provided charset (e.g. {@code str.getBytes(charset).length}). - * @throws IllegalStateException if the {@linkplain #scope() scope} associated with - * this segment is not {@linkplain Scope#isAlive() alive} - * @throws WrongThreadException if this method is called from a thread {@code T}, - * such that {@code isAccessibleBy(T) == false} - * @throws IllegalArgumentException if {@code charset} is not a - * {@linkplain StandardCharsets standard charset} - * @throws IllegalArgumentException if this segment is - * {@linkplain #isReadOnly() read-only} - */ - int setStringWithoutNullTerminator(long offset, String str, Charset charset); - /** * Creates a memory segment that is backed by the same region of memory that backs * the given {@link Buffer} instance. 
The segment starts relative to the buffer's diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index 9b9869bd03c4d..9f087346bcfb1 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -960,12 +960,4 @@ public void setString(long offset, String str, Charset charset) { Objects.requireNonNull(str); StringSupport.write(this, offset, charset, str); } - - @ForceInline - @Override - public int setStringWithoutNullTerminator(long offset, String str, Charset charset) { - Objects.requireNonNull(charset); - Objects.requireNonNull(str); - return StringSupport.copyBytes(str, this, charset, offset); - } } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java deleted file mode 100644 index b37e21ab0ac82..0000000000000 --- a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java +++ /dev/null @@ -1,100 +0,0 @@ -/* - * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. - * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. - * - * This code is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 only, as - * published by the Free Software Foundation. - * - * This code is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License - * version 2 for more details (a copy is included in the LICENSE file that - * accompanied this code). 
- * - * You should have received a copy of the GNU General Public License version - * 2 along with this work; if not, write to the Free Software Foundation, - * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. - * - * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA - * or visit www.oracle.com if you need additional information or have any - * questions. - */ -package org.openjdk.bench.java.lang.foreign; - -import static java.lang.foreign.ValueLayout.JAVA_BYTE; -import static java.nio.charset.StandardCharsets.UTF_8; - -import org.openjdk.jmh.annotations.Benchmark; -import org.openjdk.jmh.annotations.BenchmarkMode; -import org.openjdk.jmh.annotations.Fork; -import org.openjdk.jmh.annotations.Measurement; -import org.openjdk.jmh.annotations.Mode; -import org.openjdk.jmh.annotations.OutputTimeUnit; -import org.openjdk.jmh.annotations.Param; -import org.openjdk.jmh.annotations.Scope; -import org.openjdk.jmh.annotations.Setup; -import org.openjdk.jmh.annotations.State; -import org.openjdk.jmh.annotations.Warmup; - -import java.lang.foreign.Arena; -import java.lang.foreign.MemorySegment; -import java.util.concurrent.TimeUnit; - -@BenchmarkMode(Mode.AverageTime) -@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS) -@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) -@State(Scope.Benchmark) -@OutputTimeUnit(TimeUnit.NANOSECONDS) -@Fork( - value = 3, - jvmArgs = {"--enable-native-access=ALL-UNNAMED", "-Djava.library.path=micro/native"}) -public class FromJavaStringTest { - - private String str; - private MemorySegment strSegment; - private int lengthBytes; - - @Param({"5", "20", "100", "200", "451"}) - int size; - - static { - System.loadLibrary("ToJavaString"); - } - - @Setup - public void setup() { - var arena = Arena.ofAuto(); - while (LOREM.length() < size) { - LOREM += LOREM; - } - str = LOREM.substring(0, size); - strSegment = arena.allocateFrom(str); - lengthBytes = 
str.getBytes(UTF_8).length; - } - - @Benchmark - public void panama_setString() { - strSegment.setString(0, str, UTF_8); - } - - @Benchmark - public void panama_setStringWithoutNullTerminator() { - strSegment.setStringWithoutNullTerminator(0, str, UTF_8); - } - - @Benchmark - public void panama_copyStringWithoutNullTerminator() { - byte[] bytes = str.getBytes(UTF_8); - MemorySegment.copy(bytes, 0, strSegment, JAVA_BYTE, 0, bytes.length); - } - - static String LOREM = - """ - Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et - dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip - ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu - fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt - mollit anim id est laborum. - """; -} diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java index afeff2fcbc77e..0d90ac7bfe9a4 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java @@ -50,7 +50,7 @@ public class ToJavaStringTest { private MemorySegment strSegment; - private int lengthBytes; + private int length; @Param({"5", "20", "100", "200", "451"}) int size; @@ -67,7 +67,7 @@ public void setup() { } var s = LOREM.substring(0, size); strSegment = arena.allocateFrom(s); - lengthBytes = s.getBytes(UTF_8).length; + length = s.getBytes(UTF_8).length; } @Benchmark @@ -77,19 +77,19 @@ public String panama_readString() { @Benchmark public String panama_readStringLength() { - return strSegment.getString(0, UTF_8, lengthBytes); + return strSegment.getString(0, UTF_8, length); } @Benchmark - public String panama_copyLength() { - byte[] bytes = new byte[lengthBytes]; - 
MemorySegment.copy(strSegment, JAVA_BYTE, 0, bytes, 0, lengthBytes); - return new String(bytes, UTF_8); + public String jni_readString() { + return readString(strSegment.address()); } @Benchmark - public String jni_readString() { - return readString(strSegment.address()); + public String panama_copyLength() { + byte[] bytes = new byte[length]; + MemorySegment.copy(strSegment, JAVA_BYTE, 0, bytes, 0, length); + return new String(bytes, UTF_8); } static native String readString(long addr); From 58525acad19ce33b4a83463fc17bf4eb55682cae Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Wed, 19 Nov 2025 00:05:31 +0100 Subject: [PATCH 06/17] . --- make/common/JavaCompilation.gmk | 2 +- .../share/classes/java/lang/String.java | 8 +- .../share/classes/java/lang/System.java | 8 +- .../java/lang/foreign/MemorySegment.java | 18 ++++ .../java/lang/foreign/SegmentAllocator.java | 61 ++++++++++- .../classes/java/nio/StringCharBuffer.java | 4 +- .../jdk/internal/access/JavaLangAccess.java | 4 +- .../foreign/AbstractMemorySegmentImpl.java | 10 ++ .../jdk/internal/foreign/StringSupport.java | 36 +++++-- .../java/lang/foreign/FromJavaStringTest.java | 101 ++++++++++++++++++ 10 files changed, 231 insertions(+), 21 deletions(-) create mode 100644 test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java diff --git a/make/common/JavaCompilation.gmk b/make/common/JavaCompilation.gmk index 33f5d10535a0b..6b1af95ec74af 100644 --- a/make/common/JavaCompilation.gmk +++ b/make/common/JavaCompilation.gmk @@ -280,7 +280,7 @@ define SetupJavaCompilationBody $1_FLAGS += -encoding utf-8 ifeq ($$(JAVA_WARNINGS_AS_ERRORS), true) - $1_FLAGS += -Werror + $1_FLAGS += -XDfoo endif ifneq ($$($1_DISABLED_WARNINGS), ) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 52f908c9e98bf..8116e25d30127 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ 
-2016,19 +2016,19 @@ public byte[] getBytes() { return encode(Charset.defaultCharset(), coder(), value); } - boolean bytesCompatible(Charset charset) { + boolean bytesCompatible(Charset charset, int srcIndex, int numChars) { if (isLatin1()) { if (charset == ISO_8859_1.INSTANCE) { return true; // ok, same encoding } else if (charset == UTF_8.INSTANCE || charset == US_ASCII.INSTANCE) { - return !StringCoding.hasNegatives(value, 0, value.length); // ok, if ASCII-compatible + return !StringCoding.hasNegatives(value, srcIndex, numChars); // ok, if ASCII-compatible } } return false; } - void copyToSegmentRaw(MemorySegment segment, long offset) { - MemorySegment.copy(value, 0, segment, ValueLayout.JAVA_BYTE, offset, value.length); + void copyToSegmentRaw(MemorySegment segment, long offset, int srcIndex, int numChars) { + MemorySegment.copy(value, srcIndex, segment, ValueLayout.JAVA_BYTE, offset, numChars); } /** diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java index c88cf4ac79775..8a62c8233c365 100644 --- a/src/java.base/share/classes/java/lang/System.java +++ b/src/java.base/share/classes/java/lang/System.java @@ -2315,13 +2315,13 @@ public String getLoaderNameID(ClassLoader loader) { } @Override - public void copyToSegmentRaw(String string, MemorySegment segment, long offset) { - string.copyToSegmentRaw(segment, offset); + public void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int numChars) { + string.copyToSegmentRaw(segment, offset, srcIndex, numChars); } @Override - public boolean bytesCompatible(String string, Charset charset) { - return string.bytesCompatible(charset); + public boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) { + return string.bytesCompatible(charset, srcIndex, numChars); } }); } diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java 
b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 3467d91dcbca5..2c33111595b3a 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -2633,6 +2633,24 @@ static void copy(Object srcArray, int srcIndex, elementCount); } + /** + * asd + * + * @param src src + * @param dstEncoding d + * @param srcIndex s + * @param dst d + * @param numChars n + */ + @ForceInline + static void copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, int numChars) { + Objects.requireNonNull(src); + Objects.requireNonNull(dstEncoding); + Objects.requireNonNull(dst); + + AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, numChars); + } + /** * Finds and returns the relative offset, in bytes, of the first mismatch between the * source and the destination segments. More specifically, the bytes at offset diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java index 94290f6992be9..cf0f41e312ca5 100644 --- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java +++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java @@ -31,7 +31,10 @@ import jdk.internal.foreign.StringSupport; import jdk.internal.vm.annotation.ForceInline; +import java.nio.ByteBuffer; +import java.nio.StringCharBuffer; import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; import java.nio.charset.StandardCharsets; import java.util.Objects; @@ -137,10 +140,10 @@ default MemorySegment allocateFrom(String str, Charset charset) { int termCharSize = StringSupport.CharsetKind.of(charset).codeUnitSize(); MemorySegment segment; int length; - if (StringSupport.bytesCompatible(str, charset)) { + if (StringSupport.bytesCompatible(str, charset, 0, str.length())) { length = str.length(); segment = allocateNoInit((long) length + termCharSize); - 
StringSupport.copyToSegmentRaw(str, segment, 0); + StringSupport.copyToSegmentRaw(str, segment, 0, 0, str.length()); } else { byte[] bytes = str.getBytes(charset); length = bytes.length; @@ -153,6 +156,60 @@ default MemorySegment allocateFrom(String str, Charset charset) { return segment; } + /** + * Converts a Java string into a null-terminated C string using the provided charset, + * and storing the result into a memory segment. + *

+ * This method always replaces malformed-input and unmappable-character + * sequences with this charset's default replacement byte array. The + * {@link java.nio.charset.CharsetEncoder} class should be used when more + * control over the encoding process is required. + *

+ * If the given string contains any {@code '\0'} characters, they will be + * copied as well. This means that, depending on the method used to read + * the string, such as {@link MemorySegment#getString(long)}, the string + * will appear truncated when read again. + * + * @param str the Java string to be converted into a C string + * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the + * string bytes + * @param srcIndex srcIndex + * @param numChars numChars + * @return a new native segment containing the converted C string + * @throws IllegalArgumentException if {@code charset} is not a + * {@linkplain StandardCharsets standard charset} + * @implSpec The default implementation for this method copies the contents of the + * provided Java string into a new memory segment obtained by calling + * {@code this.allocate(B + N)}, where: + *

    + *
  • {@code B} is the size, in bytes, of the string encoded using the + * provided charset (e.g. {@code str.getBytes(charset).length});
  • + *
  • {@code N} is the size (in bytes) of the terminator char according to the + * provided charset. For instance, this is 1 for {@link StandardCharsets#US_ASCII} + * and 2 for {@link StandardCharsets#UTF_16}.
  • + *
+ */ + @ForceInline + default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, int numChars) { + Objects.requireNonNull(charset); + Objects.requireNonNull(str); + MemorySegment segment; + if (StringSupport.bytesCompatible(str, charset, srcIndex, numChars)) { + segment = allocateNoInit(numChars); + StringSupport.copyToSegmentRaw(str, segment, 0, srcIndex, numChars); + } else if (srcIndex == 0 && numChars == str.length()) { + byte[] bytes = str.getBytes(charset); + segment = allocateNoInit(bytes.length); + MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.length); + } else { + StringCharBuffer scb = new StringCharBuffer(str, srcIndex, numChars); + ByteBuffer bytes = charset.encode(scb); + segment = allocateNoInit(bytes.limit()); + MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.limit()); + } + return segment; + } + /** * {@return a new memory segment initialized with the provided byte value} *

diff --git a/src/java.base/share/classes/java/nio/StringCharBuffer.java b/src/java.base/share/classes/java/nio/StringCharBuffer.java index 39cd6910f5d2d..b521238411e65 100644 --- a/src/java.base/share/classes/java/nio/StringCharBuffer.java +++ b/src/java.base/share/classes/java/nio/StringCharBuffer.java @@ -29,12 +29,12 @@ // ## If the sequence is a string, use reflection to share its array -final class StringCharBuffer // package-private +public final class StringCharBuffer extends CharBuffer { CharSequence str; - StringCharBuffer(CharSequence s, int start, int end) { // package-private + public StringCharBuffer(CharSequence s, int start, int end) { // package-private super(-1, start, end, s.length(), null); int n = s.length(); Objects.checkFromToIndex(start, end, n); diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java index fa6e5b4aac3a9..df4df8bd73c7f 100644 --- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java +++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java @@ -612,10 +612,10 @@ StackWalker newStackWalkerInstance(Set options, /** * Copy the string bytes to an existing segment, avoiding intermediate copies. */ - void copyToSegmentRaw(String string, MemorySegment segment, long offset); + void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int numChars); /** * Are the string bytes compatible with the given charset? 
*/ - boolean bytesCompatible(String string, Charset charset); + boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars); } diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index 9f087346bcfb1..58898cd6adc30 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -695,6 +695,16 @@ public static void copy(Object srcArray, int srcIndex, } } + @ForceInline + public static void copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, int numChars) { + Objects.requireNonNull(src); + Objects.requireNonNull(dstEncoding); + Objects.requireNonNull(dst); + + AbstractMemorySegmentImpl destImpl = (AbstractMemorySegmentImpl)dst; + StringSupport.write(destImpl, 0, dstEncoding, src, srcIndex, numChars); + } + // accessors @ForceInline diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index b44e728fae75b..1badbd393eae8 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -33,8 +33,11 @@ import jdk.internal.vm.annotation.ForceInline; import java.lang.foreign.MemorySegment; +import java.nio.ByteBuffer; +import java.nio.StringCharBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; import static java.lang.foreign.ValueLayout.*; @@ -76,6 +79,11 @@ public static String readBytes(AbstractMemorySegmentImpl segment, long offset, C } } + @ForceInline + public static void write(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string, int srcIndex, int length) { + copyBytes(string, segment, charset, 
offset, srcIndex, length); + } + @ForceInline public static void write(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) { switch (CharsetKind.of(charset)) { @@ -342,13 +350,13 @@ public static CharsetKind of(Charset charset) { } } - public static boolean bytesCompatible(String string, Charset charset) { - return JAVA_LANG_ACCESS.bytesCompatible(string, charset); + public static boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) { + return JAVA_LANG_ACCESS.bytesCompatible(string, charset, srcIndex, numChars); } public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset) { - if (bytesCompatible(string, charset)) { - copyToSegmentRaw(string, segment, offset); + if (bytesCompatible(string, charset, 0, string.length())) { + copyToSegmentRaw(string, segment, offset, 0, string.length()); return string.length(); } else { byte[] bytes = string.getBytes(charset); @@ -357,7 +365,23 @@ public static int copyBytes(String string, MemorySegment segment, Charset charse } } - public static void copyToSegmentRaw(String string, MemorySegment segment, long offset) { - JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset); + public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset, int srcIndex, int numChars) { + if (bytesCompatible(string, charset, srcIndex, numChars)) { + copyToSegmentRaw(string, segment, offset, srcIndex, numChars); + return string.length(); + } else if (srcIndex == 0 && numChars == string.length()) { + byte[] bytes = string.getBytes(); + MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length); + return bytes.length; + } else { + StringCharBuffer scb = new StringCharBuffer(string, srcIndex, numChars); + ByteBuffer byteBuffer = segment.asByteBuffer().position((int) offset); + charset.newEncoder().encode(scb, byteBuffer, false); + return byteBuffer.position(); + } + } + + public static void 
copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int numChars) { + JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset, srcIndex, numChars); } } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java new file mode 100644 index 0000000000000..80e15aa99cb3e --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. + * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. 
+ */ +package org.openjdk.bench.java.lang.foreign; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.nio.charset.StandardCharsets.UTF_8; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork( + value = 3, + jvmArgs = {"--enable-native-access=ALL-UNNAMED", "-Djava.library.path=micro/native"}) +public class FromJavaStringTest { + + private String str; + private MemorySegment strSegment; + private int lengthBytes; + + @Param({"5", "20", "100", "200", "451"}) + int size; + + static { + System.loadLibrary("ToJavaString"); + } + + @Setup + public void setup() { + var arena = Arena.ofAuto(); + while (LOREM.length() < size) { + LOREM += LOREM; + } + str = LOREM.substring(0, size); + strSegment = arena.allocateFrom(str); + lengthBytes = str.getBytes(UTF_8).length; + } + + // @Benchmark + // public void panama_setString() { + // strSegment.setString(0, str, UTF_8); + // } + + @Benchmark + public void panama_copy() { + MemorySegment.copy(str, UTF_8, 0, strSegment, str.length()); + } + + // @Benchmark + // public void panama_getBytes() { + // byte[] bytes = str.getBytes(UTF_8); + // MemorySegment.copy(bytes, 0, strSegment, JAVA_BYTE, 0, 
bytes.length); + // } + + static String LOREM = + """ + 💩 + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et + dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip + ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu + fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt + mollit anim id est laborum. + """; +} From 3f6ee815694570306011cf28b3787d3b153c605b Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Wed, 19 Nov 2025 09:31:16 +0100 Subject: [PATCH 07/17] Updates from panama-dev thread --- make/common/JavaCompilation.gmk | 2 +- .../java/lang/foreign/MemorySegment.java | 47 ++++++++++++++----- .../java/lang/foreign/SegmentAllocator.java | 34 +++++++------- .../classes/java/nio/StringCharBuffer.java | 4 +- .../foreign/AbstractMemorySegmentImpl.java | 4 +- .../jdk/internal/foreign/StringSupport.java | 30 +++++------- test/jdk/java/foreign/TestStringEncoding.java | 25 +++++----- .../java/lang/foreign/FromJavaStringTest.java | 27 ++++------- 8 files changed, 88 insertions(+), 85 deletions(-) diff --git a/make/common/JavaCompilation.gmk b/make/common/JavaCompilation.gmk index 6b1af95ec74af..33f5d10535a0b 100644 --- a/make/common/JavaCompilation.gmk +++ b/make/common/JavaCompilation.gmk @@ -280,7 +280,7 @@ define SetupJavaCompilationBody $1_FLAGS += -encoding utf-8 ifeq ($$(JAVA_WARNINGS_AS_ERRORS), true) - $1_FLAGS += -XDfoo + $1_FLAGS += -Werror endif ifneq ($$($1_DISABLED_WARNINGS), ) diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 2c33111595b3a..1256e9fad0429 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1324,8 +1324,8 @@ MemorySegment 
reinterpret(long newSize, String getString(long offset, Charset charset); /** - * Reads a string using the given length from this segment at the given offset, - * using the provided charset. + * Reads a string from this segment at the given offset, using the provided length + * and charset. *

* This method always replaces malformed-input and unmappable-character * sequences with this charset's default replacement string. The {@link @@ -1336,15 +1336,13 @@ MemorySegment reinterpret(long newSize, * access operation will occur * @param charset the charset used to {@linkplain Charset#newDecoder() decode} the * string bytes - * @param length length to be used for string conversion, in code units for - * the provided charset + * @param length length to be used for string conversion, in bytes * @return a Java string constructed from the bytes read from the given starting * address reading the given length of characters * @throws IllegalArgumentException if the size of the string is greater than the * largest string supported by the platform * @throws IndexOutOfBoundsException if {@code offset < 0} - * @throws IndexOutOfBoundsException if {@code offset > byteSize() - (length * N)}, - * where {@code N} is the size, in bytes, of a code unit in the provided charset + * @throws IndexOutOfBoundsException if {@code offset > byteSize() - length} * @throws IllegalStateException if the {@linkplain #scope() scope} associated with * this segment is not {@linkplain Scope#isAlive() alive} * @throws WrongThreadException if this method is called from a thread {@code T}, @@ -1353,7 +1351,7 @@ MemorySegment reinterpret(long newSize, * {@linkplain StandardCharsets standard charset} * @throws IllegalArgumentException if {@code length < 0} */ - String getString(long offset, Charset charset, int length); + String getString(long offset, Charset charset, long length); /** * Writes the given string into this segment at the given offset, converting it to @@ -2634,13 +2632,36 @@ static void copy(Object srcArray, int srcIndex, } /** - * asd + * Copies the byte sequence of the given string encoded using the provided charset + * to the destination segment. + *

+ * This method always replaces malformed-input and unmappable-character + * sequences with this charset's default replacement string. The {@link + * java.nio.charset.CharsetEncoder} class should be used when more control + * over the encoding process is required. + *

+ * If the given string contains any {@code '\0'} characters, they will be + * copied as well. This means that, depending on the method used to read + * the string, such as {@link MemorySegment#getString(long)}, the string + * will appear truncated when read again. * - * @param src src - * @param dstEncoding d - * @param srcIndex s - * @param dst d - * @param numChars n + * @param src the Java string to be written into this segment + * @param dstEncoding the charset used to {@linkplain Charset#newEncoder() encode} + * the string bytes. The {@code charset} must be a + * {@linkplain StandardCharsets standard charset} + * @param srcIndex the starting index of the source string + * @param dst the destination segment + * @param numChars the number of characters to be copied + * @throws IllegalStateException if the {@linkplain #scope() scope} associated with + * {@code dst} is not {@linkplain Scope#isAlive() alive} + * @throws WrongThreadException if this method is called from a thread {@code T}, + * such that {@code dst.isAccessibleBy(T) == false} + * @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} are {@code < 0} + * @throws IndexOutOfBoundsException if the {@code endIndex} is larger than the length of + * this {@code String} object, or {@code beginIndex} is larger than {@code endIndex}. 
+ * @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only} + * @throws IllegalArgumentException if {@code charset} is not a + * {@linkplain StandardCharsets standard charset} */ @ForceInline static void copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, int numChars) { diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java index cf0f41e312ca5..f00de9a199c93 100644 --- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java +++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java @@ -32,9 +32,8 @@ import jdk.internal.vm.annotation.ForceInline; import java.nio.ByteBuffer; -import java.nio.StringCharBuffer; +import java.nio.CharBuffer; import java.nio.charset.Charset; -import java.nio.charset.CharsetEncoder; import java.nio.charset.StandardCharsets; import java.util.Objects; @@ -137,7 +136,7 @@ default MemorySegment allocateFrom(String str) { default MemorySegment allocateFrom(String str, Charset charset) { Objects.requireNonNull(charset); Objects.requireNonNull(str); - int termCharSize = StringSupport.CharsetKind.of(charset).codeUnitSize(); + int termCharSize = StringSupport.CharsetKind.of(charset).terminatorCharSize(); MemorySegment segment; int length; if (StringSupport.bytesCompatible(str, charset, 0, str.length())) { @@ -170,24 +169,23 @@ default MemorySegment allocateFrom(String str, Charset charset) { * the string, such as {@link MemorySegment#getString(long)}, the string * will appear truncated when read again. 
* - * @param str the Java string to be converted into a C string - * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the - * string bytes - * @param srcIndex srcIndex - * @param numChars numChars + * @param str the Java string to be converted into a C string + * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the + * string bytes + * @param srcIndex the starting index of the source string + * @param numChars the number of characters to be copied * @return a new native segment containing the converted C string * @throws IllegalArgumentException if {@code charset} is not a * {@linkplain StandardCharsets standard charset} + * @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} are {@code < 0} + * @throws IndexOutOfBoundsException if the {@code endIndex} is larger than the length of + * this {@code String} object, or {@code beginIndex} is larger than {@code endIndex}. + * * @implSpec The default implementation for this method copies the contents of the * provided Java string into a new memory segment obtained by calling - * {@code this.allocate(B + N)}, where: - *

    - *
  • {@code B} is the size, in bytes, of the string encoded using the - * provided charset (e.g. {@code str.getBytes(charset).length});
  • - *
  • {@code N} is the size (in bytes) of the terminator char according to the - * provided charset. For instance, this is 1 for {@link StandardCharsets#US_ASCII} - * and 2 for {@link StandardCharsets#UTF_16}.
  • - *
+ * {@code this.allocate(B)}, where {@code B} is the size, in bytes, of + * the string encoded using the provided charset + * (e.g. {@code str.getBytes(charset).length}); */ @ForceInline default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, int numChars) { @@ -202,8 +200,8 @@ default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, in segment = allocateNoInit(bytes.length); MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.length); } else { - StringCharBuffer scb = new StringCharBuffer(str, srcIndex, numChars); - ByteBuffer bytes = charset.encode(scb); + CharBuffer charBuffer = CharBuffer.wrap(str, srcIndex, numChars); + ByteBuffer bytes = charset.encode(charBuffer); segment = allocateNoInit(bytes.limit()); MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.limit()); } diff --git a/src/java.base/share/classes/java/nio/StringCharBuffer.java b/src/java.base/share/classes/java/nio/StringCharBuffer.java index b521238411e65..39cd6910f5d2d 100644 --- a/src/java.base/share/classes/java/nio/StringCharBuffer.java +++ b/src/java.base/share/classes/java/nio/StringCharBuffer.java @@ -29,12 +29,12 @@ // ## If the sequence is a string, use reflection to share its array -public final class StringCharBuffer +final class StringCharBuffer // package-private extends CharBuffer { CharSequence str; - public StringCharBuffer(CharSequence s, int start, int end) { // package-private + StringCharBuffer(CharSequence s, int start, int end) { // package-private super(-1, start, end, s.length(), null); int n = s.length(); Objects.checkFromToIndex(start, end, n); diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index 58898cd6adc30..54fee9b19a257 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ 
b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -536,7 +536,7 @@ public boolean equals(Object o) { } @Override - public String getString(long offset, Charset charset, int length) { + public String getString(long offset, Charset charset, long length) { if (length < 0) { throw new IllegalArgumentException(); } @@ -702,7 +702,7 @@ public static void copy(String src, Charset dstEncoding, int srcIndex, MemorySeg Objects.requireNonNull(dst); AbstractMemorySegmentImpl destImpl = (AbstractMemorySegmentImpl)dst; - StringSupport.write(destImpl, 0, dstEncoding, src, srcIndex, numChars); + StringSupport.copyBytes(src, destImpl, dstEncoding, 0, srcIndex, numChars); } // accessors diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index 1badbd393eae8..dbcc7e26490c3 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -34,10 +34,9 @@ import java.lang.foreign.MemorySegment; import java.nio.ByteBuffer; -import java.nio.StringCharBuffer; +import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; -import java.nio.charset.CharsetEncoder; import static java.lang.foreign.ValueLayout.*; @@ -62,13 +61,13 @@ public static String read(AbstractMemorySegmentImpl segment, long offset, Charse } @ForceInline - public static String read(AbstractMemorySegmentImpl segment, long offset, Charset charset, int length) { - int lengthBytes = length * CharsetKind.of(charset).codeUnitSize(); - return readBytes(segment, offset, charset, lengthBytes); + public static String read(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) { + return readBytes(segment, offset, charset, length); } @ForceInline - public static String readBytes(AbstractMemorySegmentImpl segment, long offset, Charset 
charset, int lengthBytes) { + public static String readBytes(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) { + final int lengthBytes = (int) length; final byte[] bytes = new byte[lengthBytes]; MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, lengthBytes); try { @@ -79,11 +78,6 @@ public static String readBytes(AbstractMemorySegmentImpl segment, long offset, C } } - @ForceInline - public static void write(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string, int srcIndex, int length) { - copyBytes(string, segment, charset, offset, srcIndex, length); - } - @ForceInline public static void write(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) { switch (CharsetKind.of(charset)) { @@ -320,14 +314,14 @@ public enum CharsetKind { DOUBLE_BYTE(2), QUAD_BYTE(4); - final int codeUnitSize; + final int terminatorCharSize; - CharsetKind(int codeUnitSize) { - this.codeUnitSize = codeUnitSize; + CharsetKind(int terminatorCharSize) { + this.terminatorCharSize = terminatorCharSize; } - public int codeUnitSize() { - return codeUnitSize; + public int terminatorCharSize() { + return terminatorCharSize; } public static CharsetKind of(Charset charset) { @@ -374,9 +368,9 @@ public static int copyBytes(String string, MemorySegment segment, Charset charse MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length); return bytes.length; } else { - StringCharBuffer scb = new StringCharBuffer(string, srcIndex, numChars); + CharBuffer charBuffer = CharBuffer.wrap(string, srcIndex, numChars); ByteBuffer byteBuffer = segment.asByteBuffer().position((int) offset); - charset.newEncoder().encode(scb, byteBuffer, false); + charset.newEncoder().encode(charBuffer, byteBuffer, false); return byteBuffer.position(); } } diff --git a/test/jdk/java/foreign/TestStringEncoding.java b/test/jdk/java/foreign/TestStringEncoding.java index c8bafb1b8d861..cc69233d761b6 100644 --- 
a/test/jdk/java/foreign/TestStringEncoding.java +++ b/test/jdk/java/foreign/TestStringEncoding.java @@ -77,8 +77,17 @@ public void testStrings(String testString) { try (arena) { MemorySegment text = arena.allocateFrom(testString, charset); + int terminatorSize = "\0".getBytes(charset).length; + if (charset == StandardCharsets.UTF_16) { + terminatorSize -= 2; // drop BOM + } + // Note that the JDK's UTF_32 encoder doesn't add a BOM. + // This is legal under the Unicode standard, and means the byte order is BE. + // See: https://unicode.org/faq/utf_bom.html#gen7 + int expectedByteLength = - testString.getBytes(charset).length + codeUnitSize(charset); + testString.getBytes(charset).length + + terminatorSize; assertEquals(text.byteSize(), expectedByteLength); @@ -104,7 +113,7 @@ public void testStringsLength(String testString) { for (Arena arena : arenas()) { try (arena) { MemorySegment text = arena.allocateFrom(testString, charset); - int length = testString.getBytes(charset).length / codeUnitSize(charset); + int length = testString.getBytes(charset).length; String roundTrip = text.getString(0, charset, length); if (charset.newEncoder().canEncode(testString)) { assertEquals(roundTrip, testString); @@ -562,16 +571,4 @@ public static Object[][] charsetsAndSegments() { } return values.toArray(Object[][]::new); } - - static int codeUnitSize(Charset charset) { - int codeUnitSize = "\0".getBytes(charset).length; - if (charset == StandardCharsets.UTF_16) { - codeUnitSize -= 2; // drop BOM - } - // Note that the JDK's UTF_32 encoder doesn't add a BOM. - // This is legal under the Unicode standard, and means the byte order is BE. 
- // See: https://unicode.org/faq/utf_bom.html#gen7 - return codeUnitSize; - } - } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java index 80e15aa99cb3e..08babaf49a650 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java @@ -46,9 +46,7 @@ @Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) @State(Scope.Benchmark) @OutputTimeUnit(TimeUnit.NANOSECONDS) -@Fork( - value = 3, - jvmArgs = {"--enable-native-access=ALL-UNNAMED", "-Djava.library.path=micro/native"}) +@Fork(value = 3) public class FromJavaStringTest { private String str; @@ -58,10 +56,6 @@ public class FromJavaStringTest { @Param({"5", "20", "100", "200", "451"}) int size; - static { - System.loadLibrary("ToJavaString"); - } - @Setup public void setup() { var arena = Arena.ofAuto(); @@ -73,25 +67,24 @@ public void setup() { lengthBytes = str.getBytes(UTF_8).length; } - // @Benchmark - // public void panama_setString() { - // strSegment.setString(0, str, UTF_8); - // } + @Benchmark + public void panama_setString() { + strSegment.setString(0, str, UTF_8); + } @Benchmark public void panama_copy() { MemorySegment.copy(str, UTF_8, 0, strSegment, str.length()); } - // @Benchmark - // public void panama_getBytes() { - // byte[] bytes = str.getBytes(UTF_8); - // MemorySegment.copy(bytes, 0, strSegment, JAVA_BYTE, 0, bytes.length); - // } + @Benchmark + public void panama_getBytes() { + byte[] bytes = str.getBytes(UTF_8); + MemorySegment.copy(bytes, 0, strSegment, JAVA_BYTE, 0, bytes.length); + } static String LOREM = """ - đź’© Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 
Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu From 0593827ff6fe0297379c4953a3c67881ae8ccf6c Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Wed, 19 Nov 2025 11:10:58 +0100 Subject: [PATCH 08/17] Add a dstOffset parameter, stop using StringCharBuffer/CharsetEncoder::encode --- .../java/lang/foreign/MemorySegment.java | 17 ++++++++++------- .../java/lang/foreign/SegmentAllocator.java | 11 ++--------- .../foreign/AbstractMemorySegmentImpl.java | 4 ++-- .../jdk/internal/foreign/StringSupport.java | 11 ++--------- 4 files changed, 16 insertions(+), 27 deletions(-) diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 1256e9fad0429..8da2c1c844dc6 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1296,7 +1296,7 @@ MemorySegment reinterpret(long newSize, * over the decoding process is required. *

* Getting a string from a segment with a known byte offset and - * known byte length can be done using {@link #getString(long, Charset, int)}. + * known byte length can be done using {@link #getString(long, Charset, long)}. * * @param offset offset in bytes (relative to this segment address) at which this * access operation will occur @@ -2651,25 +2651,28 @@ static void copy(Object srcArray, int srcIndex, * {@linkplain StandardCharsets standard charset} * @param srcIndex the starting index of the source string * @param dst the destination segment + * @param dstOffset the starting offset, in bytes, of the destination segment * @param numChars the number of characters to be copied * @throws IllegalStateException if the {@linkplain #scope() scope} associated with * {@code dst} is not {@linkplain Scope#isAlive() alive} * @throws WrongThreadException if this method is called from a thread {@code T}, * such that {@code dst.isAccessibleBy(T) == false} - * @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} are {@code < 0} - * @throws IndexOutOfBoundsException if the {@code endIndex} is larger than the length of + * @throws IndexOutOfBoundsException if either {@code srcIndex}, {@code numChars}, or {@code dstOffset} + * are {@code < 0} + * @throws IndexOutOfBoundsException if the {@code endIndex} is larger than the length of * this {@code String} object, or {@code beginIndex} is larger than {@code endIndex}. * @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only} - * @throws IllegalArgumentException if {@code charset} is not a - * {@linkplain StandardCharsets standard charset} + * @throws IllegalArgumentException if {@code charset} is not a {@linkplain StandardCharsets standard charset} + * @throws IndexOutOfBoundsException if {@code dstOffset > dstSegment.byteSize() - B} where {@code B} is the size, + * in bytes, of the string encoded using the given charset. 
*/ @ForceInline - static void copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, int numChars) { + static void copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) { Objects.requireNonNull(src); Objects.requireNonNull(dstEncoding); Objects.requireNonNull(dst); - AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, numChars); + AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, dstOffset, numChars); } /** diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java index f00de9a199c93..1a7f03359aaa5 100644 --- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java +++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java @@ -31,8 +31,6 @@ import jdk.internal.foreign.StringSupport; import jdk.internal.vm.annotation.ForceInline; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; import java.nio.charset.Charset; import java.nio.charset.StandardCharsets; import java.util.Objects; @@ -195,15 +193,10 @@ default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, in if (StringSupport.bytesCompatible(str, charset, srcIndex, numChars)) { segment = allocateNoInit(numChars); StringSupport.copyToSegmentRaw(str, segment, 0, srcIndex, numChars); - } else if (srcIndex == 0 && numChars == str.length()) { - byte[] bytes = str.getBytes(charset); + } else { + byte[] bytes = str.substring(srcIndex, numChars).getBytes(charset); segment = allocateNoInit(bytes.length); MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.length); - } else { - CharBuffer charBuffer = CharBuffer.wrap(str, srcIndex, numChars); - ByteBuffer bytes = charset.encode(charBuffer); - segment = allocateNoInit(bytes.limit()); - MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.limit()); } return segment; } diff --git 
a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index 54fee9b19a257..581cf7acde08e 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -696,13 +696,13 @@ public static void copy(Object srcArray, int srcIndex, } @ForceInline - public static void copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, int numChars) { + public static void copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) { Objects.requireNonNull(src); Objects.requireNonNull(dstEncoding); Objects.requireNonNull(dst); AbstractMemorySegmentImpl destImpl = (AbstractMemorySegmentImpl)dst; - StringSupport.copyBytes(src, destImpl, dstEncoding, 0, srcIndex, numChars); + StringSupport.copyBytes(src, destImpl, dstEncoding, dstOffset, srcIndex, numChars); } // accessors diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index dbcc7e26490c3..e1d3d0d696386 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -33,8 +33,6 @@ import jdk.internal.vm.annotation.ForceInline; import java.lang.foreign.MemorySegment; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; @@ -363,15 +361,10 @@ public static int copyBytes(String string, MemorySegment segment, Charset charse if (bytesCompatible(string, charset, srcIndex, numChars)) { copyToSegmentRaw(string, segment, offset, srcIndex, numChars); return string.length(); - } else if (srcIndex == 0 && numChars == string.length()) { - byte[] bytes = string.getBytes(); + } else { + byte[] 
bytes = string.substring(srcIndex, numChars).getBytes(charset); MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length); return bytes.length; - } else { - CharBuffer charBuffer = CharBuffer.wrap(string, srcIndex, numChars); - ByteBuffer byteBuffer = segment.asByteBuffer().position((int) offset); - charset.newEncoder().encode(charBuffer, byteBuffer, false); - return byteBuffer.position(); } } From faa4c5b1dc2c1c0d8b973391e329645c9f2eb88b Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Thu, 20 Nov 2025 09:50:10 +0100 Subject: [PATCH 09/17] Review feedback --- src/java.base/share/classes/java/lang/String.java | 7 +++++-- src/java.base/share/classes/java/lang/System.java | 4 ++-- .../share/classes/java/lang/foreign/MemorySegment.java | 3 +-- .../share/classes/jdk/internal/access/JavaLangAccess.java | 2 +- .../share/classes/jdk/internal/foreign/StringSupport.java | 4 ++-- 5 files changed, 11 insertions(+), 9 deletions(-) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 8116e25d30127..1f9144c31cf1e 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -2027,8 +2027,11 @@ boolean bytesCompatible(Charset charset, int srcIndex, int numChars) { return false; } - void copyToSegmentRaw(MemorySegment segment, long offset, int srcIndex, int numChars) { - MemorySegment.copy(value, srcIndex, segment, ValueLayout.JAVA_BYTE, offset, numChars); + void copyToSegmentRaw(MemorySegment segment, long offset, int srcIndex, int srcLength) { + if (!isLatin1()) { + throw new IllegalStateException("This string does not support copyToSegmentRaw"); + } + MemorySegment.copy(value, srcIndex, segment, ValueLayout.JAVA_BYTE, offset, srcLength); } /** diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java index 8a62c8233c365..cb2b3022dfd72 100644 --- 
a/src/java.base/share/classes/java/lang/System.java +++ b/src/java.base/share/classes/java/lang/System.java @@ -2315,8 +2315,8 @@ public String getLoaderNameID(ClassLoader loader) { } @Override - public void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int numChars) { - string.copyToSegmentRaw(segment, offset, srcIndex, numChars); + public void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) { + string.copyToSegmentRaw(segment, offset, srcIndex, srcLength); } @Override diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 8da2c1c844dc6..939697fb7c622 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -2647,8 +2647,7 @@ static void copy(Object srcArray, int srcIndex, * * @param src the Java string to be written into this segment * @param dstEncoding the charset used to {@linkplain Charset#newEncoder() encode} - * the string bytes. The {@code charset} must be a - * {@linkplain StandardCharsets standard charset} + * the string bytes. * @param srcIndex the starting index of the source string * @param dst the destination segment * @param dstOffset the starting offset, in bytes, of the destination segment diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java index df4df8bd73c7f..9e1a138acfae0 100644 --- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java +++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java @@ -612,7 +612,7 @@ StackWalker newStackWalkerInstance(Set options, /** * Copy the string bytes to an existing segment, avoiding intermediate copies. 
*/ - void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int numChars); + void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength); /** * Are the string bytes compatible with the given charset? diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index e1d3d0d696386..d5a38808dd0e8 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -368,7 +368,7 @@ public static int copyBytes(String string, MemorySegment segment, Charset charse } } - public static void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int numChars) { - JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset, srcIndex, numChars); + public static void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) { + JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset, srcIndex, srcLength); } } From 214418f28d72efa0ac700ac3e4f2b4b101830a8a Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Thu, 20 Nov 2025 20:43:55 +0100 Subject: [PATCH 10/17] Review feedback * document assertion to link to bytesCompatible * throw IAE for length > Integer.MAX_VALUE * javadoc fixes --- src/java.base/share/classes/java/lang/String.java | 4 ++++ .../share/classes/java/lang/foreign/MemorySegment.java | 8 ++++---- .../share/classes/java/lang/foreign/SegmentAllocator.java | 8 +++----- .../share/classes/jdk/internal/foreign/StringSupport.java | 3 +++ test/jdk/java/foreign/TestStringEncoding.java | 5 ++--- 5 files changed, 16 insertions(+), 12 deletions(-) diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 1f9144c31cf1e..8dac474accad5 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ 
b/src/java.base/share/classes/java/lang/String.java @@ -2029,6 +2029,10 @@ boolean bytesCompatible(Charset charset, int srcIndex, int numChars) { void copyToSegmentRaw(MemorySegment segment, long offset, int srcIndex, int srcLength) { if (!isLatin1()) { + // This method is intended to be used together with bytesCompatible, which currently only supports + // latin1 strings. In the future, bytesCompatible could be updated to handle more cases, like + // UTF-16 strings (when the platform and charset endianness match, and the String doesn’t contain + // unpaired surrogates). If that happens, copyToSegmentRaw should also be updated. throw new IllegalStateException("This string does not support copyToSegmentRaw"); } MemorySegment.copy(value, srcIndex, segment, ValueLayout.JAVA_BYTE, offset, srcLength); diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 939697fb7c622..1610c75f7aa87 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1336,9 +1336,9 @@ MemorySegment reinterpret(long newSize, * access operation will occur * @param charset the charset used to {@linkplain Charset#newDecoder() decode} the * string bytes - * @param length length to be used for string conversion, in bytes + * @param length length in bytes of the string to read * @return a Java string constructed from the bytes read from the given starting - * address reading the given length of characters + * address reading the given length of bytes * @throws IllegalArgumentException if the size of the string is greater than the * largest string supported by the platform * @throws IndexOutOfBoundsException if {@code offset < 0} @@ -2658,8 +2658,8 @@ static void copy(Object srcArray, int srcIndex, * such that {@code dst.isAccessibleBy(T) == false} * @throws IndexOutOfBoundsException if either {@code srcIndex}, 
{@code numChars}, or {@code dstOffset} * are {@code < 0} - * @throws IndexOutOfBoundsException if the {@code endIndex} is larger than the length of - * this {@code String} object, or {@code beginIndex} is larger than {@code endIndex}. + * @throws IndexOutOfBoundsException if the {@code numChars} is larger than the length of + * this {@code String} object, or {@code srcIndex} is larger than {@code numChars}. * @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only} * @throws IllegalArgumentException if {@code charset} is not a {@linkplain StandardCharsets standard charset} * @throws IndexOutOfBoundsException if {@code dstOffset > dstSegment.byteSize() - B} where {@code B} is the size, diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java index 1a7f03359aaa5..8c1cff54f2fd4 100644 --- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java +++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java @@ -154,7 +154,7 @@ default MemorySegment allocateFrom(String str, Charset charset) { } /** - * Converts a Java string into a null-terminated C string using the provided charset, + * Converts a Java string into a C string using the provided charset, * and storing the result into a memory segment. *

* This method always replaces malformed-input and unmappable-character @@ -173,11 +173,9 @@ default MemorySegment allocateFrom(String str, Charset charset) { * @param srcIndex the starting index of the source string * @param numChars the number of characters to be copied * @return a new native segment containing the converted C string - * @throws IllegalArgumentException if {@code charset} is not a - * {@linkplain StandardCharsets standard charset} * @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} are {@code < 0} - * @throws IndexOutOfBoundsException if the {@code endIndex} is larger than the length of - * this {@code String} object, or {@code beginIndex} is larger than {@code endIndex}. + * @throws IndexOutOfBoundsException if the {@code numChars} is larger than the length of + * this {@code String} object, or {@code srcIndex} is larger than {@code numChars}. * * @implSpec The default implementation for this method copies the contents of the * provided Java string into a new memory segment obtained by calling diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index d5a38808dd0e8..285d7307205e5 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -65,6 +65,9 @@ public static String read(AbstractMemorySegmentImpl segment, long offset, Charse @ForceInline public static String readBytes(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) { + if (length > Integer.MAX_VALUE) { + throw new IllegalArgumentException("Required length exceeds implementation limit"); + } final int lengthBytes = (int) length; final byte[] bytes = new byte[lengthBytes]; MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, lengthBytes); diff --git a/test/jdk/java/foreign/TestStringEncoding.java 
b/test/jdk/java/foreign/TestStringEncoding.java index cc69233d761b6..45b5bf90e0ced 100644 --- a/test/jdk/java/foreign/TestStringEncoding.java +++ b/test/jdk/java/foreign/TestStringEncoding.java @@ -112,9 +112,8 @@ public void testStringsLength(String testString) { if (isStandard(charset)) { for (Arena arena : arenas()) { try (arena) { - MemorySegment text = arena.allocateFrom(testString, charset); - int length = testString.getBytes(charset).length; - String roundTrip = text.getString(0, charset, length); + MemorySegment text = arena.allocateFrom(testString, charset, 0, testString.length()); + String roundTrip = text.getString(0, charset, text.byteSize()); if (charset.newEncoder().canEncode(testString)) { assertEquals(roundTrip, testString); } From 31df3a22e3ea4b9eaa9e396b040abef8dbaefd0a Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Fri, 21 Nov 2025 11:32:01 +0100 Subject: [PATCH 11/17] Improve test coverage, and more fixes --- .../java/lang/foreign/MemorySegment.java | 7 +- .../java/lang/foreign/SegmentAllocator.java | 6 +- .../jdk/internal/foreign/StringSupport.java | 2 +- test/jdk/java/foreign/TestStringEncoding.java | 169 +++++++++++++++++- 4 files changed, 170 insertions(+), 14 deletions(-) diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 1610c75f7aa87..378e9f479a037 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1347,8 +1347,6 @@ MemorySegment reinterpret(long newSize, * this segment is not {@linkplain Scope#isAlive() alive} * @throws WrongThreadException if this method is called from a thread {@code T}, * such that {@code isAccessibleBy(T) == false} - * @throws IllegalArgumentException if {@code charset} is not a - * {@linkplain StandardCharsets standard charset} * @throws IllegalArgumentException if {@code length < 0} */ String getString(long 
offset, Charset charset, long length); @@ -2658,10 +2656,9 @@ static void copy(Object srcArray, int srcIndex, * such that {@code dst.isAccessibleBy(T) == false} * @throws IndexOutOfBoundsException if either {@code srcIndex}, {@code numChars}, or {@code dstOffset} * are {@code < 0} - * @throws IndexOutOfBoundsException if the {@code numChars} is larger than the length of - * this {@code String} object, or {@code srcIndex} is larger than {@code numChars}. + * @throws IndexOutOfBoundsException if the {@code numChars + srcIndex} is larger than the length of + * this {@code String} object. * @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only} - * @throws IllegalArgumentException if {@code charset} is not a {@linkplain StandardCharsets standard charset} * @throws IndexOutOfBoundsException if {@code dstOffset > dstSegment.byteSize() - B} where {@code B} is the size, * in bytes, of the string encoded using the given charset. */ diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java index 8c1cff54f2fd4..6d36e26522082 100644 --- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java +++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java @@ -174,8 +174,8 @@ default MemorySegment allocateFrom(String str, Charset charset) { * @param numChars the number of characters to be copied * @return a new native segment containing the converted C string * @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} are {@code < 0} - * @throws IndexOutOfBoundsException if the {@code numChars} is larger than the length of - * this {@code String} object, or {@code srcIndex} is larger than {@code numChars}. + * @throws IndexOutOfBoundsException if the {@code numChars + srcIndex} is larger than the length of + * this {@code String} object. 
* * @implSpec The default implementation for this method copies the contents of the * provided Java string into a new memory segment obtained by calling @@ -192,7 +192,7 @@ default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, in segment = allocateNoInit(numChars); StringSupport.copyToSegmentRaw(str, segment, 0, srcIndex, numChars); } else { - byte[] bytes = str.substring(srcIndex, numChars).getBytes(charset); + byte[] bytes = str.substring(srcIndex, srcIndex + numChars).getBytes(charset); segment = allocateNoInit(bytes.length); MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.length); } diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index 285d7307205e5..007d9f4d91874 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -365,7 +365,7 @@ public static int copyBytes(String string, MemorySegment segment, Charset charse copyToSegmentRaw(string, segment, offset, srcIndex, numChars); return string.length(); } else { - byte[] bytes = string.substring(srcIndex, numChars).getBytes(charset); + byte[] bytes = string.substring(srcIndex, srcIndex + numChars).getBytes(charset); MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length); return bytes.length; } diff --git a/test/jdk/java/foreign/TestStringEncoding.java b/test/jdk/java/foreign/TestStringEncoding.java index 45b5bf90e0ced..457d681536a60 100644 --- a/test/jdk/java/foreign/TestStringEncoding.java +++ b/test/jdk/java/foreign/TestStringEncoding.java @@ -105,15 +105,40 @@ public void testStrings(String testString) { @Test(dataProvider = "strings") public void testStringsLength(String testString) { - Set excluded = Set.of("yen", "snowman", "rainbow"); - // This test only works for certain strings where the last character is not special + Set excluded = 
Set.of("yen"); + // This test only works for strings that can round trip through the given charsets if (!testString.isEmpty() && excluded.stream().noneMatch(testString::startsWith)) { for (Charset charset : Charset.availableCharsets().values()) { - if (isStandard(charset)) { + if (charset.canEncode()) { for (Arena arena : arenas()) { try (arena) { MemorySegment text = arena.allocateFrom(testString, charset, 0, testString.length()); - String roundTrip = text.getString(0, charset, text.byteSize()); + long length = text.byteSize(); + assertEquals(length, testString.getBytes(charset).length); + String roundTrip = text.getString(0, charset, length); + if (charset.newEncoder().canEncode(testString)) { + assertEquals(roundTrip, testString); + } + } + } + } + } + } + } + + @Test(dataProvider = "strings") + public void testStringsCopy(String testString) { + Set excluded = Set.of("yen"); + // This test only works for strings that can round trip through the given charsets + if (!testString.isEmpty() && excluded.stream().noneMatch(testString::startsWith)) { + for (Charset charset : Charset.availableCharsets().values()) { + if (charset.canEncode()) { + for (Arena arena : arenas()) { + try (arena) { + byte[] bytes = testString.getBytes(charset); + MemorySegment text = arena.allocate(JAVA_BYTE, bytes.length); + MemorySegment.copy(testString, charset, 0, text, 0, testString.length()); + String roundTrip = text.getString(0, charset, bytes.length); if (charset.newEncoder().canEncode(testString)) { assertEquals(roundTrip, testString); } @@ -128,7 +153,74 @@ public void testStringsLength(String testString) { public void testStringsLengthNegative() { try (Arena arena = Arena.ofConfined()) { var segment = arena.allocateFrom("abc"); - assertThrows(IllegalArgumentException.class, () -> segment.getString(0, StandardCharsets.UTF_8, -1)); + assertThrows(IllegalArgumentException.class, () -> segment.getString(1, StandardCharsets.UTF_8, -1)); + } + } + + @Test + public void testCopyThrows() { 
+ try (Arena arena = Arena.ofConfined()) { + String testString = "abc"; + MemorySegment text = arena.allocate(JAVA_BYTE, 3); + MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length()); + // srcIndex < 0 + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, -1, text, 0, testString.length())); + // dstOffset < 0 + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, -1, testString.length())); + // numChars < 0 + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, -1)); + // srcIndex + numChars > length + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, 1, text, 0, testString.length())); + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length() + 1)); + // dstOffset > byteSize() + B + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 1, testString.length())); + } + } + + @Test + public void testAllocateFromThrows() { + try (Arena arena = Arena.ofConfined()) { + String testString = "abc"; + arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length()); + arena.allocateFrom(testString, StandardCharsets.UTF_8, 2, 1); + // srcIndex < 0 + assertThrows(IndexOutOfBoundsException.class, () -> + arena.allocateFrom(testString, StandardCharsets.UTF_8, -1, testString.length())); + // numChars < 0 + assertThrows(IndexOutOfBoundsException.class, () -> + arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, -1)); + // srcIndex + numChars > length + assertThrows(IndexOutOfBoundsException.class, () -> + arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length() + 1)); + assertThrows(IndexOutOfBoundsException.class, () 
-> + arena.allocateFrom(testString, StandardCharsets.UTF_8, 1, testString.length())); + } + } + + @Test + public void testGetStringThrows() { + try (Arena arena = Arena.ofConfined()) { + String testString = "abc"; + MemorySegment text = arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length()); + text.getString(0, StandardCharsets.UTF_8, 3); + // unsupported string size + assertThrows(IllegalArgumentException.class, () -> + text.getString(0, StandardCharsets.UTF_8, Integer.MAX_VALUE + 1L)); + // offset < 0 + assertThrows(IndexOutOfBoundsException.class, () -> + text.getString(-1, StandardCharsets.UTF_8, 3)); + // offset > byteSize() - length + assertThrows(IndexOutOfBoundsException.class, () -> + text.getString(1, StandardCharsets.UTF_8, 3)); + // length < 0 + assertThrows(IllegalArgumentException.class, () -> + text.getString(0, StandardCharsets.UTF_8, -1)); } } @@ -251,6 +343,73 @@ public void testOffset(String testString) { } } + @Test(dataProvider = "strings") + public void testSubstringGetString(String testString) { + if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) { + return; + } + for (var charset : singleByteCharsets()) { + for (var arena: arenas()) { + try (arena) { + MemorySegment text = arena.allocateFrom(testString, charset, 0, testString.length()); + for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) { + for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) { + // this test assumes single-byte charsets + String roundTrip = text.getString(srcIndex, charset, numChars); + String substring = testString.substring(srcIndex, srcIndex + numChars); + assertEquals(roundTrip, substring); + } + } + } + } + } + } + + @Test(dataProvider = "strings") + public void testSubstringAllocate(String testString) { + if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) { + return; + } + for (var charset : singleByteCharsets()) { + for (var arena: arenas()) { + try 
(arena) { + for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) { + for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) { + MemorySegment text = arena.allocateFrom(testString, charset, srcIndex, numChars); + String substring = testString.substring(srcIndex, srcIndex + numChars); + assertEquals(text.byteSize(), substring.getBytes(charset).length); + String roundTrip = text.getString(0, charset, text.byteSize()); + assertEquals(roundTrip, substring); + } + } + } + } + } + } + + @Test(dataProvider = "strings") + public void testSubstringCopy(String testString) { + if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) { + return; + } + for (var charset : singleByteCharsets()) { + for (var arena: arenas()) { + try (arena) { + for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) { + for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) { + String substring = testString.substring(srcIndex, srcIndex + numChars); + long length = substring.getBytes(charset).length; + MemorySegment text = arena.allocate(JAVA_BYTE, length); + MemorySegment.copy(testString, charset, srcIndex, text, 0, numChars); + String roundTrip = text.getString(0, charset, length); + assertEquals(roundTrip, substring); + } + } + } + } + } + } + private static final MemoryLayout CHAR_POINTER = ADDRESS .withTargetLayout(MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE)); private static final Linker LINKER = Linker.nativeLinker(); From 489bf15046ae1c57ea824fccc38919ba99af0d7b Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Fri, 21 Nov 2025 14:44:09 +0100 Subject: [PATCH 12/17] Review feedback * handle numChars + srcIndex overflow, and add tests * replace yen with a character that round trips --- .../java/lang/foreign/MemorySegment.java | 4 +-- .../java/lang/foreign/SegmentAllocator.java | 4 +-- .../jdk/internal/foreign/StringSupport.java | 3 ++ test/jdk/java/foreign/TestStringEncoding.java | 29 
++++++++++++++----- .../java/lang/foreign/FromJavaStringTest.java | 2 +- 5 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 378e9f479a037..661b256861d1c 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -2656,8 +2656,7 @@ static void copy(Object srcArray, int srcIndex, * such that {@code dst.isAccessibleBy(T) == false} * @throws IndexOutOfBoundsException if either {@code srcIndex}, {@code numChars}, or {@code dstOffset} * are {@code < 0} - * @throws IndexOutOfBoundsException if the {@code numChars + srcIndex} is larger than the length of - * this {@code String} object. + * @throws IndexOutOfBoundsException if {@code srcIndex > src.length() - numChars} * @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only} * @throws IndexOutOfBoundsException if {@code dstOffset > dstSegment.byteSize() - B} where {@code B} is the size, * in bytes, of the string encoded using the given charset. 
@@ -2667,6 +2666,7 @@ static void copy(String src, Charset dstEncoding, int srcIndex, MemorySegment ds Objects.requireNonNull(src); Objects.requireNonNull(dstEncoding); Objects.requireNonNull(dst); + Objects.checkFromIndexSize(srcIndex, numChars, src.length()); AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, dstOffset, numChars); } diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java index 6d36e26522082..f443ef1eea52d 100644 --- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java +++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java @@ -174,8 +174,7 @@ default MemorySegment allocateFrom(String str, Charset charset) { * @param numChars the number of characters to be copied * @return a new native segment containing the converted C string * @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} are {@code < 0} - * @throws IndexOutOfBoundsException if the {@code numChars + srcIndex} is larger than the length of - * this {@code String} object. 
+ * @throws IndexOutOfBoundsException if {@code srcIndex > str.length() - numChars} * * @implSpec The default implementation for this method copies the contents of the * provided Java string into a new memory segment obtained by calling @@ -187,6 +186,7 @@ default MemorySegment allocateFrom(String str, Charset charset) { default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, int numChars) { Objects.requireNonNull(charset); Objects.requireNonNull(str); + Objects.checkFromIndexSize(srcIndex, numChars, str.length()); MemorySegment segment; if (StringSupport.bytesCompatible(str, charset, srcIndex, numChars)) { segment = allocateNoInit(numChars); diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index 007d9f4d91874..1484cb53bf82f 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -30,11 +30,14 @@ import jdk.internal.misc.ScopedMemoryAccess; import jdk.internal.util.Architecture; import jdk.internal.util.ArraysSupport; +import jdk.internal.util.Preconditions; import jdk.internal.vm.annotation.ForceInline; import java.lang.foreign.MemorySegment; +import java.lang.reflect.Array; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; +import java.util.Objects; import static java.lang.foreign.ValueLayout.*; diff --git a/test/jdk/java/foreign/TestStringEncoding.java b/test/jdk/java/foreign/TestStringEncoding.java index 457d681536a60..cc8ba52aa93f1 100644 --- a/test/jdk/java/foreign/TestStringEncoding.java +++ b/test/jdk/java/foreign/TestStringEncoding.java @@ -105,9 +105,7 @@ public void testStrings(String testString) { @Test(dataProvider = "strings") public void testStringsLength(String testString) { - Set excluded = Set.of("yen"); - // This test only works for strings that can round trip through the given charsets - if 
(!testString.isEmpty() && excluded.stream().noneMatch(testString::startsWith)) { + if (!testString.isEmpty()) { for (Charset charset : Charset.availableCharsets().values()) { if (charset.canEncode()) { for (Arena arena : arenas()) { @@ -128,9 +126,7 @@ public void testStringsLength(String testString) { @Test(dataProvider = "strings") public void testStringsCopy(String testString) { - Set excluded = Set.of("yen"); - // This test only works for strings that can round trip through the given charsets - if (!testString.isEmpty() && excluded.stream().noneMatch(testString::startsWith)) { + if (!testString.isEmpty()) { for (Charset charset : Charset.availableCharsets().values()) { if (charset.canEncode()) { for (Arena arena : arenas()) { @@ -161,8 +157,14 @@ public void testStringsLengthNegative() { public void testCopyThrows() { try (Arena arena = Arena.ofConfined()) { String testString = "abc"; + String testString_notBytesCompatible = "snowman \u26C4"; MemorySegment text = arena.allocate(JAVA_BYTE, 3); + MemorySegment text_notBytesCompatible = arena.allocate(JAVA_BYTE, + testString_notBytesCompatible.getBytes(StandardCharsets.UTF_8).length); MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length()); + MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, 0, + text_notBytesCompatible, 0, + testString_notBytesCompatible.length()); // srcIndex < 0 assertThrows(IndexOutOfBoundsException.class, () -> MemorySegment.copy(testString, StandardCharsets.UTF_8, -1, text, 0, testString.length())); @@ -177,9 +179,14 @@ public void testCopyThrows() { MemorySegment.copy(testString, StandardCharsets.UTF_8, 1, text, 0, testString.length())); assertThrows(IndexOutOfBoundsException.class, () -> MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length() + 1)); - // dstOffset > byteSize() + B + // dstOffset > byteSize() - B assertThrows(IndexOutOfBoundsException.class, () -> MemorySegment.copy(testString, 
StandardCharsets.UTF_8, 0, text, 1, testString.length())); + // srcIndex + numChars overflows + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, Integer.MAX_VALUE, text, 0, Integer.MAX_VALUE + 3)); + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, Integer.MAX_VALUE, text, 0, Integer.MAX_VALUE + 3)); } } @@ -187,6 +194,7 @@ public void testCopyThrows() { public void testAllocateFromThrows() { try (Arena arena = Arena.ofConfined()) { String testString = "abc"; + String testString_notBytesCompatible = "snowman \u26C4"; arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length()); arena.allocateFrom(testString, StandardCharsets.UTF_8, 2, 1); // srcIndex < 0 @@ -200,6 +208,11 @@ public void testAllocateFromThrows() { arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length() + 1)); assertThrows(IndexOutOfBoundsException.class, () -> arena.allocateFrom(testString, StandardCharsets.UTF_8, 1, testString.length())); + // srcIndex + numChars overflows + assertThrows(IndexOutOfBoundsException.class, () -> + arena.allocateFrom(testString, StandardCharsets.UTF_8, 3, Integer.MAX_VALUE)); + assertThrows(IndexOutOfBoundsException.class, () -> arena.allocateFrom( + testString_notBytesCompatible, StandardCharsets.UTF_8, 3, Integer.MAX_VALUE)); } } @@ -591,7 +604,7 @@ public static Object[][] strings() { {""}, {"X"}, {"12345"}, - {"yen \u00A5"}, + {"section \u00A7"}, {"snowman \u26C4"}, {"rainbow \uD83C\uDF08"}, {"0"}, diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java index 08babaf49a650..1245779b1be33 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java @@ -74,7 +74,7 @@ public void 
panama_setString() { @Benchmark public void panama_copy() { - MemorySegment.copy(str, UTF_8, 0, strSegment, str.length()); + MemorySegment.copy(str, UTF_8, 0, strSegment, 0, str.length()); } @Benchmark From 903696b18d2866c76ec8729f42eb85231ee85e5e Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Fri, 21 Nov 2025 15:20:32 +0100 Subject: [PATCH 13/17] Use Utils.checkNonNegativeArgument --- .../jdk/internal/foreign/AbstractMemorySegmentImpl.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index 581cf7acde08e..5dc087ac6b371 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -537,9 +537,7 @@ public boolean equals(Object o) { @Override public String getString(long offset, Charset charset, long length) { - if (length < 0) { - throw new IllegalArgumentException(); - } + Utils.checkNonNegativeArgument(length, "length"); Objects.requireNonNull(charset); return StringSupport.read(this, offset, charset, length); } From 3b206ec0230bbff19a6e91d2dbb85812c99d4334 Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Fri, 21 Nov 2025 15:54:04 +0100 Subject: [PATCH 14/17] More javadoc updates --- .../share/classes/java/lang/foreign/MemorySegment.java | 8 +++++--- .../share/classes/java/lang/foreign/SegmentAllocator.java | 8 ++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 661b256861d1c..e2c8db8523760 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1331,6 +1331,8 @@ MemorySegment reinterpret(long 
newSize, * sequences with this charset's default replacement string. The {@link * java.nio.charset.CharsetDecoder} class should be used when more control * over the decoding process is required. + *

+ * If the string contains any {@code '\0'} characters, they will be read as well. * * @param offset offset in bytes (relative to this segment address) at which this * access operation will occur @@ -1338,7 +1340,7 @@ MemorySegment reinterpret(long newSize, * string bytes * @param length length in bytes of the string to read * @return a Java string constructed from the bytes read from the given starting - * address reading the given length of bytes + * address up to the given length * @throws IllegalArgumentException if the size of the string is greater than the * largest string supported by the platform * @throws IndexOutOfBoundsException if {@code offset < 0} @@ -2643,10 +2645,10 @@ static void copy(Object srcArray, int srcIndex, * the string, such as {@link MemorySegment#getString(long)}, the string * will appear truncated when read again. * - * @param src the Java string to be written into this segment + * @param src the Java string to be written into the destination segment * @param dstEncoding the charset used to {@linkplain Charset#newEncoder() encode} * the string bytes. - * @param srcIndex the starting index of the source string + * @param srcIndex the starting character index of the source string * @param dst the destination segment * @param dstOffset the starting offset, in bytes, of the destination segment * @param numChars the number of characters to be copied diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java index f443ef1eea52d..65c62133b0f34 100644 --- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java +++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java @@ -154,8 +154,8 @@ default MemorySegment allocateFrom(String str, Charset charset) { } /** - * Converts a Java string into a C string using the provided charset, - * and storing the result into a memory segment. 
+ * Encodes a Java string using the provided charset and stores the resulting + * byte array into a memory segment. *

* This method always replaces malformed-input and unmappable-character * sequences with this charset's default replacement byte array. The @@ -167,12 +167,12 @@ default MemorySegment allocateFrom(String str, Charset charset) { * the string, such as {@link MemorySegment#getString(long)}, the string * will appear truncated when read again. * - * @param str the Java string to be converted into a C string + * @param str the Java string to be encoded * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the * string bytes * @param srcIndex the starting index of the source string * @param numChars the number of characters to be copied - * @return a new native segment containing the converted C string + * @return a new native segment containing the encoded string * @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} are {@code < 0} * @throws IndexOutOfBoundsException if {@code srcIndex > str.length() - numChars} * From a84b9ea55dce854fd5fdb93b4753b9ab491a3c14 Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Sat, 22 Nov 2025 13:19:57 +0100 Subject: [PATCH 15/17] Return the number of copied bytes --- .../classes/java/lang/foreign/MemorySegment.java | 5 +++-- .../internal/foreign/AbstractMemorySegmentImpl.java | 4 ++-- .../classes/jdk/internal/foreign/StringSupport.java | 11 ++--------- test/jdk/java/foreign/TestStringEncoding.java | 3 ++- 4 files changed, 9 insertions(+), 14 deletions(-) diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index e2c8db8523760..8c0e62bee200e 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -2662,15 +2662,16 @@ static void copy(Object srcArray, int srcIndex, * @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only} * @throws IndexOutOfBoundsException if {@code 
dstOffset > dstSegment.byteSize() - B} where {@code B} is the size, * in bytes, of the string encoded using the given charset. + * @return the number of copied bytes. */ @ForceInline - static void copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) { + static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) { Objects.requireNonNull(src); Objects.requireNonNull(dstEncoding); Objects.requireNonNull(dst); Objects.checkFromIndexSize(srcIndex, numChars, src.length()); - AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, dstOffset, numChars); + return AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, dstOffset, numChars); } /** diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index 5dc087ac6b371..a98d318a2422c 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -694,13 +694,13 @@ public static void copy(Object srcArray, int srcIndex, } @ForceInline - public static void copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) { + public static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) { Objects.requireNonNull(src); Objects.requireNonNull(dstEncoding); Objects.requireNonNull(dst); AbstractMemorySegmentImpl destImpl = (AbstractMemorySegmentImpl)dst; - StringSupport.copyBytes(src, destImpl, dstEncoding, dstOffset, srcIndex, numChars); + return StringSupport.copyBytes(src, destImpl, dstEncoding, dstOffset, srcIndex, numChars); } // accessors diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java 
b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index 1484cb53bf82f..7cbe8ab0d48cb 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -353,20 +353,13 @@ public static boolean bytesCompatible(String string, Charset charset, int srcInd } public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset) { - if (bytesCompatible(string, charset, 0, string.length())) { - copyToSegmentRaw(string, segment, offset, 0, string.length()); - return string.length(); - } else { - byte[] bytes = string.getBytes(charset); - MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length); - return bytes.length; - } + return copyBytes(string, segment, charset, offset, 0, string.length()); } public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset, int srcIndex, int numChars) { if (bytesCompatible(string, charset, srcIndex, numChars)) { copyToSegmentRaw(string, segment, offset, srcIndex, numChars); - return string.length(); + return numChars; } else { byte[] bytes = string.substring(srcIndex, srcIndex + numChars).getBytes(charset); MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length); diff --git a/test/jdk/java/foreign/TestStringEncoding.java b/test/jdk/java/foreign/TestStringEncoding.java index cc8ba52aa93f1..e9e47420a6844 100644 --- a/test/jdk/java/foreign/TestStringEncoding.java +++ b/test/jdk/java/foreign/TestStringEncoding.java @@ -413,9 +413,10 @@ public void testSubstringCopy(String testString) { String substring = testString.substring(srcIndex, srcIndex + numChars); long length = substring.getBytes(charset).length; MemorySegment text = arena.allocate(JAVA_BYTE, length); - MemorySegment.copy(testString, charset, srcIndex, text, 0, numChars); + long copied = MemorySegment.copy(testString, charset, srcIndex, text, 0, numChars); String roundTrip = 
text.getString(0, charset, length); assertEquals(roundTrip, substring); + assertEquals(copied, length); } } } From 396f9426109ef3433d7ee2abea0c9b54e6a16a53 Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Mon, 24 Nov 2025 10:43:27 +0100 Subject: [PATCH 16/17] Update discussion of truncated reads of strings containing \0 --- .../share/classes/java/lang/foreign/MemorySegment.java | 9 +++++++-- .../classes/java/lang/foreign/SegmentAllocator.java | 6 ++++-- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 8c0e62bee200e..fa4534adf915f 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1333,6 +1333,9 @@ MemorySegment reinterpret(long newSize, * over the decoding process is required. * <p>
* If the string contains any {@code '\0'} characters, they will be read as well. + * This differs from {@link #getString(long, Charset)}, which will only read up + * to the first {@code '\0'}, resulting in truncation for string data that contains + * the {@code '\0'} character. * * @param offset offset in bytes (relative to this segment address) at which this * access operation will occur @@ -1391,7 +1394,8 @@ MemorySegment reinterpret(long newSize, * If the given string contains any {@code '\0'} characters, they will be * copied as well. This means that, depending on the method used to read * the string, such as {@link MemorySegment#getString(long)}, the string - * will appear truncated when read again. + * will appear truncated when read again. The string can be read without + * truncation using {@link #getString(long, Charset, long)}. * * @param offset offset in bytes (relative to this segment address) at which this * access operation will occur, the final address of this write @@ -2643,7 +2647,8 @@ static void copy(Object srcArray, int srcIndex, * If the given string contains any {@code '\0'} characters, they will be * copied as well. This means that, depending on the method used to read * the string, such as {@link MemorySegment#getString(long)}, the string - * will appear truncated when read again. + * will appear truncated when read again. The string can be read without + * truncation using {@link #getString(long, Charset, long)}. 
* * @param src the Java string to be written into the destination segment * @param dstEncoding the charset used to {@linkplain Charset#newEncoder() encode} diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java index 65c62133b0f34..5b213af544f74 100644 --- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java +++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java @@ -111,7 +111,8 @@ default MemorySegment allocateFrom(String str) { * If the given string contains any {@code '\0'} characters, they will be * copied as well. This means that, depending on the method used to read * the string, such as {@link MemorySegment#getString(long)}, the string - * will appear truncated when read again. + * will appear truncated when read again. The string can be read without + * truncation using {@link MemorySegment#getString(long, Charset, long)}. * * @param str the Java string to be converted into a C string * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the @@ -165,7 +166,8 @@ default MemorySegment allocateFrom(String str, Charset charset) { * If the given string contains any {@code '\0'} characters, they will be * copied as well. This means that, depending on the method used to read * the string, such as {@link MemorySegment#getString(long)}, the string - * will appear truncated when read again. + * will appear truncated when read again. The string can be read without + * truncation using {@link MemorySegment#getString(long, Charset, long)}. 
* * @param str the Java string to be encoded * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the From 6065acd88dc5d178a3566cd954f50dcb128ee513 Mon Sep 17 00:00:00 2001 From: Liam Miller-Cushon Date: Tue, 25 Nov 2025 20:04:51 +0100 Subject: [PATCH 17/17] Review feedback --- .../share/classes/java/lang/foreign/MemorySegment.java | 5 +++-- .../openjdk/bench/java/lang/foreign/FromJavaStringTest.java | 6 +++--- .../openjdk/bench/java/lang/foreign/ToJavaStringTest.java | 6 +++--- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index fa4534adf915f..195955b1a9218 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1341,7 +1341,8 @@ MemorySegment reinterpret(long newSize, * access operation will occur * @param charset the charset used to {@linkplain Charset#newDecoder() decode} the * string bytes - * @param length length in bytes of the string to read + * @param length length, in bytes, of the region of memory to read and decode into + * a string * @return a Java string constructed from the bytes read from the given starting * address up to the given length * @throws IllegalArgumentException if the size of the string is greater than the @@ -2666,7 +2667,7 @@ static void copy(Object srcArray, int srcIndex, * @throws IndexOutOfBoundsException if {@code srcIndex > src.length() - numChars} * @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only} * @throws IndexOutOfBoundsException if {@code dstOffset > dstSegment.byteSize() - B} where {@code B} is the size, - * in bytes, of the string encoded using the given charset. + * in bytes, of the substring of {@code src} encoded using the given charset * @return the number of copied bytes. 
*/ @ForceInline diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java index 1245779b1be33..ba559b52344b8 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java @@ -68,17 +68,17 @@ public void setup() { } @Benchmark - public void panama_setString() { + public void segment_setString() { strSegment.setString(0, str, UTF_8); } @Benchmark - public void panama_copy() { + public void segment_copyStringRaw() { MemorySegment.copy(str, UTF_8, 0, strSegment, 0, str.length()); } @Benchmark - public void panama_getBytes() { + public void segment_copyStringBytes() { byte[] bytes = str.getBytes(UTF_8); MemorySegment.copy(bytes, 0, strSegment, JAVA_BYTE, 0, bytes.length); } diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java index 0d90ac7bfe9a4..c3e8f3aaca425 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java @@ -71,12 +71,12 @@ public void setup() { } @Benchmark - public String panama_readString() { + public String segment_getString() { return strSegment.getString(0); } @Benchmark - public String panama_readStringLength() { + public String segment_getStringLength() { return strSegment.getString(0, UTF_8, length); } @@ -86,7 +86,7 @@ public String jni_readString() { } @Benchmark - public String panama_copyLength() { + public String segment_copyStringBytes() { byte[] bytes = new byte[length]; MemorySegment.copy(strSegment, JAVA_BYTE, 0, bytes, 0, length); return new String(bytes, UTF_8);