diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java index 52f908c9e98bf..8dac474accad5 100644 --- a/src/java.base/share/classes/java/lang/String.java +++ b/src/java.base/share/classes/java/lang/String.java @@ -2016,19 +2016,26 @@ public byte[] getBytes() { return encode(Charset.defaultCharset(), coder(), value); } - boolean bytesCompatible(Charset charset) { + boolean bytesCompatible(Charset charset, int srcIndex, int numChars) { if (isLatin1()) { if (charset == ISO_8859_1.INSTANCE) { return true; // ok, same encoding } else if (charset == UTF_8.INSTANCE || charset == US_ASCII.INSTANCE) { - return !StringCoding.hasNegatives(value, 0, value.length); // ok, if ASCII-compatible + return !StringCoding.hasNegatives(value, srcIndex, numChars); // ok, if ASCII-compatible } } return false; } - void copyToSegmentRaw(MemorySegment segment, long offset) { - MemorySegment.copy(value, 0, segment, ValueLayout.JAVA_BYTE, offset, value.length); + void copyToSegmentRaw(MemorySegment segment, long offset, int srcIndex, int srcLength) { + if (!isLatin1()) { + // This method is intended to be used together with bytesCompatible, which currently only supports + // latin1 strings. In the future, bytesCompatible could be updated to handle more cases, like + // UTF-16 strings (when the platform and charset endianness match, and the String doesn't contain + // unpaired surrogates). If that happens, copyToSegmentRaw should also be updated. 
+ throw new IllegalStateException("This string does not support copyToSegmentRaw"); + } + MemorySegment.copy(value, srcIndex, segment, ValueLayout.JAVA_BYTE, offset, srcLength); } /** diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java index c88cf4ac79775..cb2b3022dfd72 100644 --- a/src/java.base/share/classes/java/lang/System.java +++ b/src/java.base/share/classes/java/lang/System.java @@ -2315,13 +2315,13 @@ public String getLoaderNameID(ClassLoader loader) { } @Override - public void copyToSegmentRaw(String string, MemorySegment segment, long offset) { - string.copyToSegmentRaw(segment, offset); + public void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) { + string.copyToSegmentRaw(segment, offset, srcIndex, srcLength); } @Override - public boolean bytesCompatible(String string, Charset charset) { - return string.bytesCompatible(charset); + public boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) { + return string.bytesCompatible(charset, srcIndex, numChars); } }); } diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java index 196f44d1abedc..195955b1a9218 100644 --- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java +++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java @@ -1296,12 +1296,7 @@ MemorySegment reinterpret(long newSize, * over the decoding process is required. *

* Getting a string from a segment with a known byte offset and - * known byte length can be done like so: - * {@snippet lang=java : - * byte[] bytes = new byte[length]; - * MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, length); - * return new String(bytes, charset); - * } + * known byte length can be done using {@link #getString(long, Charset, long)}. * * @param offset offset in bytes (relative to this segment address) at which this * access operation will occur @@ -1328,6 +1323,40 @@ MemorySegment reinterpret(long newSize, */ String getString(long offset, Charset charset); + /** + * Reads a string from this segment at the given offset, using the provided length + * and charset. + *

+ * This method always replaces malformed-input and unmappable-character + * sequences with this charset's default replacement string. The {@link + * java.nio.charset.CharsetDecoder} class should be used when more control + * over the decoding process is required. + *

+ * If the string contains any {@code '\0'} characters, they will be read as well. + * This differs from {@link #getString(long, Charset)}, which will only read up + * to the first {@code '\0'}, resulting in truncation for string data that contains + * the {@code '\0'} character. + * + * @param offset offset in bytes (relative to this segment address) at which this + * access operation will occur + * @param charset the charset used to {@linkplain Charset#newDecoder() decode} the + * string bytes + * @param length length, in bytes, of the region of memory to read and decode into + * a string + * @return a Java string constructed from the bytes read from the given starting + * address up to the given length + * @throws IllegalArgumentException if the size of the string is greater than the + * largest string supported by the platform + * @throws IndexOutOfBoundsException if {@code offset < 0} + * @throws IndexOutOfBoundsException if {@code offset > byteSize() - length} + * @throws IllegalStateException if the {@linkplain #scope() scope} associated with + * this segment is not {@linkplain Scope#isAlive() alive} + * @throws WrongThreadException if this method is called from a thread {@code T}, + * such that {@code isAccessibleBy(T) == false} + * @throws IllegalArgumentException if {@code length < 0} + */ + String getString(long offset, Charset charset, long length); + /** * Writes the given string into this segment at the given offset, converting it to * a null-terminated byte sequence using the {@linkplain StandardCharsets#UTF_8 UTF-8} @@ -1366,7 +1395,8 @@ MemorySegment reinterpret(long newSize, * If the given string contains any {@code '\0'} characters, they will be * copied as well. This means that, depending on the method used to read * the string, such as {@link MemorySegment#getString(long)}, the string - * will appear truncated when read again. + * will appear truncated when read again. 
The string can be read without + * truncation using {@link #getString(long, Charset, long)}. * * @param offset offset in bytes (relative to this segment address) at which this * access operation will occur, the final address of this write @@ -2606,6 +2636,50 @@ static void copy(Object srcArray, int srcIndex, elementCount); } + /** + * Copies the byte sequence of the given string encoded using the provided charset + * to the destination segment. + *

+ * This method always replaces malformed-input and unmappable-character + * sequences with this charset's default replacement byte array. The {@link + * java.nio.charset.CharsetEncoder} class should be used when more control + * over the encoding process is required. + *

+ * If the given string contains any {@code '\0'} characters, they will be + * copied as well. This means that, depending on the method used to read + * the string, such as {@link MemorySegment#getString(long)}, the string + * will appear truncated when read again. The string can be read without + * truncation using {@link #getString(long, Charset, long)}. + * + * @param src the Java string to be written into the destination segment + * @param dstEncoding the charset used to {@linkplain Charset#newEncoder() encode} + * the string bytes. + * @param srcIndex the starting character index of the source string + * @param dst the destination segment + * @param dstOffset the starting offset, in bytes, of the destination segment + * @param numChars the number of characters to be copied + * @throws IllegalStateException if the {@linkplain #scope() scope} associated with + * {@code dst} is not {@linkplain Scope#isAlive() alive} + * @throws WrongThreadException if this method is called from a thread {@code T}, + * such that {@code dst.isAccessibleBy(T) == false} + * @throws IndexOutOfBoundsException if either {@code srcIndex}, {@code numChars}, or {@code dstOffset} + * are {@code < 0} + * @throws IndexOutOfBoundsException if {@code srcIndex > src.length() - numChars} + * @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only} + * @throws IndexOutOfBoundsException if {@code dstOffset > dst.byteSize() - B} where {@code B} is the size, + * in bytes, of the substring of {@code src} encoded using the given charset + * @return the number of copied bytes. 
+ */ + @ForceInline + static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) { + Objects.requireNonNull(src); + Objects.requireNonNull(dstEncoding); + Objects.requireNonNull(dst); + Objects.checkFromIndexSize(srcIndex, numChars, src.length()); + + return AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, dstOffset, numChars); + } + /** * Finds and returns the relative offset, in bytes, of the first mismatch between the * source and the destination segments. More specifically, the bytes at offset diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java index 1297406dcf194..5b213af544f74 100644 --- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java +++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java @@ -111,7 +111,8 @@ default MemorySegment allocateFrom(String str) { * If the given string contains any {@code '\0'} characters, they will be * copied as well. This means that, depending on the method used to read * the string, such as {@link MemorySegment#getString(long)}, the string - * will appear truncated when read again. + * will appear truncated when read again. The string can be read without + * truncation using {@link MemorySegment#getString(long, Charset, long)}. 
* * @param str the Java string to be converted into a C string * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the @@ -137,10 +138,10 @@ default MemorySegment allocateFrom(String str, Charset charset) { int termCharSize = StringSupport.CharsetKind.of(charset).terminatorCharSize(); MemorySegment segment; int length; - if (StringSupport.bytesCompatible(str, charset)) { + if (StringSupport.bytesCompatible(str, charset, 0, str.length())) { length = str.length(); segment = allocateNoInit((long) length + termCharSize); - StringSupport.copyToSegmentRaw(str, segment, 0); + StringSupport.copyToSegmentRaw(str, segment, 0, 0, str.length()); } else { byte[] bytes = str.getBytes(charset); length = bytes.length; @@ -153,6 +154,53 @@ default MemorySegment allocateFrom(String str, Charset charset) { return segment; } + /** + * Encodes a Java string using the provided charset and stores the resulting + * byte array into a memory segment. + *

+ * This method always replaces malformed-input and unmappable-character + * sequences with this charset's default replacement byte array. The + * {@link java.nio.charset.CharsetEncoder} class should be used when more + * control over the encoding process is required. + *

+ * If the given string contains any {@code '\0'} characters, they will be + * copied as well. This means that, depending on the method used to read + * the string, such as {@link MemorySegment#getString(long)}, the string + * will appear truncated when read again. The string can be read without + * truncation using {@link MemorySegment#getString(long, Charset, long)}. + * + * @param str the Java string to be encoded + * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the + * string bytes + * @param srcIndex the starting index of the source string + * @param numChars the number of characters to be copied + * @return a new native segment containing the encoded string + * @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} are {@code < 0} + * @throws IndexOutOfBoundsException if {@code srcIndex > str.length() - numChars} + * + * @implSpec The default implementation for this method copies the selected substring of the + * provided Java string into a new memory segment obtained by calling + * {@code this.allocate(B)}, where {@code B} is the size, in bytes, of + * that substring encoded using the provided charset (e.g. + * {@code str.substring(srcIndex, srcIndex + numChars).getBytes(charset).length}). + */ + @ForceInline + default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, int numChars) { + Objects.requireNonNull(charset); + Objects.requireNonNull(str); + Objects.checkFromIndexSize(srcIndex, numChars, str.length()); + MemorySegment segment; + if (StringSupport.bytesCompatible(str, charset, srcIndex, numChars)) { + segment = allocateNoInit(numChars); + StringSupport.copyToSegmentRaw(str, segment, 0, srcIndex, numChars); + } else { + byte[] bytes = str.substring(srcIndex, srcIndex + numChars).getBytes(charset); + segment = allocateNoInit(bytes.length); + MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.length); + } + return segment; + } + /** * {@return a new memory segment initialized with the provided byte value} *

diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java index fa6e5b4aac3a9..9e1a138acfae0 100644 --- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java +++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java @@ -612,10 +612,10 @@ StackWalker newStackWalkerInstance(Set options, /** * Copy the string bytes to an existing segment, avoiding intermediate copies. */ - void copyToSegmentRaw(String string, MemorySegment segment, long offset); + void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength); /** * Are the string bytes compatible with the given charset? */ - boolean bytesCompatible(String string, Charset charset); + boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars); } diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java index d7636032c2823..a98d318a2422c 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java +++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java @@ -535,6 +535,13 @@ public boolean equals(Object o) { unsafeGetOffset() == that.unsafeGetOffset(); } + @Override + public String getString(long offset, Charset charset, long length) { + Utils.checkNonNegativeArgument(length, "length"); + Objects.requireNonNull(charset); + return StringSupport.read(this, offset, charset, length); + } + @Override public int hashCode() { return Objects.hash( @@ -686,6 +693,16 @@ public static void copy(Object srcArray, int srcIndex, } } + @ForceInline + public static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) { + Objects.requireNonNull(src); + Objects.requireNonNull(dstEncoding); + Objects.requireNonNull(dst); + + 
AbstractMemorySegmentImpl destImpl = (AbstractMemorySegmentImpl)dst; + return StringSupport.copyBytes(src, destImpl, dstEncoding, dstOffset, srcIndex, numChars); + } + // accessors @ForceInline diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java index bb6cb2d391544..7cbe8ab0d48cb 100644 --- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java +++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java @@ -30,11 +30,14 @@ import jdk.internal.misc.ScopedMemoryAccess; import jdk.internal.util.Architecture; import jdk.internal.util.ArraysSupport; +import jdk.internal.util.Preconditions; import jdk.internal.vm.annotation.ForceInline; import java.lang.foreign.MemorySegment; +import java.lang.reflect.Array; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; +import java.util.Objects; import static java.lang.foreign.ValueLayout.*; @@ -58,6 +61,27 @@ public static String read(AbstractMemorySegmentImpl segment, long offset, Charse }; } + @ForceInline + public static String read(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) { + return readBytes(segment, offset, charset, length); + } + + @ForceInline + public static String readBytes(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) { + if (length > Integer.MAX_VALUE) { + throw new IllegalArgumentException("Required length exceeds implementation limit"); + } + final int lengthBytes = (int) length; + final byte[] bytes = new byte[lengthBytes]; + MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, lengthBytes); + try { + return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); + } catch (CharacterCodingException _) { + // use replacement characters for malformed input + return new String(bytes, charset); + } + } + @ForceInline public static void write(AbstractMemorySegmentImpl segment, long 
offset, Charset charset, String string) { switch (CharsetKind.of(charset)) { @@ -70,14 +94,7 @@ public static void write(AbstractMemorySegmentImpl segment, long offset, Charset @ForceInline private static String readByte(AbstractMemorySegmentImpl segment, long offset, Charset charset) { final int len = strlenByte(segment, offset, segment.byteSize()); - final byte[] bytes = new byte[len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); - try { - return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); - } catch (CharacterCodingException _) { - // use replacement characters for malformed input - return new String(bytes, charset); - } + return readBytes(segment, offset, charset, len); } @ForceInline @@ -89,14 +106,7 @@ private static void writeByte(AbstractMemorySegmentImpl segment, long offset, Ch @ForceInline private static String readShort(AbstractMemorySegmentImpl segment, long offset, Charset charset) { int len = strlenShort(segment, offset, segment.byteSize()); - byte[] bytes = new byte[len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); - try { - return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); - } catch (CharacterCodingException _) { - // use replacement characters for malformed input - return new String(bytes, charset); - } + return readBytes(segment, offset, charset, len); } @ForceInline @@ -108,14 +118,7 @@ private static void writeShort(AbstractMemorySegmentImpl segment, long offset, C @ForceInline private static String readInt(AbstractMemorySegmentImpl segment, long offset, Charset charset) { int len = strlenInt(segment, offset, segment.byteSize()); - byte[] bytes = new byte[len]; - MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len); - try { - return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset); - } catch (CharacterCodingException _) { - // use replacement characters for malformed input - return new String(bytes, charset); - } + return readBytes(segment, offset, 
charset, len); } @ForceInline @@ -345,22 +348,26 @@ public static CharsetKind of(Charset charset) { } } - public static boolean bytesCompatible(String string, Charset charset) { - return JAVA_LANG_ACCESS.bytesCompatible(string, charset); + public static boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) { + return JAVA_LANG_ACCESS.bytesCompatible(string, charset, srcIndex, numChars); } public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset) { - if (bytesCompatible(string, charset)) { - copyToSegmentRaw(string, segment, offset); - return string.length(); + return copyBytes(string, segment, charset, offset, 0, string.length()); + } + + public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset, int srcIndex, int numChars) { + if (bytesCompatible(string, charset, srcIndex, numChars)) { + copyToSegmentRaw(string, segment, offset, srcIndex, numChars); + return numChars; } else { - byte[] bytes = string.getBytes(charset); + byte[] bytes = string.substring(srcIndex, srcIndex + numChars).getBytes(charset); MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length); return bytes.length; } } - public static void copyToSegmentRaw(String string, MemorySegment segment, long offset) { - JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset); + public static void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) { + JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset, srcIndex, srcLength); } } diff --git a/test/jdk/java/foreign/TestStringEncoding.java b/test/jdk/java/foreign/TestStringEncoding.java index 94732943b9d36..e9e47420a6844 100644 --- a/test/jdk/java/foreign/TestStringEncoding.java +++ b/test/jdk/java/foreign/TestStringEncoding.java @@ -37,6 +37,7 @@ import java.util.Arrays; import java.util.List; import java.util.Random; +import java.util.Set; import java.util.function.UnaryOperator; import 
jdk.internal.foreign.AbstractMemorySegmentImpl; @@ -102,6 +103,140 @@ public void testStrings(String testString) { } } + @Test(dataProvider = "strings") + public void testStringsLength(String testString) { + if (!testString.isEmpty()) { + for (Charset charset : Charset.availableCharsets().values()) { + if (charset.canEncode()) { + for (Arena arena : arenas()) { + try (arena) { + MemorySegment text = arena.allocateFrom(testString, charset, 0, testString.length()); + long length = text.byteSize(); + assertEquals(length, testString.getBytes(charset).length); + String roundTrip = text.getString(0, charset, length); + if (charset.newEncoder().canEncode(testString)) { + assertEquals(roundTrip, testString); + } + } + } + } + } + } + } + + @Test(dataProvider = "strings") + public void testStringsCopy(String testString) { + if (!testString.isEmpty()) { + for (Charset charset : Charset.availableCharsets().values()) { + if (charset.canEncode()) { + for (Arena arena : arenas()) { + try (arena) { + byte[] bytes = testString.getBytes(charset); + MemorySegment text = arena.allocate(JAVA_BYTE, bytes.length); + MemorySegment.copy(testString, charset, 0, text, 0, testString.length()); + String roundTrip = text.getString(0, charset, bytes.length); + if (charset.newEncoder().canEncode(testString)) { + assertEquals(roundTrip, testString); + } + } + } + } + } + } + } + + @Test + public void testStringsLengthNegative() { + try (Arena arena = Arena.ofConfined()) { + var segment = arena.allocateFrom("abc"); + assertThrows(IllegalArgumentException.class, () -> segment.getString(1, StandardCharsets.UTF_8, -1)); + } + } + + @Test + public void testCopyThrows() { + try (Arena arena = Arena.ofConfined()) { + String testString = "abc"; + String testString_notBytesCompatible = "snowman \u26C4"; + MemorySegment text = arena.allocate(JAVA_BYTE, 3); + MemorySegment text_notBytesCompatible = arena.allocate(JAVA_BYTE, + testString_notBytesCompatible.getBytes(StandardCharsets.UTF_8).length); + 
MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length()); + MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, 0, + text_notBytesCompatible, 0, + testString_notBytesCompatible.length()); + // srcIndex < 0 + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, -1, text, 0, testString.length())); + // dstOffset < 0 + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, -1, testString.length())); + // numChars < 0 + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, -1)); + // srcIndex + numChars > length + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, 1, text, 0, testString.length())); + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length() + 1)); + // dstOffset > byteSize() - B + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 1, testString.length())); + // srcIndex + numChars overflows + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString, StandardCharsets.UTF_8, Integer.MAX_VALUE, text, 0, Integer.MAX_VALUE + 3)); + assertThrows(IndexOutOfBoundsException.class, () -> + MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, Integer.MAX_VALUE, text, 0, Integer.MAX_VALUE + 3)); + } + } + + @Test + public void testAllocateFromThrows() { + try (Arena arena = Arena.ofConfined()) { + String testString = "abc"; + String testString_notBytesCompatible = "snowman \u26C4"; + arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length()); + arena.allocateFrom(testString, StandardCharsets.UTF_8, 2, 1); + // srcIndex < 0 + 
assertThrows(IndexOutOfBoundsException.class, () -> + arena.allocateFrom(testString, StandardCharsets.UTF_8, -1, testString.length())); + // numChars < 0 + assertThrows(IndexOutOfBoundsException.class, () -> + arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, -1)); + // srcIndex + numChars > length + assertThrows(IndexOutOfBoundsException.class, () -> + arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length() + 1)); + assertThrows(IndexOutOfBoundsException.class, () -> + arena.allocateFrom(testString, StandardCharsets.UTF_8, 1, testString.length())); + // srcIndex + numChars overflows + assertThrows(IndexOutOfBoundsException.class, () -> + arena.allocateFrom(testString, StandardCharsets.UTF_8, 3, Integer.MAX_VALUE)); + assertThrows(IndexOutOfBoundsException.class, () -> arena.allocateFrom( + testString_notBytesCompatible, StandardCharsets.UTF_8, 3, Integer.MAX_VALUE)); + } + } + + @Test + public void testGetStringThrows() { + try (Arena arena = Arena.ofConfined()) { + String testString = "abc"; + MemorySegment text = arena.allocateFrom(testString, StandardCharsets.UTF_8, 0, testString.length()); + text.getString(0, StandardCharsets.UTF_8, 3); + // unsupported string size + assertThrows(IllegalArgumentException.class, () -> + text.getString(0, StandardCharsets.UTF_8, Integer.MAX_VALUE + 1L)); + // offset < 0 + assertThrows(IndexOutOfBoundsException.class, () -> + text.getString(-1, StandardCharsets.UTF_8, 3)); + // offset > byteSize() - length + assertThrows(IndexOutOfBoundsException.class, () -> + text.getString(1, StandardCharsets.UTF_8, 3)); + // length < 0 + assertThrows(IllegalArgumentException.class, () -> + text.getString(0, StandardCharsets.UTF_8, -1)); + } + } + @Test(dataProvider = "strings") public void testStringsHeap(String testString) { for (Charset charset : singleByteCharsets()) { @@ -221,6 +356,74 @@ public void testOffset(String testString) { } } + @Test(dataProvider = "strings") + public void 
testSubstringGetString(String testString) { + if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) { + return; + } + for (var charset : singleByteCharsets()) { + for (var arena: arenas()) { + try (arena) { + MemorySegment text = arena.allocateFrom(testString, charset, 0, testString.length()); + for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) { + for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) { + // this test assumes single-byte charsets + String roundTrip = text.getString(srcIndex, charset, numChars); + String substring = testString.substring(srcIndex, srcIndex + numChars); + assertEquals(roundTrip, substring); + } + } + } + } + } + } + + @Test(dataProvider = "strings") + public void testSubstringAllocate(String testString) { + if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) { + return; + } + for (var charset : singleByteCharsets()) { + for (var arena: arenas()) { + try (arena) { + for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) { + for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) { + MemorySegment text = arena.allocateFrom(testString, charset, srcIndex, numChars); + String substring = testString.substring(srcIndex, srcIndex + numChars); + assertEquals(text.byteSize(), substring.getBytes(charset).length); + String roundTrip = text.getString(0, charset, text.byteSize()); + assertEquals(roundTrip, substring); + } + } + } + } + } + } + + @Test(dataProvider = "strings") + public void testSubstringCopy(String testString) { + if (testString.length() < 3 || !containsOnlyRegularCharacters(testString)) { + return; + } + for (var charset : singleByteCharsets()) { + for (var arena: arenas()) { + try (arena) { + for (int srcIndex = 0; srcIndex <= testString.length(); srcIndex++) { + for (int numChars = 0; numChars <= testString.length() - srcIndex; numChars++) { + String substring = testString.substring(srcIndex, srcIndex + 
numChars); + long length = substring.getBytes(charset).length; + MemorySegment text = arena.allocate(JAVA_BYTE, length); + long copied = MemorySegment.copy(testString, charset, srcIndex, text, 0, numChars); + String roundTrip = text.getString(0, charset, length); + assertEquals(roundTrip, substring); + assertEquals(copied, length); + } + } + } + } + } + } + private static final MemoryLayout CHAR_POINTER = ADDRESS .withTargetLayout(MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE)); private static final Linker LINKER = Linker.nativeLinker(); @@ -402,7 +605,7 @@ public static Object[][] strings() { {""}, {"X"}, {"12345"}, - {"yen \u00A5"}, + {"section \u00A7"}, {"snowman \u26C4"}, {"rainbow \uD83C\uDF08"}, {"0"}, diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java new file mode 100644 index 0000000000000..ba559b52344b8 --- /dev/null +++ b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved. + * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. + * + * This code is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 only, as + * published by the Free Software Foundation. + * + * This code is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License + * version 2 for more details (a copy is included in the LICENSE file that + * accompanied this code). + * + * You should have received a copy of the GNU General Public License version + * 2 along with this work; if not, write to the Free Software Foundation, + * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. 
+ * + * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA + * or visit www.oracle.com if you need additional information or have any + * questions. + */ +package org.openjdk.bench.java.lang.foreign; + +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.nio.charset.StandardCharsets.UTF_8; + +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; + +import java.lang.foreign.Arena; +import java.lang.foreign.MemorySegment; +import java.util.concurrent.TimeUnit; + +@BenchmarkMode(Mode.AverageTime) +@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@Fork(value = 3) +public class FromJavaStringTest { + + private String str; + private MemorySegment strSegment; + private int lengthBytes; + + @Param({"5", "20", "100", "200", "451"}) + int size; + + @Setup + public void setup() { + var arena = Arena.ofAuto(); + while (LOREM.length() < size) { + LOREM += LOREM; + } + str = LOREM.substring(0, size); + strSegment = arena.allocateFrom(str); + lengthBytes = str.getBytes(UTF_8).length; + } + + @Benchmark + public void segment_setString() { + strSegment.setString(0, str, UTF_8); + } + + @Benchmark + public void segment_copyStringRaw() { + MemorySegment.copy(str, UTF_8, 0, strSegment, 0, str.length()); + } + + @Benchmark + public void segment_copyStringBytes() { + byte[] bytes = str.getBytes(UTF_8); + MemorySegment.copy(bytes, 0, strSegment, 
JAVA_BYTE, 0, bytes.length); + } + + static String LOREM = + """ + Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et + dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip + ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu + fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt + mollit anim id est laborum. + """; +} diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java index 901f4c7097f72..c3e8f3aaca425 100644 --- a/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java +++ b/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java @@ -22,6 +22,9 @@ */ package org.openjdk.bench.java.lang.foreign; +import static java.lang.foreign.ValueLayout.JAVA_BYTE; +import static java.nio.charset.StandardCharsets.UTF_8; + import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; import org.openjdk.jmh.annotations.Fork; @@ -47,6 +50,7 @@ public class ToJavaStringTest { private MemorySegment strSegment; + private int length; @Param({"5", "20", "100", "200", "451"}) int size; @@ -61,19 +65,33 @@ public void setup() { while (LOREM.length() < size) { LOREM += LOREM; } - strSegment = arena.allocateFrom(LOREM.substring(0, size)); + var s = LOREM.substring(0, size); + strSegment = arena.allocateFrom(s); + length = s.getBytes(UTF_8).length; } @Benchmark - public String panama_readString() { + public String segment_getString() { return strSegment.getString(0); } + @Benchmark + public String segment_getStringLength() { + return strSegment.getString(0, UTF_8, length); + } + @Benchmark public String jni_readString() { return readString(strSegment.address()); } + @Benchmark + public String segment_copyStringBytes() { + 
byte[] bytes = new byte[length]; + MemorySegment.copy(strSegment, JAVA_BYTE, 0, bytes, 0, length); + return new String(bytes, UTF_8); + } + static native String readString(long addr); static String LOREM = """