diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 52f908c9e98bf..8dac474accad5 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -2016,19 +2016,26 @@ public byte[] getBytes() {
return encode(Charset.defaultCharset(), coder(), value);
}
- boolean bytesCompatible(Charset charset) {
+ /**
+ * Tells whether the internal bytes backing the given character range can be
+ * copied verbatim as the encoding of that range in {@code charset}.
+ * Only Latin-1 coded strings qualify: always for ISO-8859-1, and for
+ * UTF-8 / US-ASCII only when the range contains no negative byte.
+ * The range itself is not validated here.
+ *
+ * @param charset the target charset
+ * @param srcIndex index of the first character of the range
+ * @param numChars number of characters in the range
+ * @return {@code true} if a raw copy of the range yields correctly encoded bytes
+ */
+ boolean bytesCompatible(Charset charset, int srcIndex, int numChars) {
if (isLatin1()) {
if (charset == ISO_8859_1.INSTANCE) {
return true; // ok, same encoding
} else if (charset == UTF_8.INSTANCE || charset == US_ASCII.INSTANCE) {
- return !StringCoding.hasNegatives(value, 0, value.length); // ok, if ASCII-compatible
+ return !StringCoding.hasNegatives(value, srcIndex, numChars); // ok, if ASCII-compatible
}
}
return false;
}
- void copyToSegmentRaw(MemorySegment segment, long offset) {
- MemorySegment.copy(value, 0, segment, ValueLayout.JAVA_BYTE, offset, value.length);
+ /**
+ * Copies {@code srcLength} bytes of this string's internal byte array, starting at
+ * index {@code srcIndex}, into {@code segment} at byte offset {@code offset}.
+ * No encoding step is applied.
+ *
+ * @throws IllegalStateException if this string is not Latin-1 coded
+ */
+ void copyToSegmentRaw(MemorySegment segment, long offset, int srcIndex, int srcLength) {
+ if (!isLatin1()) {
+ // This method is intended to be used together with bytesCompatible, which currently only supports
+ // latin1 strings. In the future, bytesCompatible could be updated to handle more cases, like
+ // UTF-16 strings (when the platform and charset endianness match, and the String doesn't contain
+ // unpaired surrogates). If that happens, copyToSegmentRaw should also be updated.
+ throw new IllegalStateException("This string does not support copyToSegmentRaw");
+ }
+ MemorySegment.copy(value, srcIndex, segment, ValueLayout.JAVA_BYTE, offset, srcLength);
}
/**
diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java
index c88cf4ac79775..cb2b3022dfd72 100644
--- a/src/java.base/share/classes/java/lang/System.java
+++ b/src/java.base/share/classes/java/lang/System.java
@@ -2315,13 +2315,13 @@ public String getLoaderNameID(ClassLoader loader) {
}
+ // Forwards to the package-private String.copyToSegmentRaw with the same range arguments.
@Override
- public void copyToSegmentRaw(String string, MemorySegment segment, long offset) {
- string.copyToSegmentRaw(segment, offset);
+ public void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) {
+ string.copyToSegmentRaw(segment, offset, srcIndex, srcLength);
}
+ // Forwards to the package-private String.bytesCompatible with the same range arguments.
@Override
- public boolean bytesCompatible(String string, Charset charset) {
- return string.bytesCompatible(charset);
+ public boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) {
+ return string.bytesCompatible(charset, srcIndex, numChars);
}
});
}
diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java
index 196f44d1abedc..195955b1a9218 100644
--- a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java
+++ b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java
@@ -1296,12 +1296,7 @@ MemorySegment reinterpret(long newSize,
* over the decoding process is required.
*
* Getting a string from a segment with a known byte offset and
- * known byte length can be done like so:
- * {@snippet lang=java :
- * byte[] bytes = new byte[length];
- * MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, length);
- * return new String(bytes, charset);
- * }
+ * known byte length can be done using {@link #getString(long, Charset, long)}.
*
* @param offset offset in bytes (relative to this segment address) at which this
* access operation will occur
@@ -1328,6 +1323,40 @@ MemorySegment reinterpret(long newSize,
*/
String getString(long offset, Charset charset);
+ /**
+ * Reads a string from this segment at the given offset, using the provided length
+ * and charset.
+ *
+ * This method always replaces malformed-input and unmappable-character
+ * sequences with this charset's default replacement string. The {@link
+ * java.nio.charset.CharsetDecoder} class should be used when more control
+ * over the decoding process is required.
+ *
+ * If the string contains any {@code '\0'} characters, they will be read as well.
+ * This differs from {@link #getString(long, Charset)}, which will only read up
+ * to the first {@code '\0'}, resulting in truncation for string data that contains
+ * the {@code '\0'} character.
+ *
+ * @param offset offset in bytes (relative to this segment address) at which this
+ * access operation will occur
+ * @param charset the charset used to {@linkplain Charset#newDecoder() decode} the
+ * string bytes
+ * @param length length, in bytes, of the region of memory to read and decode into
+ * a string
+ * @return a Java string constructed from the bytes read from the given starting
+ * address up to the given length
+ * @throws IllegalArgumentException if the size of the string is greater than the
+ * largest string supported by the platform
+ * @throws IndexOutOfBoundsException if {@code offset < 0}
+ * @throws IndexOutOfBoundsException if {@code offset > byteSize() - length}
+ * @throws IllegalStateException if the {@linkplain #scope() scope} associated with
+ * this segment is not {@linkplain Scope#isAlive() alive}
+ * @throws WrongThreadException if this method is called from a thread {@code T},
+ * such that {@code isAccessibleBy(T) == false}
+ * @throws IllegalArgumentException if {@code length < 0}
+ */
+ String getString(long offset, Charset charset, long length);
+
/**
* Writes the given string into this segment at the given offset, converting it to
* a null-terminated byte sequence using the {@linkplain StandardCharsets#UTF_8 UTF-8}
@@ -1366,7 +1395,8 @@ MemorySegment reinterpret(long newSize,
* If the given string contains any {@code '\0'} characters, they will be
* copied as well. This means that, depending on the method used to read
* the string, such as {@link MemorySegment#getString(long)}, the string
- * will appear truncated when read again.
+ * will appear truncated when read again. The string can be read without
+ * truncation using {@link #getString(long, Charset, long)}.
*
* @param offset offset in bytes (relative to this segment address) at which this
* access operation will occur, the final address of this write
@@ -2606,6 +2636,50 @@ static void copy(Object srcArray, int srcIndex,
elementCount);
}
+ /**
+ * Copies the byte sequence of the given string encoded using the provided charset
+ * to the destination segment.
+ *
+ * This method always replaces malformed-input and unmappable-character
+ * sequences with this charset's default replacement byte array. The {@link
+ * java.nio.charset.CharsetEncoder} class should be used when more control
+ * over the encoding process is required.
+ *
+ * If the given string contains any {@code '\0'} characters, they will be
+ * copied as well. This means that, depending on the method used to read
+ * the string, such as {@link MemorySegment#getString(long)}, the string
+ * will appear truncated when read again. The string can be read without
+ * truncation using {@link #getString(long, Charset, long)}.
+ *
+ * @param src the Java string to be written into the destination segment
+ * @param dstEncoding the charset used to {@linkplain Charset#newEncoder() encode}
+ * the string bytes.
+ * @param srcIndex the starting character index of the source string
+ * @param dst the destination segment
+ * @param dstOffset the starting offset, in bytes, of the destination segment
+ * @param numChars the number of characters to be copied
+ * @throws IllegalStateException if the {@linkplain #scope() scope} associated with
+ * {@code dst} is not {@linkplain Scope#isAlive() alive}
+ * @throws WrongThreadException if this method is called from a thread {@code T},
+ * such that {@code dst.isAccessibleBy(T) == false}
+ * @throws IndexOutOfBoundsException if either {@code srcIndex}, {@code numChars}, or {@code dstOffset}
+ * are {@code < 0}
+ * @throws IndexOutOfBoundsException if {@code srcIndex > src.length() - numChars}
+ * @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only}
+ * @throws IndexOutOfBoundsException if {@code dstOffset > dst.byteSize() - B} where {@code B} is the size,
+ * in bytes, of the substring of {@code src} encoded using the given charset
+ * @return the number of copied bytes.
+ */
+    @ForceInline
+    static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) {
+        // Null checks first, in the same order as before.
+        Objects.requireNonNull(src);
+        Objects.requireNonNull(dstEncoding);
+        Objects.requireNonNull(dst);
+        // The character range must lie inside the source string.
+        final int srcLength = src.length();
+        Objects.checkFromIndexSize(srcIndex, numChars, srcLength);
+
+        final long copiedBytes =
+                AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, dstOffset, numChars);
+        return copiedBytes;
+    }
+
/**
* Finds and returns the relative offset, in bytes, of the first mismatch between the
* source and the destination segments. More specifically, the bytes at offset
diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java
index 1297406dcf194..5b213af544f74 100644
--- a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java
+++ b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java
@@ -111,7 +111,8 @@ default MemorySegment allocateFrom(String str) {
* If the given string contains any {@code '\0'} characters, they will be
* copied as well. This means that, depending on the method used to read
* the string, such as {@link MemorySegment#getString(long)}, the string
- * will appear truncated when read again.
+ * will appear truncated when read again. The string can be read without
+ * truncation using {@link MemorySegment#getString(long, Charset, long)}.
*
* @param str the Java string to be converted into a C string
* @param charset the charset used to {@linkplain Charset#newEncoder() encode} the
@@ -137,10 +138,10 @@ default MemorySegment allocateFrom(String str, Charset charset) {
int termCharSize = StringSupport.CharsetKind.of(charset).terminatorCharSize();
MemorySegment segment;
int length;
- if (StringSupport.bytesCompatible(str, charset)) {
+ if (StringSupport.bytesCompatible(str, charset, 0, str.length())) {
length = str.length();
segment = allocateNoInit((long) length + termCharSize);
- StringSupport.copyToSegmentRaw(str, segment, 0);
+ StringSupport.copyToSegmentRaw(str, segment, 0, 0, str.length());
} else {
byte[] bytes = str.getBytes(charset);
length = bytes.length;
@@ -153,6 +154,53 @@ default MemorySegment allocateFrom(String str, Charset charset) {
return segment;
}
+ /**
+ * Encodes a Java string using the provided charset and stores the resulting
+ * byte array into a memory segment.
+ *
+ * This method always replaces malformed-input and unmappable-character
+ * sequences with this charset's default replacement byte array. The
+ * {@link java.nio.charset.CharsetEncoder} class should be used when more
+ * control over the encoding process is required.
+ *
+ * If the given string contains any {@code '\0'} characters, they will be
+ * copied as well. This means that, depending on the method used to read
+ * the string, such as {@link MemorySegment#getString(long)}, the string
+ * will appear truncated when read again. The string can be read without
+ * truncation using {@link MemorySegment#getString(long, Charset, long)}.
+ *
+ * @param str the Java string to be encoded
+ * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the
+ * string bytes
+ * @param srcIndex the starting index of the source string
+ * @param numChars the number of characters to be copied
+ * @return a new native segment containing the encoded string
+ * @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} are {@code < 0}
+ * @throws IndexOutOfBoundsException if {@code srcIndex > str.length() - numChars}
+ *
+ * @implSpec The default implementation for this method copies the selected range of the
+ * provided Java string into a new memory segment obtained by calling
+ * {@code this.allocate(B)}, where {@code B} is the size, in bytes, of
+ * the selected substring encoded using the provided charset
+ * (e.g. {@code str.substring(srcIndex, srcIndex + numChars).getBytes(charset).length}).
+ */
+    @ForceInline
+    default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, int numChars) {
+        Objects.requireNonNull(charset);
+        Objects.requireNonNull(str);
+        Objects.checkFromIndexSize(srcIndex, numChars, str.length());
+        if (StringSupport.bytesCompatible(str, charset, srcIndex, numChars)) {
+            // Fast path: the string's internal bytes are already the encoded form,
+            // one byte per character, so copy them without an intermediate array.
+            MemorySegment raw = allocateNoInit(numChars);
+            StringSupport.copyToSegmentRaw(str, raw, 0, srcIndex, numChars);
+            return raw;
+        }
+        // Slow path: encode the selected range, then copy the encoded bytes.
+        byte[] encoded = str.substring(srcIndex, srcIndex + numChars).getBytes(charset);
+        MemorySegment result = allocateNoInit(encoded.length);
+        MemorySegment.copy(encoded, 0, result, ValueLayout.JAVA_BYTE, 0, encoded.length);
+        return result;
+    }
+
/**
* {@return a new memory segment initialized with the provided byte value}
*
diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
index fa6e5b4aac3a9..9e1a138acfae0 100644
--- a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
+++ b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
@@ -612,10 +612,10 @@ StackWalker newStackWalkerInstance(Set options,
/**
* Copy the string bytes to an existing segment, avoiding intermediate copies.
*/
- void copyToSegmentRaw(String string, MemorySegment segment, long offset);
+ void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength);
/**
* Are the string bytes compatible with the given charset?
*/
- boolean bytesCompatible(String string, Charset charset);
+ boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars);
}
diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java
index d7636032c2823..a98d318a2422c 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java
@@ -535,6 +535,13 @@ public boolean equals(Object o) {
unsafeGetOffset() == that.unsafeGetOffset();
}
+ @Override
+ public String getString(long offset, Charset charset, long length) {
+ // Only the arguments are validated here (length first, then charset); the offset/length
+ // region is not checked against this segment at this point. That is left to the copy
+ // performed when StringSupport reads the bytes.
+ Utils.checkNonNegativeArgument(length, "length");
+ Objects.requireNonNull(charset);
+ return StringSupport.read(this, offset, charset, length);
+ }
+
@Override
public int hashCode() {
return Objects.hash(
@@ -686,6 +693,16 @@ public static void copy(Object srcArray, int srcIndex,
}
}
+    @ForceInline
+    public static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) {
+        Objects.requireNonNull(src);
+        Objects.requireNonNull(dstEncoding);
+        Objects.requireNonNull(dst);
+
+        // The cast fails fast if dst is not an AbstractMemorySegmentImpl.
+        final AbstractMemorySegmentImpl target = (AbstractMemorySegmentImpl) dst;
+        // copyBytes reports an int byte count; it widens to the long return type.
+        final int written = StringSupport.copyBytes(src, target, dstEncoding, dstOffset, srcIndex, numChars);
+        return written;
+    }
+
// accessors
@ForceInline
diff --git a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java
index bb6cb2d391544..7cbe8ab0d48cb 100644
--- a/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java
+++ b/src/java.base/share/classes/jdk/internal/foreign/StringSupport.java
@@ -30,11 +30,14 @@
import jdk.internal.misc.ScopedMemoryAccess;
import jdk.internal.util.Architecture;
import jdk.internal.util.ArraysSupport;
+import jdk.internal.util.Preconditions;
import jdk.internal.vm.annotation.ForceInline;
import java.lang.foreign.MemorySegment;
+import java.lang.reflect.Array;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
+import java.util.Objects;
import static java.lang.foreign.ValueLayout.*;
@@ -58,6 +61,27 @@ public static String read(AbstractMemorySegmentImpl segment, long offset, Charse
};
}
+ // Length-delimited read: decodes exactly {@code length} bytes starting at {@code offset}.
+ // Simply forwards to readBytes.
+ @ForceInline
+ public static String read(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) {
+ return readBytes(segment, offset, charset, length);
+ }
+
+    /**
+     * Decodes {@code length} bytes of {@code segment}, starting at {@code offset},
+     * into a string using {@code charset}. Malformed input is replaced rather than
+     * reported.
+     *
+     * @throws IllegalArgumentException if {@code length} exceeds the largest array
+     *         this implementation will allocate
+     * @throws IndexOutOfBoundsException if the requested region does not lie within
+     *         the segment
+     */
+    @ForceInline
+    public static String readBytes(AbstractMemorySegmentImpl segment, long offset, Charset charset, long length) {
+        // Reject lengths that no Java array can hold before narrowing to int.
+        if (length > ArraysSupport.SOFT_MAX_ARRAY_LENGTH) {
+            throw new IllegalArgumentException("Required length exceeds implementation limit");
+        }
+        // Validate the region against the segment BEFORE allocating the scratch array,
+        // so an out-of-range request fails with IndexOutOfBoundsException instead of
+        // first attempting a (potentially huge) allocation.
+        Objects.checkFromIndexSize(offset, length, segment.byteSize());
+        final int lengthBytes = (int) length;
+        final byte[] bytes = new byte[lengthBytes];
+        MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, lengthBytes);
+        try {
+            return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
+        } catch (CharacterCodingException _) {
+            // use replacement characters for malformed input
+            return new String(bytes, charset);
+        }
+    }
+
@ForceInline
public static void write(AbstractMemorySegmentImpl segment, long offset, Charset charset, String string) {
switch (CharsetKind.of(charset)) {
@@ -70,14 +94,7 @@ public static void write(AbstractMemorySegmentImpl segment, long offset, Charset
@ForceInline
private static String readByte(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
final int len = strlenByte(segment, offset, segment.byteSize());
- final byte[] bytes = new byte[len];
- MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
- try {
- return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
- } catch (CharacterCodingException _) {
- // use replacement characters for malformed input
- return new String(bytes, charset);
- }
+ return readBytes(segment, offset, charset, len);
}
@ForceInline
@@ -89,14 +106,7 @@ private static void writeByte(AbstractMemorySegmentImpl segment, long offset, Ch
@ForceInline
private static String readShort(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
int len = strlenShort(segment, offset, segment.byteSize());
- byte[] bytes = new byte[len];
- MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
- try {
- return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
- } catch (CharacterCodingException _) {
- // use replacement characters for malformed input
- return new String(bytes, charset);
- }
+ return readBytes(segment, offset, charset, len);
}
@ForceInline
@@ -108,14 +118,7 @@ private static void writeShort(AbstractMemorySegmentImpl segment, long offset, C
@ForceInline
private static String readInt(AbstractMemorySegmentImpl segment, long offset, Charset charset) {
int len = strlenInt(segment, offset, segment.byteSize());
- byte[] bytes = new byte[len];
- MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, len);
- try {
- return JAVA_LANG_ACCESS.uncheckedNewStringOrThrow(bytes, charset);
- } catch (CharacterCodingException _) {
- // use replacement characters for malformed input
- return new String(bytes, charset);
- }
+ return readBytes(segment, offset, charset, len);
}
@ForceInline
@@ -345,22 +348,26 @@ public static CharsetKind of(Charset charset) {
}
}
- public static boolean bytesCompatible(String string, Charset charset) {
- return JAVA_LANG_ACCESS.bytesCompatible(string, charset);
+ // Asks, via JAVA_LANG_ACCESS, whether the given character range of the string can be
+ // copied as-is for the given charset.
+ public static boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) {
+ return JAVA_LANG_ACCESS.bytesCompatible(string, charset, srcIndex, numChars);
}
+ // Convenience overload: encodes the whole string.
public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset) {
- if (bytesCompatible(string, charset)) {
- copyToSegmentRaw(string, segment, offset);
- return string.length();
+ return copyBytes(string, segment, charset, offset, 0, string.length());
+ }
+
+ // Writes the encoding of the characters [srcIndex, srcIndex + numChars) of the string into
+ // the segment at the given byte offset and returns the number of bytes written.
+ public static int copyBytes(String string, MemorySegment segment, Charset charset, long offset, int srcIndex, int numChars) {
+ if (bytesCompatible(string, charset, srcIndex, numChars)) {
+ // Fast path: raw copy, one byte per character, hence numChars bytes written.
+ copyToSegmentRaw(string, segment, offset, srcIndex, numChars);
+ return numChars;
} else {
- byte[] bytes = string.getBytes(charset);
+ // Slow path: encode only the selected range through an intermediate array.
+ byte[] bytes = string.substring(srcIndex, srcIndex + numChars).getBytes(charset);
MemorySegment.copy(bytes, 0, segment, JAVA_BYTE, offset, bytes.length);
return bytes.length;
}
}
- public static void copyToSegmentRaw(String string, MemorySegment segment, long offset) {
- JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset);
+ // Raw copy of a range of the string's bytes into the segment, via JAVA_LANG_ACCESS.
+ // Callers in this class invoke it only after bytesCompatible returned true for the same range.
+ public static void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) {
+ JAVA_LANG_ACCESS.copyToSegmentRaw(string, segment, offset, srcIndex, srcLength);
}
}
diff --git a/test/jdk/java/foreign/TestStringEncoding.java b/test/jdk/java/foreign/TestStringEncoding.java
index 94732943b9d36..e9e47420a6844 100644
--- a/test/jdk/java/foreign/TestStringEncoding.java
+++ b/test/jdk/java/foreign/TestStringEncoding.java
@@ -37,6 +37,7 @@
import java.util.Arrays;
import java.util.List;
import java.util.Random;
+import java.util.Set;
import java.util.function.UnaryOperator;
import jdk.internal.foreign.AbstractMemorySegmentImpl;
@@ -102,6 +103,140 @@ public void testStrings(String testString) {
}
}
+    @Test(dataProvider = "strings")
+    public void testStringsLength(String testString) {
+        if (testString.isEmpty()) {
+            return; // nothing to round-trip
+        }
+        for (Charset charset : Charset.availableCharsets().values()) {
+            if (!charset.canEncode()) {
+                continue; // decode-only charset
+            }
+            for (Arena arena : arenas()) {
+                try (arena) {
+                    MemorySegment encoded = arena.allocateFrom(testString, charset, 0, testString.length());
+                    long byteCount = encoded.byteSize();
+                    assertEquals(byteCount, testString.getBytes(charset).length);
+                    String decoded = encoded.getString(0, charset, byteCount);
+                    // only compare when the charset can represent every character
+                    if (charset.newEncoder().canEncode(testString)) {
+                        assertEquals(decoded, testString);
+                    }
+                }
+            }
+        }
+    }
+
+    @Test(dataProvider = "strings")
+    public void testStringsCopy(String testString) {
+        if (testString.isEmpty()) {
+            return; // nothing to copy
+        }
+        for (Charset charset : Charset.availableCharsets().values()) {
+            if (!charset.canEncode()) {
+                continue; // decode-only charset
+            }
+            for (Arena arena : arenas()) {
+                try (arena) {
+                    // size the destination from the reference encoding
+                    byte[] expectedBytes = testString.getBytes(charset);
+                    MemorySegment destination = arena.allocate(JAVA_BYTE, expectedBytes.length);
+                    MemorySegment.copy(testString, charset, 0, destination, 0, testString.length());
+                    String decoded = destination.getString(0, charset, expectedBytes.length);
+                    // only compare when the charset can represent every character
+                    if (charset.newEncoder().canEncode(testString)) {
+                        assertEquals(decoded, testString);
+                    }
+                }
+            }
+        }
+    }
+
+    @Test
+    public void testStringsLengthNegative() {
+        try (Arena confined = Arena.ofConfined()) {
+            MemorySegment abc = confined.allocateFrom("abc");
+            // a negative length is an argument error, not a bounds error
+            assertThrows(IllegalArgumentException.class,
+                    () -> abc.getString(1, StandardCharsets.UTF_8, -1));
+        }
+    }
+
+    @Test
+    public void testCopyThrows() {
+        try (Arena arena = Arena.ofConfined()) {
+            String testString = "abc";
+            String testString_notBytesCompatible = "snowman \u26C4";
+            MemorySegment text = arena.allocate(JAVA_BYTE, 3);
+            MemorySegment text_notBytesCompatible = arena.allocate(JAVA_BYTE,
+                    testString_notBytesCompatible.getBytes(StandardCharsets.UTF_8).length);
+            // sanity: in-bounds copies succeed for both strings
+            MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length());
+            MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, 0,
+                    text_notBytesCompatible, 0,
+                    testString_notBytesCompatible.length());
+            // srcIndex < 0
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, -1, text, 0, testString.length()));
+            // dstOffset < 0
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, -1, testString.length()));
+            // numChars < 0
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, -1));
+            // srcIndex + numChars > length
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 1, text, 0, testString.length()));
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 0, testString.length() + 1));
+            // dstOffset > byteSize() - B
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 0, text, 1, testString.length()));
+            // srcIndex + numChars overflows int. Both operands are non-negative here:
+            // the previous "Integer.MAX_VALUE + 3" literal wrapped to a negative int and
+            // therefore only repeated the numChars < 0 case above.
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString, StandardCharsets.UTF_8, 3, text, 0, Integer.MAX_VALUE));
+            assertThrows(IndexOutOfBoundsException.class, () ->
+                    MemorySegment.copy(testString_notBytesCompatible, StandardCharsets.UTF_8, 3,
+                            text_notBytesCompatible, 0, Integer.MAX_VALUE));
+        }
+    }
+
+    @Test
+    public void testAllocateFromThrows() {
+        try (Arena arena = Arena.ofConfined()) {
+            final Charset utf8 = StandardCharsets.UTF_8;
+            final String ascii = "abc";
+            final String nonAscii = "snowman \u26C4";
+            final int asciiLength = ascii.length();
+            // in-bounds requests must succeed
+            arena.allocateFrom(ascii, utf8, 0, asciiLength);
+            arena.allocateFrom(ascii, utf8, 2, 1);
+            // srcIndex < 0
+            assertThrows(IndexOutOfBoundsException.class,
+                    () -> arena.allocateFrom(ascii, utf8, -1, asciiLength));
+            // numChars < 0
+            assertThrows(IndexOutOfBoundsException.class,
+                    () -> arena.allocateFrom(ascii, utf8, 0, -1));
+            // srcIndex + numChars > length
+            assertThrows(IndexOutOfBoundsException.class,
+                    () -> arena.allocateFrom(ascii, utf8, 0, asciiLength + 1));
+            assertThrows(IndexOutOfBoundsException.class,
+                    () -> arena.allocateFrom(ascii, utf8, 1, asciiLength));
+            // srcIndex + numChars overflows
+            assertThrows(IndexOutOfBoundsException.class,
+                    () -> arena.allocateFrom(ascii, utf8, 3, Integer.MAX_VALUE));
+            assertThrows(IndexOutOfBoundsException.class,
+                    () -> arena.allocateFrom(nonAscii, utf8, 3, Integer.MAX_VALUE));
+        }
+    }
+
+    @Test
+    public void testGetStringThrows() {
+        try (Arena arena = Arena.ofConfined()) {
+            final Charset utf8 = StandardCharsets.UTF_8;
+            final String source = "abc";
+            final MemorySegment encoded = arena.allocateFrom(source, utf8, 0, source.length());
+            // a fully in-bounds read must succeed
+            encoded.getString(0, utf8, 3);
+            // unsupported string size
+            assertThrows(IllegalArgumentException.class,
+                    () -> encoded.getString(0, utf8, Integer.MAX_VALUE + 1L));
+            // offset < 0
+            assertThrows(IndexOutOfBoundsException.class,
+                    () -> encoded.getString(-1, utf8, 3));
+            // offset > byteSize() - length
+            assertThrows(IndexOutOfBoundsException.class,
+                    () -> encoded.getString(1, utf8, 3));
+            // length < 0
+            assertThrows(IllegalArgumentException.class,
+                    () -> encoded.getString(0, utf8, -1));
+        }
+    }
+
@Test(dataProvider = "strings")
public void testStringsHeap(String testString) {
for (Charset charset : singleByteCharsets()) {
@@ -221,6 +356,74 @@ public void testOffset(String testString) {
}
}
+    @Test(dataProvider = "strings")
+    public void testSubstringGetString(String testString) {
+        final int total = testString.length();
+        if (total < 3 || !containsOnlyRegularCharacters(testString)) {
+            return;
+        }
+        for (Charset charset : singleByteCharsets()) {
+            for (Arena arena : arenas()) {
+                try (arena) {
+                    MemorySegment encoded = arena.allocateFrom(testString, charset, 0, total);
+                    for (int srcIndex = 0; srcIndex <= total; srcIndex++) {
+                        final int remaining = total - srcIndex;
+                        for (int numChars = 0; numChars <= remaining; numChars++) {
+                            // single-byte charsets only: char indices double as byte offsets
+                            String decoded = encoded.getString(srcIndex, charset, numChars);
+                            String expected = testString.substring(srcIndex, srcIndex + numChars);
+                            assertEquals(decoded, expected);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    @Test(dataProvider = "strings")
+    public void testSubstringAllocate(String testString) {
+        final int total = testString.length();
+        if (total < 3 || !containsOnlyRegularCharacters(testString)) {
+            return;
+        }
+        for (Charset charset : singleByteCharsets()) {
+            for (Arena arena : arenas()) {
+                try (arena) {
+                    for (int srcIndex = 0; srcIndex <= total; srcIndex++) {
+                        final int remaining = total - srcIndex;
+                        for (int numChars = 0; numChars <= remaining; numChars++) {
+                            MemorySegment encoded = arena.allocateFrom(testString, charset, srcIndex, numChars);
+                            String expected = testString.substring(srcIndex, srcIndex + numChars);
+                            // the segment must be exactly as large as the encoded range
+                            assertEquals(encoded.byteSize(), expected.getBytes(charset).length);
+                            String decoded = encoded.getString(0, charset, encoded.byteSize());
+                            assertEquals(decoded, expected);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
+    @Test(dataProvider = "strings")
+    public void testSubstringCopy(String testString) {
+        final int total = testString.length();
+        if (total < 3 || !containsOnlyRegularCharacters(testString)) {
+            return;
+        }
+        for (Charset charset : singleByteCharsets()) {
+            for (Arena arena : arenas()) {
+                try (arena) {
+                    for (int srcIndex = 0; srcIndex <= total; srcIndex++) {
+                        final int remaining = total - srcIndex;
+                        for (int numChars = 0; numChars <= remaining; numChars++) {
+                            String expected = testString.substring(srcIndex, srcIndex + numChars);
+                            long expectedBytes = expected.getBytes(charset).length;
+                            MemorySegment destination = arena.allocate(JAVA_BYTE, expectedBytes);
+                            long copied = MemorySegment.copy(testString, charset, srcIndex, destination, 0, numChars);
+                            String decoded = destination.getString(0, charset, expectedBytes);
+                            assertEquals(decoded, expected);
+                            // the reported byte count must match the reference encoding
+                            assertEquals(copied, expectedBytes);
+                        }
+                    }
+                }
+            }
+        }
+    }
+
private static final MemoryLayout CHAR_POINTER = ADDRESS
.withTargetLayout(MemoryLayout.sequenceLayout(Long.MAX_VALUE, JAVA_BYTE));
private static final Linker LINKER = Linker.nativeLinker();
@@ -402,7 +605,7 @@ public static Object[][] strings() {
{""},
{"X"},
{"12345"},
- {"yen \u00A5"},
+ {"section \u00A7"},
{"snowman \u26C4"},
{"rainbow \uD83C\uDF08"},
{"0"},
diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java
new file mode 100644
index 0000000000000..ba559b52344b8
--- /dev/null
+++ b/test/micro/org/openjdk/bench/java/lang/foreign/FromJavaStringTest.java
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2025, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.java.lang.foreign;
+
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+
+import java.lang.foreign.Arena;
+import java.lang.foreign.MemorySegment;
+import java.util.concurrent.TimeUnit;
+
+/**
+ * Compares ways of writing a Java string into an existing memory segment:
+ * {@code setString}, the string-aware {@code MemorySegment.copy}, and a manual
+ * {@code getBytes} followed by an array copy.
+ */
+@BenchmarkMode(Mode.AverageTime)
+@Warmup(iterations = 5, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@Measurement(iterations = 10, time = 500, timeUnit = TimeUnit.MILLISECONDS)
+@State(Scope.Benchmark)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@Fork(value = 3)
+public class FromJavaStringTest {
+
+    private String str;
+    private MemorySegment strSegment;
+
+    @Param({"5", "20", "100", "200", "451"})
+    int size;
+
+    @Setup
+    public void setup() {
+        var arena = Arena.ofAuto();
+        // Repeat the sample text often enough to cover the requested size, then trim.
+        // Built locally so the shared constant is never mutated.
+        str = LOREM.repeat(size / LOREM.length() + 1).substring(0, size);
+        // The destination is sized by allocateFrom(str) and reused by every benchmark.
+        strSegment = arena.allocateFrom(str);
+    }
+
+    @Benchmark
+    public void segment_setString() {
+        strSegment.setString(0, str, UTF_8);
+    }
+
+    @Benchmark
+    public void segment_copyStringRaw() {
+        MemorySegment.copy(str, UTF_8, 0, strSegment, 0, str.length());
+    }
+
+    @Benchmark
+    public void segment_copyStringBytes() {
+        byte[] bytes = str.getBytes(UTF_8);
+        MemorySegment.copy(bytes, 0, strSegment, JAVA_BYTE, 0, bytes.length);
+    }
+
+    static final String LOREM =
+            """
+            Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et
+            dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip
+            ex ea commodo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu
+            fugiat nulla pariatur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt
+            mollit anim id est laborum.
+            """;
+}
diff --git a/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java b/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java
index 901f4c7097f72..c3e8f3aaca425 100644
--- a/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java
+++ b/test/micro/org/openjdk/bench/java/lang/foreign/ToJavaStringTest.java
@@ -22,6 +22,9 @@
*/
package org.openjdk.bench.java.lang.foreign;
+import static java.lang.foreign.ValueLayout.JAVA_BYTE;
+import static java.nio.charset.StandardCharsets.UTF_8;
+
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
@@ -47,6 +50,7 @@
public class ToJavaStringTest {
private MemorySegment strSegment;
+ private int length;
@Param({"5", "20", "100", "200", "451"})
int size;
@@ -61,19 +65,33 @@ public void setup() {
while (LOREM.length() < size) {
LOREM += LOREM;
}
- strSegment = arena.allocateFrom(LOREM.substring(0, size));
+ var s = LOREM.substring(0, size);
+ strSegment = arena.allocateFrom(s);
+ length = s.getBytes(UTF_8).length;
}
@Benchmark
- public String panama_readString() {
+ public String segment_getString() {
return strSegment.getString(0);
}
+ // Length-delimited read through the getString(long, Charset, long) overload; the byte
+ // length is computed once in setup().
+ @Benchmark
+ public String segment_getStringLength() {
+ return strSegment.getString(0, UTF_8, length);
+ }
+
@Benchmark
public String jni_readString() {
return readString(strSegment.address());
}
+ // Baseline: copy the bytes into a fresh array by hand, then decode with new String(...).
+ @Benchmark
+ public String segment_copyStringBytes() {
+ byte[] bytes = new byte[length];
+ MemorySegment.copy(strSegment, JAVA_BYTE, 0, bytes, 0, length);
+ return new String(bytes, UTF_8);
+ }
+
static native String readString(long addr);
static String LOREM = """