openjdk · cushon · Oct 28, 2025 · Oct 29, 2025 · Oct 30, 2025 · Nov 4, 2025
diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
@@ -2016,19 +2016,26 @@ public byte[] getBytes() {
         return encode(Charset.defaultCharset(), coder(), value);
     }
 
-    boolean bytesCompatible(Charset charset) {
+    boolean bytesCompatible(Charset charset, int srcIndex, int numChars) {
         if (isLatin1()) {
             if (charset == ISO_8859_1.INSTANCE) {
                 return true; // ok, same encoding
             } else if (charset == UTF_8.INSTANCE || charset == US_ASCII.INSTANCE) {
-                return !StringCoding.hasNegatives(value, 0, value.length); // ok, if ASCII-compatible
+                return !StringCoding.hasNegatives(value, srcIndex, numChars); // ok, if ASCII-compatible
             }
         }
         return false;
     }
 
-    void copyToSegmentRaw(MemorySegment segment, long offset) {
-        MemorySegment.copy(value, 0, segment, ValueLayout.JAVA_BYTE, offset, value.length);
+    void copyToSegmentRaw(MemorySegment segment, long offset, int srcIndex, int srcLength) {
+        if (!isLatin1()) {
+            // This method is intended to be used together with bytesCompatible, which currently only supports
+            // latin1 strings. In the future, bytesCompatible could be updated to handle more cases, like
+            // UTF-16 strings (when the platform and charset endianness match, and the String doesn't contain
+            // unpaired surrogates). If that happens, copyToSegmentRaw should also be updated.
+            throw new IllegalStateException("This string does not support copyToSegmentRaw");
+        }
+        MemorySegment.copy(value, srcIndex, segment, ValueLayout.JAVA_BYTE, offset, srcLength);
     }
 
     /**

diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java
@@ -2315,13 +2315,13 @@ public String getLoaderNameID(ClassLoader loader) {
             }
 
             @Override
-            public void copyToSegmentRaw(String string, MemorySegment segment, long offset) {
-                string.copyToSegmentRaw(segment, offset);
+            public void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength) {
+                string.copyToSegmentRaw(segment, offset, srcIndex, srcLength);
             }
 
             @Override
-            public boolean bytesCompatible(String string, Charset charset) {
-                return string.bytesCompatible(charset);
+            public boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars) {
+                return string.bytesCompatible(charset, srcIndex, numChars);
             }
         });
     }

diff --git a/src/java.base/share/classes/java/lang/foreign/MemorySegment.java b/src/java.base/share/classes/java/lang/foreign/MemorySegment.java
@@ -1296,12 +1296,7 @@ MemorySegment reinterpret(long newSize,
      * over the decoding process is required.
      * <p>
      * Getting a string from a segment with a known byte offset and
-     * known byte length can be done like so:
-     * {@snippet lang=java :
-     *     byte[] bytes = new byte[length];
-     *     MemorySegment.copy(segment, JAVA_BYTE, offset, bytes, 0, length);
-     *     return new String(bytes, charset);
-     * }
+     * known byte length can be done using {@link #getString(long, Charset, long)}.
      *
      * @param offset  offset in bytes (relative to this segment address) at which this
      *                access operation will occur
@@ -1328,6 +1323,40 @@ MemorySegment reinterpret(long newSize,
      */
     String getString(long offset, Charset charset);
 
+    /**
+     * Reads a string from this segment at the given offset, using the provided length
+     * and charset.
+     * <p>
+     * This method always replaces malformed-input and unmappable-character
+     * sequences with this charset's default replacement string. The {@link
+     * java.nio.charset.CharsetDecoder} class should be used when more control
+     * over the decoding process is required.
+     * <p>
+     * If the string contains any {@code '\0'} characters, they will be read as well.
+     * This differs from {@link #getString(long, Charset)}, which will only read up
+     * to the first {@code '\0'}, resulting in truncation for string data that contains
+     * the {@code '\0'} character.
+     *
+     * @param offset  offset in bytes (relative to this segment address) at which this
+     *                access operation will occur
+     * @param charset the charset used to {@linkplain Charset#newDecoder() decode} the
+     *                string bytes
+     * @param length  length, in bytes, of the region of memory to read and decode into
+     *                a string
+     * @return a Java string constructed from the bytes read from the given starting
+     *         address up to the given length
+     * @throws IllegalArgumentException  if the size of the string is greater than the
+     *         largest string supported by the platform
+     * @throws IndexOutOfBoundsException if {@code offset < 0}
+     * @throws IndexOutOfBoundsException if {@code offset > byteSize() - length}
+     * @throws IllegalStateException if the {@linkplain #scope() scope} associated with
+     *         this segment is not {@linkplain Scope#isAlive() alive}
+     * @throws WrongThreadException if this method is called from a thread {@code T},
+     *         such that {@code isAccessibleBy(T) == false}
+     * @throws IllegalArgumentException if {@code length < 0}
+     */
+    String getString(long offset, Charset charset, long length);
+
     /**
      * Writes the given string into this segment at the given offset, converting it to
      * a null-terminated byte sequence using the {@linkplain StandardCharsets#UTF_8 UTF-8}
@@ -1366,7 +1395,8 @@ MemorySegment reinterpret(long newSize,
      * If the given string contains any {@code '\0'} characters, they will be
      * copied as well. This means that, depending on the method used to read
      * the string, such as {@link MemorySegment#getString(long)}, the string
-     * will appear truncated when read again.
+     * will appear truncated when read again. The string can be read without
+     * truncation using {@link #getString(long, Charset, long)}.
      *
      * @param offset  offset in bytes (relative to this segment address) at which this
      *                access operation will occur, the final address of this write
@@ -2606,6 +2636,50 @@ static void copy(Object srcArray, int srcIndex,
                 elementCount);
     }
 
+    /**
+     * Copies the byte sequence of the given string encoded using the provided charset
+     * to the destination segment.
+     * <p>
+     * This method always replaces malformed-input and unmappable-character
+     * sequences with this charset's default replacement byte array. The {@link
+     * java.nio.charset.CharsetEncoder} class should be used when more control
+     * over the encoding process is required.
+     * <p>
+     * If the given string contains any {@code '\0'} characters, they will be
+     * copied as well. This means that, depending on the method used to read
+     * the string, such as {@link MemorySegment#getString(long)}, the string
+     * will appear truncated when read again. The string can be read without
+     * truncation using {@link #getString(long, Charset, long)}.
+     *
+     * @param src      the Java string to be written into the destination segment
+     * @param dstEncoding the charset used to {@linkplain Charset#newEncoder() encode}
+     *                 the string bytes.
+     * @param srcIndex the starting character index of the source string
+     * @param dst      the destination segment
+     * @param dstOffset the starting offset, in bytes, of the destination segment
+     * @param numChars the number of characters to be copied
+     * @throws IllegalStateException if the {@linkplain #scope() scope} associated with
+     *         {@code dst} is not {@linkplain Scope#isAlive() alive}
+     * @throws WrongThreadException if this method is called from a thread {@code T},
+     *         such that {@code dst.isAccessibleBy(T) == false}
+     * @throws IndexOutOfBoundsException if any of {@code srcIndex}, {@code numChars}, or {@code dstOffset}
+     *         is {@code < 0}
+     * @throws IndexOutOfBoundsException if {@code srcIndex > src.length() - numChars}
+     * @throws IllegalArgumentException if {@code dst} is {@linkplain #isReadOnly() read-only}
+     * @throws IndexOutOfBoundsException if {@code dstOffset > dst.byteSize() - B} where {@code B} is the size,
+     *         in bytes, of the substring of {@code src} encoded using the given charset
+     * @return the number of copied bytes.
+     */
+    @ForceInline
+    static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) {
+        Objects.requireNonNull(src);
+        Objects.requireNonNull(dstEncoding);
+        Objects.requireNonNull(dst);
+        Objects.checkFromIndexSize(srcIndex, numChars, src.length());
+
+        return AbstractMemorySegmentImpl.copy(src, dstEncoding, srcIndex, dst, dstOffset, numChars);
+    }
+
     /**
      * Finds and returns the relative offset, in bytes, of the first mismatch between the
      * source and the destination segments. More specifically, the bytes at offset

diff --git a/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java b/src/java.base/share/classes/java/lang/foreign/SegmentAllocator.java
@@ -111,7 +111,8 @@ default MemorySegment allocateFrom(String str) {
      * If the given string contains any {@code '\0'} characters, they will be
      * copied as well. This means that, depending on the method used to read
      * the string, such as {@link MemorySegment#getString(long)}, the string
-     * will appear truncated when read again.
+     * will appear truncated when read again. The string can be read without
+     * truncation using {@link MemorySegment#getString(long, Charset, long)}.
      *
      * @param str     the Java string to be converted into a C string
      * @param charset the charset used to {@linkplain Charset#newEncoder() encode} the
@@ -137,10 +138,10 @@ default MemorySegment allocateFrom(String str, Charset charset) {
         int termCharSize = StringSupport.CharsetKind.of(charset).terminatorCharSize();
         MemorySegment segment;
         int length;
-        if (StringSupport.bytesCompatible(str, charset)) {
+        if (StringSupport.bytesCompatible(str, charset, 0, str.length())) {
             length = str.length();
             segment = allocateNoInit((long) length + termCharSize);
-            StringSupport.copyToSegmentRaw(str, segment, 0);
+            StringSupport.copyToSegmentRaw(str, segment, 0, 0, str.length());
         } else {
             byte[] bytes = str.getBytes(charset);
             length = bytes.length;
@@ -153,6 +154,53 @@ default MemorySegment allocateFrom(String str, Charset charset) {
         return segment;
     }
 
+    /**
+     * Encodes a Java string using the provided charset and stores the resulting
+     * byte array into a memory segment.
+     * <p>
+     * This method always replaces malformed-input and unmappable-character
+     * sequences with this charset's default replacement byte array. The
+     * {@link java.nio.charset.CharsetEncoder} class should be used when more
+     * control over the encoding process is required.
+     * <p>
+     * If the given string contains any {@code '\0'} characters, they will be
+     * copied as well. This means that, depending on the method used to read
+     * the string, such as {@link MemorySegment#getString(long)}, the string
+     * will appear truncated when read again. The string can be read without
+     * truncation using {@link MemorySegment#getString(long, Charset, long)}.
+     *
+     * @param str      the Java string to be encoded
+     * @param charset  the charset used to {@linkplain Charset#newEncoder() encode} the
+     *                 string bytes
+     * @param srcIndex the starting index of the source string
+     * @param numChars the number of characters to be copied
+     * @return a new native segment containing the encoded string
+     * @throws IndexOutOfBoundsException if either {@code srcIndex} or {@code numChars} is {@code < 0}
+     * @throws IndexOutOfBoundsException if {@code srcIndex > str.length() - numChars}
+     *
+     * @implSpec The default implementation for this method copies the contents of the
+     *           specified range of the provided Java string into a new memory segment
+     *           obtained by calling {@code this.allocate(B)}, where {@code B} is the size,
+     *           in bytes, of that range encoded using the provided charset (e.g.
+     *           {@code str.substring(srcIndex, srcIndex + numChars).getBytes(charset).length}).
+     */
+    @ForceInline
+    default MemorySegment allocateFrom(String str, Charset charset, int srcIndex, int numChars) {
+        Objects.requireNonNull(charset);
+        Objects.requireNonNull(str);
+        Objects.checkFromIndexSize(srcIndex, numChars, str.length());
+        MemorySegment segment;
+        if (StringSupport.bytesCompatible(str, charset, srcIndex, numChars)) {
+            segment = allocateNoInit(numChars);
+            StringSupport.copyToSegmentRaw(str, segment, 0, srcIndex, numChars);
+        } else {
+            byte[] bytes = str.substring(srcIndex, srcIndex + numChars).getBytes(charset);
+            segment = allocateNoInit(bytes.length);
+            MemorySegment.copy(bytes, 0, segment, ValueLayout.JAVA_BYTE, 0, bytes.length);
+        }
+        return segment;
+    }
+
     /**
      * {@return a new memory segment initialized with the provided byte value}
      * <p>

diff --git a/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java b/src/java.base/share/classes/jdk/internal/access/JavaLangAccess.java
@@ -612,10 +612,10 @@ StackWalker newStackWalkerInstance(Set<StackWalker.Option> options,
     /**
      * Copy the string bytes to an existing segment, avoiding intermediate copies.
      */
-    void copyToSegmentRaw(String string, MemorySegment segment, long offset);
+    void copyToSegmentRaw(String string, MemorySegment segment, long offset, int srcIndex, int srcLength);
 
     /**
      * Are the string bytes compatible with the given charset?
      */
-    boolean bytesCompatible(String string, Charset charset);
+    boolean bytesCompatible(String string, Charset charset, int srcIndex, int numChars);
 }
diff --git a/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java b/src/java.base/share/classes/jdk/internal/foreign/AbstractMemorySegmentImpl.java
@@ -535,6 +535,13 @@ public boolean equals(Object o) {
                 unsafeGetOffset() == that.unsafeGetOffset();
     }
 
+    @Override
+    public String getString(long offset, Charset charset, long length) {
+        Utils.checkNonNegativeArgument(length, "length");
+        Objects.requireNonNull(charset);
+        return StringSupport.read(this, offset, charset, length);
+    }
+
     @Override
     public int hashCode() {
         return Objects.hash(
@@ -686,6 +693,16 @@ public static void copy(Object srcArray, int srcIndex,
         }
     }
 
+    @ForceInline
+    public static long copy(String src, Charset dstEncoding, int srcIndex, MemorySegment dst, long dstOffset, int numChars) {
+        Objects.requireNonNull(src);
+        Objects.requireNonNull(dstEncoding);
+        Objects.requireNonNull(dst);
+
+        AbstractMemorySegmentImpl destImpl = (AbstractMemorySegmentImpl)dst;
+        return StringSupport.copyBytes(src, destImpl, dstEncoding, dstOffset, srcIndex, numChars);
+    }
+
     // accessors
 
     @ForceInline