From 80ef37b2190a835a19590b52957f248fec4bae02 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Thu, 14 Jan 2021 17:24:12 +0100
Subject: [PATCH 01/25] First stab: move byte[] decoding from StringCoding into String constructors

---
 .../share/classes/java/lang/String.java       | 508 ++++++++++++++++-
 .../share/classes/java/lang/StringCoding.java | 521 +-----------------
 .../share/classes/java/lang/System.java       |   4 +-
 3 files changed, 503 insertions(+), 530 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index e96943ffe1327..ff95f419d8153 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -31,7 +31,9 @@
 import java.lang.invoke.MethodHandles;
 import java.lang.constant.Constable;
 import java.lang.constant.ConstantDesc;
-import java.nio.charset.Charset;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.*;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
@@ -51,7 +53,11 @@
 import java.util.stream.StreamSupport;
 import jdk.internal.vm.annotation.IntrinsicCandidate;
 import jdk.internal.vm.annotation.Stable;
+import sun.nio.cs.ArrayDecoder;
 
+import static java.lang.Character.*;
+import static java.lang.Character.lowSurrogate;
+import static java.lang.StringUTF16.putChar;
 import static java.util.function.Predicate.not;
 
 /**
@@ -217,6 +223,12 @@ public final class String
         COMPACT_STRINGS = true;
     }
 
+    private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE; // these three are compared with == by the decoding paths
+    private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
+    private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
+
+    private static final char REPL = '\ufffd'; // U+FFFD, written in place of malformed input by the replacing decoders
+
     /**
      * Class String is special cased within the Serialization Stream Protocol.
      *
@@ -475,15 +487,27 @@ public String(byte ascii[], int hibyte) {
      *
      * @since  1.1
      */
-    public String(byte bytes[], int offset, int length, String charsetName)
+    public String(byte[] bytes, int offset, int length, String charsetName)
             throws UnsupportedEncodingException {
-        if (charsetName == null)
-            throw new NullPointerException("charsetName");
-        checkBoundsOffCount(offset, length, bytes.length);
-        StringCoding.Result ret =
-            StringCoding.decode(charsetName, bytes, offset, length);
-        this.value = ret.value;
-        this.coder = ret.coder;
+        // Resolved in a static helper because this(...) must be the first statement;
+        // the helper keeps the documented NPE / UnsupportedEncodingException contract.
+        this(bytes, offset, length, lookupCharsetOrThrow(charsetName));
     }
+
+    /**
+     * Resolves a charset name on behalf of the decoding constructors.
+     *
+     * @throws NullPointerException if {@code charsetName} is {@code null}
+     * @throws UnsupportedEncodingException if the name is illegal or not supported
+     */
+    private static Charset lookupCharsetOrThrow(String charsetName)
+            throws UnsupportedEncodingException {
+        Objects.requireNonNull(charsetName, "charsetName");
+        try {
+            return Charset.forName(charsetName);
+        } catch (IllegalCharsetNameException | UnsupportedCharsetException x) {
+            throw new UnsupportedEncodingException(charsetName);
+        }
+    }
 
     /**
@@ -516,14 +522,479 @@ public String(byte bytes[], int offset, int length, String charsetName)
      *
      * @since  1.6
      */
-    public String(byte bytes[], int offset, int length, Charset charset) {
-        if (charset == null)
-            throw new NullPointerException("charset");
+    public String(byte[] bytes, int offset, int length, Charset charset) {
+        Objects.requireNonNull(charset);
+        checkBoundsOffCount(offset, length, bytes.length);
+        if (charset == UTF_8) { // the built-in charsets are matched by identity
+            if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
+                this.value = Arrays.copyOfRange(bytes, offset, offset + length); // no negative byte: copy the range verbatim
+                this.coder = LATIN1;
+            } else {
+                int sl = offset + length; // exclusive end of the source range
+                int dp = 0; // next write index into dst
+                byte[] dst = new byte[length]; // first attempt: one Latin-1 byte per char
+
+                if (COMPACT_STRINGS) {
+                    while (offset < sl) {
+                        int b1 = bytes[offset];
+                        if (b1 >= 0) { // single-byte sequence
+                            dst[dp++] = (byte)b1;
+                            offset++;
+                            continue;
+                        }
+                        if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) && // lead bytes of the 2-byte forms of U+0080..U+00FF
+                                offset + 1 < sl) {
+                            int b2 = bytes[offset + 1];
+                            if (!StringCoding.isNotContinuation(b2)) {
+                                dst[dp++] = (byte)(((b1 << 6) ^ b2)^
+                                        (((byte) 0xC0 << 6) ^
+                                        ((byte) 0x80 << 0))); // the XOR constant cancels the 0xC0 / 0x80 tag bits
+                                offset += 2;
+                                continue;
+                            }
+                        }
+                        // anything not a latin1, including the repl
+                        // we have to go with the utf16
+                        break;
+                    }
+                    if (offset == sl) { // the whole input fit in Latin-1
+                        if (dp != dst.length) { // 2-byte sequences yield fewer chars than input bytes
+                            dst = Arrays.copyOf(dst, dp);
+                        }
+                        this.value = dst;
+                        this.coder = LATIN1;
+                        return;
+                    }
+                }
+                if (dp == 0) {
+                    dst = new byte[length << 1]; // UTF-16 storage: two bytes per char
+                } else {
+                    byte[] buf = new byte[length << 1];
+                    StringLatin1.inflate(dst, 0, buf, 0, dp); // widen the Latin-1 prefix decoded so far
+                    dst = buf;
+                }
+                while (offset < sl) {
+                    int b1 = bytes[offset++];
+                    if (b1 >= 0) {
+                        putChar(dst, dp++, (char) b1);
+                    } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { // 2-byte lead 110xxxxx, excluding 0xC0/0xC1
+                        if (offset < sl) {
+                            int b2 = bytes[offset++];
+                            if (StringCoding.isNotContinuation(b2)) {
+                                putChar(dst, dp++, REPL);
+                                offset--; // re-examine the offending byte as the start of a new sequence
+                            } else {
+                                putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
+                                        (((byte) 0xC0 << 6) ^
+                                        ((byte) 0x80 << 0))));
+                            }
+                            continue;
+                        }
+                        putChar(dst, dp++, REPL); // lead byte was the last byte of the input
+                        break;
+                    } else if ((b1 >> 4) == -2) { // 3-byte lead 1110xxxx
+                        if (offset + 1 < sl) {
+                            int b2 = bytes[offset++];
+                            int b3 = bytes[offset++];
+                            if (StringCoding.isMalformed3(b1, b2, b3)) {
+                                putChar(dst, dp++, REPL);
+                                offset -= 3; // back to the lead byte
+                                offset += StringCoding.malformedN(bytes, offset, 3); // presumably the number of bytes to skip - confirm
+                            } else {
+                                char c = (char)((b1 << 12) ^
+                                                (b2 <<  6) ^
+                                                (b3 ^
+                                                 (((byte) 0xE0 << 12) ^
+                                                  ((byte) 0x80 <<  6) ^
+                                                  ((byte) 0x80 <<  0))));
+                                if (isSurrogate(c)) { // an encoded surrogate is replaced, not passed through
+                                    putChar(dst, dp++, REPL);
+                                } else {
+                                    putChar(dst, dp++, c);
+                                }
+                            }
+                            continue;
+                        }
+                        if (offset  < sl && StringCoding.isMalformed3_2(b1, bytes[offset])) {
+                            putChar(dst, dp++, REPL);
+                            continue;
+                        }
+                        putChar(dst, dp++, REPL);
+                        break;
+                    } else if ((b1 >> 3) == -2) { // 4-byte lead 11110xxx
+                        if (offset + 2 < sl) {
+                            int b2 = bytes[offset++];
+                            int b3 = bytes[offset++];
+                            int b4 = bytes[offset++];
+                            int uc = ((b1 << 18) ^
+                                      (b2 << 12) ^
+                                      (b3 <<  6) ^
+                                      (b4 ^
+                                       (((byte) 0xF0 << 18) ^
+                                        ((byte) 0x80 << 12) ^
+                                        ((byte) 0x80 <<  6) ^
+                                        ((byte) 0x80 <<  0))));
+                            if (StringCoding.isMalformed4(b2, b3, b4) ||
+                                    !isSupplementaryCodePoint(uc)) { // shortest form check
+                                putChar(dst, dp++, REPL);
+                                offset -= 4; // back to the lead byte
+                                offset += StringCoding.malformedN(bytes, offset, 4);
+                            } else {
+                                putChar(dst, dp++, highSurrogate(uc));
+                                putChar(dst, dp++, lowSurrogate(uc));
+                            }
+                            continue;
+                        }
+                        b1 &= 0xff; // unsigned value for the range checks below
+                        if (b1 > 0xf4 ||
+                                offset  < sl && StringCoding.isMalformed4_2(b1, bytes[offset] & 0xff)) {
+                            putChar(dst, dp++, REPL);
+                            continue;
+                        }
+                        offset++;
+                        putChar(dst, dp++, REPL);
+                        if (offset  < sl && StringCoding.isMalformed4_3(bytes[offset])) {
+                            continue;
+                        }
+                        break;
+                    } else {
+                        putChar(dst, dp++, REPL);
+                    }
+                }
+                if (dp != length) { // trim the buffer to the dp chars actually written
+                    dst = Arrays.copyOf(dst, dp << 1);
+                }
+                this.value = dst;
+                this.coder = UTF16;
+            }
+        } else if (charset == ISO_8859_1) {
+            if (COMPACT_STRINGS) {
+                this.value = Arrays.copyOfRange(bytes, offset, offset + length);
+                this.coder = LATIN1;
+            } else {
+                this.value = StringLatin1.inflate(bytes, offset, length);
+                this.coder = UTF16;
+            }
+        } else if (charset == US_ASCII) {
+            if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
+                this.value = Arrays.copyOfRange(bytes, offset, offset + length);
+                this.coder = LATIN1;
+            } else {
+                byte[] dst = new byte[length << 1];
+                int dp = 0;
+                while (dp < length) {
+                    int b = bytes[offset++];
+                    putChar(dst, dp++, (b >= 0) ? (char) b : REPL); // negative bytes are not ASCII: replace them
+                }
+                this.value = dst;
+                this.coder = UTF16;
+            }
+        } else {
+            // (1)We never cache the "external" cs, the only benefit of creating
+            // an additional StringDe/Encoder object to wrap it is to share the
+            // de/encode() method. These SD/E objects are short-lived, the young-gen
+            // gc should be able to take care of them well. But the best approach
+            // is still not to generate them if not really necessary.
+            // (2)The defensive copy of the input byte/char[] has a big performance
+            // impact, as well as the outgoing result byte/char[]. Need to do the
+            // optimization check of (sm==null && classLoader0==null) for both.
+            // (3)There might be a timing gap in isTrusted setting. getClassLoader0()
+            // is only checked (and then isTrusted gets set) when (SM==null). It is
+            // possible that the SM==null for now but then SM is NOT null later
+            // when safeTrim() is invoked...the "safe" way is to redundantly
+            // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
+            // but it then can be argued that the SM is null when the operation
+            // is started...
+            CharsetDecoder cd = charset.newDecoder();
+            // ascii fastpath
+            if ((cd instanceof ArrayDecoder) &&
+                    ((ArrayDecoder)cd).isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
+                if (COMPACT_STRINGS) { // the enclosing condition already ruled out negative bytes; no second scan
+                    this.value = Arrays.copyOfRange(bytes, offset, offset + length);
+                    this.coder = LATIN1;
+                } else {
+                    byte[] dst = new byte[length << 1];
+                    int dp = 0;
+                    while (dp < length) {
+                        int b = bytes[offset++];
+                        putChar(dst, dp++, (b >= 0) ? (char) b : REPL); // NOTE(review): b >= 0 always holds on this path
+                    }
+                    this.value = dst;
+                    this.coder = UTF16;
+                }
+                return;
+            }
+            // fastpath for always Latin1 decodable single byte
+            if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
+                byte[] dst = new byte[length];
+                ((ArrayDecoder)cd).decodeToLatin1(bytes, offset, length, dst);
+                this.value = dst;
+                this.coder = LATIN1;
+                return;
+            }
+
+            int en = StringCoding.scale(length, cd.maxCharsPerByte()); // capacity of the char buffer for this input
+            if (length == 0) {
+                this.value = "".value; // reuse the empty string's storage
+                this.coder = "".coder;
+                return;
+            }
+            cd.onMalformedInput(CodingErrorAction.REPLACE)
+                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
+                    .reset();
+            char[] ca = new char[en];
+            if (cd instanceof ArrayDecoder) {
+                int clen = ((ArrayDecoder)cd).decode(bytes, offset, length, ca); // clen is used as the decoded char count
+                if (COMPACT_STRINGS) {
+                    byte[] bs = StringUTF16.compress(ca, 0, clen);
+                    if (bs != null) { // non-null result is stored as Latin-1; otherwise fall through to UTF-16
+                        value = bs;
+                        coder = LATIN1;
+                        return;
+                    }
+                }
+                coder = UTF16;
+                value = StringUTF16.toBytes(ca, 0, clen);
+                return;
+            }
+            if (charset.getClass().getClassLoader0() != null &&
+                    System.getSecurityManager() != null) {
+                bytes = Arrays.copyOfRange(bytes, offset, offset + length); // defensive copy, see (2) above
+                offset = 0;
+            }
+            ByteBuffer bb = ByteBuffer.wrap(bytes, offset, length);
+            CharBuffer cb = CharBuffer.wrap(ca);
+            try {
+                CoderResult cr = cd.decode(bb, cb, true);
+                if (!cr.isUnderflow())
+                    cr.throwException();
+                cr = cd.flush(cb);
+                if (!cr.isUnderflow())
+                    cr.throwException();
+            } catch (CharacterCodingException x) {
+                // Substitution is always enabled,
+                // so this shouldn't happen
+                throw new Error(x);
+            }
+            if (COMPACT_STRINGS) {
+                byte[] bs = StringUTF16.compress(ca, 0, cb.position());
+                if (bs != null) {
+                    value = bs;
+                    coder = LATIN1;
+                    return;
+                }
+            }
+            coder = UTF16;
+            value = StringUTF16.toBytes(ca, 0, cb.position());
+        }
+    }
+
+    ////////////////////// for j.u.z.ZipCoder //////////////////////////
+
+    /*
+     * Decodes UTF-8; throws iae, instead of replacing, if malformed or unmappable.
+     */
+    static String newStringUTF8NoRepl(byte[] src, int off, int len) {
+        return new String(src, off, len, (Void)null); // the Void argument only selects the throwing private constructor
+    }
+
+    static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
+        try {
+            return newStringNoRepl1(src, cs);
+        } catch (IllegalArgumentException e) {
+            // newStringNoRepl1 reports a decoding failure as an IAE caused by the CCE; unwrap it for the caller
+            Throwable cause = e.getCause();
+            if (cause instanceof CharacterCodingException) { // covers MalformedInputException, a subclass
+                throw (CharacterCodingException)cause;
+            }
+            throw e; // unrelated IAE: rethrow as-is rather than fail the cast with ClassCastException/NPE
+        }
+    }
+
+    // Decodes all of src with cs; the generic decoder path wraps a CharacterCodingException in an IAE.
+    static String newStringNoRepl1(byte[] src, Charset cs) {
+        final int len = src.length;
+        if (cs == UTF_8) {
+            return newStringUTF8NoRepl(src, 0, len);
+        }
+        // ISO-8859-1 input, and US-ASCII input without negative bytes, share the ISO-8859-1 path.
+        boolean ascii = cs == US_ASCII;
+        if (cs == ISO_8859_1 || (ascii && !StringCoding.hasNegatives(src, 0, len))) {
+            return new String(src, 0, len, ISO_8859_1);
+        }
+        if (ascii) {
+            StringCoding.throwMalformed(src);
+        }
+
+        CharsetDecoder decoder = cs.newDecoder();
+        // Fast path: ASCII-compatible array decoder and no negative bytes in the input.
+        if (decoder instanceof ArrayDecoder) {
+            ArrayDecoder ad = (ArrayDecoder)decoder;
+            if (ad.isASCIICompatible() && !StringCoding.hasNegatives(src, 0, len)) {
+                return new String(src, 0, len, ISO_8859_1);
+            }
+        }
+        if (len == 0) {
+            return "";
+        }
+        char[] chars = new char[StringCoding.scale(len, decoder.maxCharsPerByte())];
+        // Hand the decoder a private copy of the input when the charset class has a
+        // non-null class loader and a security manager is installed.
+        boolean copyInput = cs.getClass().getClassLoader0() != null && System.getSecurityManager() != null;
+        ByteBuffer in = ByteBuffer.wrap(copyInput ? Arrays.copyOf(src, len) : src);
+        CharBuffer out = CharBuffer.wrap(chars);
+        try {
+            CoderResult result = decoder.decode(in, out, true);
+            if (result.isUnderflow()) {
+                result = decoder.flush(out);
+            }
+            if (!result.isUnderflow()) {
+                result.throwException();
+            }
+        } catch (CharacterCodingException x) {
+            throw new IllegalArgumentException(x);
+        }
+        StringCoding.Result ret = new StringCoding.Result().with(chars, 0, out.position());
+        return new String(ret.value, ret.coder);
+    }
+
+    /*
+     * Private constructor for doing UTF-8 decode, but throwing iae (through StringCoding.throwMalformed)
+     * on malformed or unmappable characters; throwOnError is never read, it only selects this overload
+     */
+    private String(byte[] bytes, int offset, int length, Void throwOnError) {
         checkBoundsOffCount(offset, length, bytes.length);
-        StringCoding.Result ret =
-            StringCoding.decode(charset, bytes, offset, length);
-        this.value = ret.value;
-        this.coder = ret.coder;
+        if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
+            this.value = Arrays.copyOfRange(bytes, offset, offset + length); // no negative byte: copy the range verbatim
+            this.coder = LATIN1;
+        } else {
+            int sl = offset + length; // exclusive end of the source range
+            int dp = 0; // next write index into dst
+            byte[] dst = new byte[length]; // first attempt: one Latin-1 byte per char
+            if (COMPACT_STRINGS) {
+                while (offset < sl) {
+                    int b1 = bytes[offset];
+                    if (b1 >= 0) { // single-byte sequence
+                        dst[dp++] = (byte)b1;
+                        offset++;
+                        continue;
+                    }
+                    if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) && // lead bytes of the 2-byte forms of U+0080..U+00FF
+                            offset + 1 < sl) {
+                        int b2 = bytes[offset + 1];
+                        if (!StringCoding.isNotContinuation(b2)) {
+                            dst[dp++] = (byte)(((b1 << 6) ^ b2)^
+                                    (((byte) 0xC0 << 6) ^
+                                            ((byte) 0x80 << 0))); // the XOR constant cancels the 0xC0 / 0x80 tag bits
+                            offset += 2;
+                            continue;
+                        }
+                    }
+                    // anything not a latin1, including the repl
+                    // we have to go with the utf16
+                    break;
+                }
+                if (offset == sl) { // the whole input fit in Latin-1
+                    if (dp != dst.length) {
+                        dst = Arrays.copyOf(dst, dp);
+                    }
+                    this.value = dst;
+                    this.coder = LATIN1;
+                    return;
+                }
+            }
+            if (dp == 0) {
+                dst = new byte[length << 1]; // UTF-16 storage: two bytes per char
+            } else {
+                byte[] buf = new byte[length << 1];
+                StringLatin1.inflate(dst, 0, buf, 0, dp); // widen the Latin-1 prefix decoded so far
+                dst = buf;
+            }
+            while (offset < sl) {
+                int b1 = bytes[offset++];
+                if (b1 >= 0) {
+                    putChar(dst, dp++, (char) b1);
+                } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) { // 2-byte lead 110xxxxx, excluding 0xC0/0xC1
+                    if (offset < sl) {
+                        int b2 = bytes[offset++];
+                        if (StringCoding.isNotContinuation(b2)) {
+                            StringCoding.throwMalformed(offset - 1, 1); // arguments look like (position, length) - confirm
+                        } else {
+                            putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
+                                    (((byte) 0xC0 << 6) ^
+                                            ((byte) 0x80 << 0))));
+                        }
+                        continue;
+                    }
+                    StringCoding.throwMalformed(offset, 1);  // underflow()
+                    break;
+                } else if ((b1 >> 4) == -2) { // 3-byte lead 1110xxxx
+                    if (offset + 1 < sl) {
+                        int b2 = bytes[offset++];
+                        int b3 = bytes[offset++];
+                        if (StringCoding.isMalformed3(b1, b2, b3)) {
+                            StringCoding.throwMalformed(offset - 3, 3);
+                        } else {
+                            char c = (char)((b1 << 12) ^
+                                    (b2 <<  6) ^
+                                    (b3 ^
+                                            (((byte) 0xE0 << 12) ^
+                                            ((byte) 0x80 <<  6) ^
+                                            ((byte) 0x80 <<  0))));
+                            if (isSurrogate(c)) { // an encoded surrogate is rejected
+                                StringCoding.throwMalformed(offset - 3, 3);
+                            } else {
+                                putChar(dst, dp++, c);
+                            }
+                        }
+                        continue;
+                    }
+                    if (offset  < sl && StringCoding.isMalformed3_2(b1, bytes[offset])) {
+                        StringCoding.throwMalformed(offset - 1, 2);
+                        continue; // NOTE(review): reached only if throwMalformed returns, which it presumably never does
+                    }
+                    StringCoding.throwMalformed(offset, 1);
+                    break;
+                } else if ((b1 >> 3) == -2) { // 4-byte lead 11110xxx
+                    if (offset + 2 < sl) {
+                        int b2 = bytes[offset++];
+                        int b3 = bytes[offset++];
+                        int b4 = bytes[offset++];
+                        int uc = ((b1 << 18) ^
+                                  (b2 << 12) ^
+                                  (b3 <<  6) ^
+                                  (b4 ^
+                                   (((byte) 0xF0 << 18) ^
+                                   ((byte) 0x80 << 12) ^
+                                   ((byte) 0x80 <<  6) ^
+                                   ((byte) 0x80 <<  0))));
+                        if (StringCoding.isMalformed4(b2, b3, b4) ||
+                                !isSupplementaryCodePoint(uc)) { // shortest form check
+                            StringCoding.throwMalformed(offset - 4, 4);
+                        } else {
+                            putChar(dst, dp++, highSurrogate(uc));
+                            putChar(dst, dp++, lowSurrogate(uc));
+                        }
+                        continue;
+                    }
+                    b1 &= 0xff; // unsigned value for the range check below
+                    if (b1 > 0xf4 ||
+                        offset < sl && StringCoding.isMalformed4_2(b1, bytes[offset] & 0xff)) {
+                        StringCoding.throwMalformed(offset - 1, 1);  // or 2
+                        continue;
+                    }
+                    StringCoding.throwMalformed(offset - 1, 1);
+                    break;
+                } else {
+                    StringCoding.throwMalformed(offset - 1, 1);
+                }
+            }
+            if (dp != length) { // trim the buffer to the dp chars actually written
+                dst = Arrays.copyOf(dst, dp << 1);
+            }
+            this.value = dst;
+            this.coder = UTF16;
+        }
     }
 
     /**
@@ -605,10 +1076,7 @@ public String(byte bytes[], Charset charset) {
      * @since  1.1
      */
     public String(byte bytes[], int offset, int length) {
-        checkBoundsOffCount(offset, length, bytes.length);
-        StringCoding.Result ret = StringCoding.decode(bytes, offset, length);
-        this.value = ret.value;
-        this.coder = ret.coder;
+        this(bytes, offset, length, Charset.defaultCharset()); // bounds check and decoding happen in the Charset overload
     }
 
     /**
diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index 8c0911b169acf..051404bf6f3f7 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -63,8 +63,6 @@ class StringCoding {
     private StringCoding() { }
 
     /** The cached coders for each thread */
-    private static final ThreadLocal<SoftReference<StringDecoder>> decoder =
-        new ThreadLocal<>();
     private static final ThreadLocal<SoftReference<StringEncoder>> encoder =
         new ThreadLocal<>();
 
@@ -91,13 +89,13 @@ private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
             return Arrays.copyOf(ba, len);
     }
 
-    private static int scale(int len, float expansionFactor) {
+    static int scale(int len, float expansionFactor) { // package-private: now also called from String
         // We need to perform double, not float, arithmetic; otherwise
         // we lose low order bits when len is larger than 2**24.
         return (int)(len * (double)expansionFactor);
     }
 
-    private static Charset lookupCharset(String csn) {
+    static Charset lookupCharset(String csn) {
         if (Charset.isSupported(csn)) {
             try {
                 return Charset.forName(csn);
@@ -112,12 +110,6 @@ static class Result {
         byte[] value;
         byte coder;
 
-        Result with() {
-            coder = COMPACT_STRINGS ? LATIN1 : UTF16;
-            value = new byte[0];
-            return this;
-        }
-
         Result with(char[] val, int off, int len) {
             if (String.COMPACT_STRINGS) {
                 byte[] bs = StringUTF16.compress(val, off, len);
@@ -149,201 +141,6 @@ public static boolean hasNegatives(byte[] ba, int off, int len) {
         return false;
     }
 
-    // -- Decoding --
-    static class StringDecoder {
-        private final String requestedCharsetName;
-        private final Charset cs;
-        private final boolean isASCIICompatible;
-        private final CharsetDecoder cd;
-        protected final Result result;
-
-        StringDecoder(Charset cs, String rcn) {
-            this.requestedCharsetName = rcn;
-            this.cs = cs;
-            this.cd = cs.newDecoder()
-                .onMalformedInput(CodingErrorAction.REPLACE)
-                .onUnmappableCharacter(CodingErrorAction.REPLACE);
-            this.result = new Result();
-            this.isASCIICompatible = (cd instanceof ArrayDecoder) &&
-                    ((ArrayDecoder)cd).isASCIICompatible();
-        }
-
-        String charsetName() {
-            if (cs instanceof HistoricallyNamedCharset)
-                return ((HistoricallyNamedCharset)cs).historicalName();
-            return cs.name();
-        }
-
-        final String requestedCharsetName() {
-            return requestedCharsetName;
-        }
-
-        Result decode(byte[] ba, int off, int len) {
-            if (len == 0) {
-                return result.with();
-            }
-            // fastpath for ascii compatible
-            if (isASCIICompatible && !hasNegatives(ba, off, len)) {
-                if (COMPACT_STRINGS) {
-                    return result.with(Arrays.copyOfRange(ba, off, off + len),
-                                      LATIN1);
-                } else {
-                    return result.with(StringLatin1.inflate(ba, off, len), UTF16);
-                }
-            }
-            // fastpath for always Latin1 decodable single byte
-            if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
-                byte[] dst = new byte[len];
-                ((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst);
-                return result.with(dst, LATIN1);
-            }
-            int en = scale(len, cd.maxCharsPerByte());
-            char[] ca = new char[en];
-            if (cd instanceof ArrayDecoder) {
-                int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
-                return result.with(ca, 0, clen);
-            }
-            cd.reset();
-            ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
-            CharBuffer cb = CharBuffer.wrap(ca);
-            try {
-                CoderResult cr = cd.decode(bb, cb, true);
-                if (!cr.isUnderflow())
-                    cr.throwException();
-                cr = cd.flush(cb);
-                if (!cr.isUnderflow())
-                    cr.throwException();
-            } catch (CharacterCodingException x) {
-                // Substitution is always enabled,
-                // so this shouldn't happen
-                throw new Error(x);
-            }
-            return result.with(ca, 0, cb.position());
-        }
-    }
-
-    static Result decode(String charsetName, byte[] ba, int off, int len)
-        throws UnsupportedEncodingException
-    {
-        StringDecoder sd = deref(decoder);
-        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
-        if ((sd == null) || !(csn.equals(sd.requestedCharsetName())
-                              || csn.equals(sd.charsetName()))) {
-            sd = null;
-            try {
-                Charset cs = lookupCharset(csn);
-                if (cs != null) {
-                    if (cs == UTF_8) {
-                        return decodeUTF8(ba, off, len, true);
-                    }
-                    if (cs == ISO_8859_1) {
-                        return decodeLatin1(ba, off, len);
-                    }
-                    if (cs == US_ASCII) {
-                        return decodeASCII(ba, off, len);
-                    }
-                    sd = new StringDecoder(cs, csn);
-                }
-            } catch (IllegalCharsetNameException x) {}
-            if (sd == null)
-                throw new UnsupportedEncodingException(csn);
-            set(decoder, sd);
-        }
-        return sd.decode(ba, off, len);
-    }
-
-    static Result decode(Charset cs, byte[] ba, int off, int len) {
-        if (cs == UTF_8) {
-            return decodeUTF8(ba, off, len, true);
-        }
-        if (cs == ISO_8859_1) {
-            return decodeLatin1(ba, off, len);
-        }
-        if (cs == US_ASCII) {
-            return decodeASCII(ba, off, len);
-        }
-
-        // (1)We never cache the "external" cs, the only benefit of creating
-        // an additional StringDe/Encoder object to wrap it is to share the
-        // de/encode() method. These SD/E objects are short-lived, the young-gen
-        // gc should be able to take care of them well. But the best approach
-        // is still not to generate them if not really necessary.
-        // (2)The defensive copy of the input byte/char[] has a big performance
-        // impact, as well as the outgoing result byte/char[]. Need to do the
-        // optimization check of (sm==null && classLoader0==null) for both.
-        // (3)There might be a timing gap in isTrusted setting. getClassLoader0()
-        // is only checked (and then isTrusted gets set) when (SM==null). It is
-        // possible that the SM==null for now but then SM is NOT null later
-        // when safeTrim() is invoked...the "safe" way to do is to redundant
-        // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
-        // but it then can be argued that the SM is null when the operation
-        // is started...
-        CharsetDecoder cd = cs.newDecoder();
-        // ascii fastpath
-        if ((cd instanceof ArrayDecoder) &&
-            ((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
-            return decodeLatin1(ba, off, len);
-        }
-        // fastpath for always Latin1 decodable single byte
-        if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
-            byte[] dst = new byte[len];
-            ((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst);
-            return new Result().with(dst, LATIN1);
-        }
-
-        int en = scale(len, cd.maxCharsPerByte());
-        if (len == 0) {
-            return new Result().with();
-        }
-        cd.onMalformedInput(CodingErrorAction.REPLACE)
-          .onUnmappableCharacter(CodingErrorAction.REPLACE)
-          .reset();
-        char[] ca = new char[en];
-        if (cd instanceof ArrayDecoder) {
-            int clen = ((ArrayDecoder)cd).decode(ba, off, len, ca);
-            return new Result().with(ca, 0, clen);
-        }
-        if (cs.getClass().getClassLoader0() != null &&
-            System.getSecurityManager() != null) {
-            ba = Arrays.copyOfRange(ba, off, off + len);
-            off = 0;
-        }
-        ByteBuffer bb = ByteBuffer.wrap(ba, off, len);
-        CharBuffer cb = CharBuffer.wrap(ca);
-        try {
-            CoderResult cr = cd.decode(bb, cb, true);
-            if (!cr.isUnderflow())
-                cr.throwException();
-            cr = cd.flush(cb);
-            if (!cr.isUnderflow())
-                cr.throwException();
-        } catch (CharacterCodingException x) {
-            // Substitution is always enabled,
-            // so this shouldn't happen
-            throw new Error(x);
-        }
-        return new Result().with(ca, 0, cb.position());
-    }
-
-    static Result decode(byte[] ba, int off, int len) {
-        Charset cs = Charset.defaultCharset();
-        if (cs == UTF_8) {
-            return decodeUTF8(ba, off, len, true);
-        }
-        if (cs == ISO_8859_1) {
-            return decodeLatin1(ba, off, len);
-        }
-        if (cs == US_ASCII) {
-            return decodeASCII(ba, off, len);
-        }
-        StringDecoder sd = deref(decoder);
-        if (sd == null || !cs.name().equals(sd.cs.name())) {
-            sd = new StringDecoder(cs, cs.name());
-            set(decoder, sd);
-        }
-        return sd.decode(ba, off, len);
-    }
-
     // -- Encoding --
     private static class StringEncoder {
         private Charset cs;
@@ -522,30 +319,8 @@ static byte[] encode(byte coder, byte[] val) {
      */
     private static native void err(String msg);
 
-     /* The cached Result for each thread */
-    private static final ThreadLocal<StringCoding.Result>
-        resultCached = new ThreadLocal<>() {
-            protected StringCoding.Result initialValue() {
-                return new StringCoding.Result();
-            }};
-
     ////////////////////////// ascii //////////////////////////////
 
-    private static Result decodeASCII(byte[] ba, int off, int len) {
-        Result result = resultCached.get();
-        if (COMPACT_STRINGS && !hasNegatives(ba, off, len)) {
-            return result.with(Arrays.copyOfRange(ba, off, off + len),
-                               LATIN1);
-        }
-        byte[] dst = new byte[len<<1];
-        int dp = 0;
-        while (dp < len) {
-            int b = ba[off++];
-            putChar(dst, dp++, (b >= 0) ? (char)b : repl);
-        }
-        return result.with(dst, UTF16);
-    }
-
     private static byte[] encodeASCII(byte coder, byte[] val) {
         if (coder == LATIN1) {
             byte[] dst = new byte[val.length];
@@ -579,17 +354,6 @@ private static byte[] encodeASCII(byte coder, byte[] val) {
         return Arrays.copyOf(dst, dp);
     }
 
-    ////////////////////////// latin1/8859_1 ///////////////////////////
-
-    private static Result decodeLatin1(byte[] ba, int off, int len) {
-       Result result = resultCached.get();
-       if (COMPACT_STRINGS) {
-           return result.with(Arrays.copyOfRange(ba, off, off + len), LATIN1);
-       } else {
-           return result.with(StringLatin1.inflate(ba, off, len), UTF16);
-       }
-    }
-
     @IntrinsicCandidate
     private static int implEncodeISOArray(byte[] sa, int sp,
                                           byte[] da, int dp, int len) {
@@ -641,37 +405,37 @@ private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
 
     //////////////////////////////// utf8 ////////////////////////////////////
 
-    private static boolean isNotContinuation(int b) {
+    static boolean isNotContinuation(int b) {
         return (b & 0xc0) != 0x80;
     }
 
-    private static boolean isMalformed3(int b1, int b2, int b3) {
+    static boolean isMalformed3(int b1, int b2, int b3) {
         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
                (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
     }
 
-    private static boolean isMalformed3_2(int b1, int b2) {
+    static boolean isMalformed3_2(int b1, int b2) {
         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
                (b2 & 0xc0) != 0x80;
     }
 
-    private static boolean isMalformed4(int b2, int b3, int b4) {
+    static boolean isMalformed4(int b2, int b3, int b4) {
         return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
                (b4 & 0xc0) != 0x80;
     }
 
-    private static boolean isMalformed4_2(int b1, int b2) {
+    static boolean isMalformed4_2(int b1, int b2) {
         return (b1 == 0xf0 && (b2  < 0x90 || b2 > 0xbf)) ||
                (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
                (b2 & 0xc0) != 0x80;
     }
 
-    private static boolean isMalformed4_3(int b3) {
+    static boolean isMalformed4_3(int b3) {
         return (b3 & 0xc0) != 0x80;
     }
 
     // for nb == 3/4
-    private static int malformedN(byte[] src, int sp, int nb) {
+    static int malformedN(byte[] src, int sp, int nb) {
         if (nb == 3) {
             int b1 = src[sp++];
             int b2 = src[sp++];    // no need to lookup b3
@@ -693,206 +457,28 @@ private static int malformedN(byte[] src, int sp, int nb) {
         return -1;
     }
 
-    private static void throwMalformed(int off, int nb) {
+    static void throwMalformed(int off, int nb) {
         String msg = "malformed input off : " + off + ", length : " + nb;
         throw new IllegalArgumentException(msg, new MalformedInputException(nb));
     }
 
-    private static void throwMalformed(byte[] val) {
+    static void throwMalformed(byte[] val) {
         int dp = 0;
         while (dp < val.length && val[dp] >=0) { dp++; }
         throwMalformed(dp, 1);
     }
 
-    private static void throwUnmappable(int off, int nb) {
+    static void throwUnmappable(int off, int nb) {
         String msg = "malformed input off : " + off + ", length : " + nb;
         throw new IllegalArgumentException(msg, new UnmappableCharacterException(nb));
     }
 
-    private static void throwUnmappable(byte[] val) {
+    static void throwUnmappable(byte[] val) {
         int dp = 0;
         while (dp < val.length && val[dp] >=0) { dp++; }
         throwUnmappable(dp, 1);
     }
 
-    private static char repl = '\ufffd';
-
-    private static Result decodeUTF8(byte[] src, int sp, int len, boolean doReplace) {
-        // ascii-bais, which has a relative impact to the non-ascii-only bytes
-        if (COMPACT_STRINGS && !hasNegatives(src, sp, len))
-            return resultCached.get().with(Arrays.copyOfRange(src, sp, sp + len),
-                                           LATIN1);
-        return decodeUTF8_0(src, sp, len, doReplace);
-    }
-
-    private static Result decodeUTF8_0(byte[] src, int sp, int len, boolean doReplace) {
-        Result ret = resultCached.get();
-
-        int sl = sp + len;
-        int dp = 0;
-        byte[] dst = new byte[len];
-
-        if (COMPACT_STRINGS) {
-            while (sp < sl) {
-                int b1 = src[sp];
-                if (b1 >= 0) {
-                    dst[dp++] = (byte)b1;
-                    sp++;
-                    continue;
-                }
-                if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
-                    sp + 1 < sl) {
-                    int b2 = src[sp + 1];
-                    if (!isNotContinuation(b2)) {
-                        dst[dp++] = (byte)(((b1 << 6) ^ b2)^
-                                           (((byte) 0xC0 << 6) ^
-                                           ((byte) 0x80 << 0)));
-                        sp += 2;
-                        continue;
-                    }
-                }
-                // anything not a latin1, including the repl
-                // we have to go with the utf16
-                break;
-            }
-            if (sp == sl) {
-                if (dp != dst.length) {
-                    dst = Arrays.copyOf(dst, dp);
-                }
-                return ret.with(dst, LATIN1);
-            }
-        }
-        if (dp == 0) {
-            dst = new byte[len << 1];
-        } else {
-            byte[] buf = new byte[len << 1];
-            StringLatin1.inflate(dst, 0, buf, 0, dp);
-            dst = buf;
-        }
-        while (sp < sl) {
-            int b1 = src[sp++];
-            if (b1 >= 0) {
-                putChar(dst, dp++, (char) b1);
-            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
-                if (sp < sl) {
-                    int b2 = src[sp++];
-                    if (isNotContinuation(b2)) {
-                        if (!doReplace) {
-                            throwMalformed(sp - 1, 1);
-                        }
-                        putChar(dst, dp++, repl);
-                        sp--;
-                    } else {
-                        putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
-                                                  (((byte) 0xC0 << 6) ^
-                                                  ((byte) 0x80 << 0))));
-                    }
-                    continue;
-                }
-                if (!doReplace) {
-                    throwMalformed(sp, 1);  // underflow()
-                }
-                putChar(dst, dp++, repl);
-                break;
-            } else if ((b1 >> 4) == -2) {
-                if (sp + 1 < sl) {
-                    int b2 = src[sp++];
-                    int b3 = src[sp++];
-                    if (isMalformed3(b1, b2, b3)) {
-                        if (!doReplace) {
-                            throwMalformed(sp - 3, 3);
-                        }
-                        putChar(dst, dp++, repl);
-                        sp -= 3;
-                        sp += malformedN(src, sp, 3);
-                    } else {
-                        char c = (char)((b1 << 12) ^
-                                        (b2 <<  6) ^
-                                        (b3 ^
-                                         (((byte) 0xE0 << 12) ^
-                                         ((byte) 0x80 <<  6) ^
-                                         ((byte) 0x80 <<  0))));
-                        if (isSurrogate(c)) {
-                            if (!doReplace) {
-                                throwMalformed(sp - 3, 3);
-                            }
-                            putChar(dst, dp++, repl);
-                        } else {
-                            putChar(dst, dp++, c);
-                        }
-                    }
-                    continue;
-                }
-                if (sp  < sl && isMalformed3_2(b1, src[sp])) {
-                    if (!doReplace) {
-                        throwMalformed(sp - 1, 2);
-                    }
-                    putChar(dst, dp++, repl);
-                    continue;
-                }
-                if (!doReplace){
-                    throwMalformed(sp, 1);
-                }
-                putChar(dst, dp++, repl);
-                break;
-            } else if ((b1 >> 3) == -2) {
-                if (sp + 2 < sl) {
-                    int b2 = src[sp++];
-                    int b3 = src[sp++];
-                    int b4 = src[sp++];
-                    int uc = ((b1 << 18) ^
-                              (b2 << 12) ^
-                              (b3 <<  6) ^
-                              (b4 ^
-                               (((byte) 0xF0 << 18) ^
-                               ((byte) 0x80 << 12) ^
-                               ((byte) 0x80 <<  6) ^
-                               ((byte) 0x80 <<  0))));
-                    if (isMalformed4(b2, b3, b4) ||
-                        !isSupplementaryCodePoint(uc)) { // shortest form check
-                        if (!doReplace) {
-                            throwMalformed(sp - 4, 4);
-                        }
-                        putChar(dst, dp++, repl);
-                        sp -= 4;
-                        sp += malformedN(src, sp, 4);
-                    } else {
-                        putChar(dst, dp++, highSurrogate(uc));
-                        putChar(dst, dp++, lowSurrogate(uc));
-                    }
-                    continue;
-                }
-                b1 &= 0xff;
-                if (b1 > 0xf4 ||
-                    sp  < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
-                    if (!doReplace) {
-                        throwMalformed(sp - 1, 1);  // or 2
-                    }
-                    putChar(dst, dp++, repl);
-                    continue;
-                }
-                if (!doReplace) {
-                    throwMalformed(sp - 1, 1);
-                }
-                sp++;
-                putChar(dst, dp++, repl);
-                if (sp  < sl && isMalformed4_3(src[sp])) {
-                    continue;
-                }
-                break;
-            } else {
-                if (!doReplace) {
-                    throwMalformed(sp - 1, 1);
-                }
-                putChar(dst, dp++, repl);
-            }
-        }
-        if (dp != len) {
-            dst = Arrays.copyOf(dst, dp << 1);
-        }
-        return ret.with(dst, UTF16);
-    }
-
     private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
         if (coder == UTF16)
             return encodeUTF8_UTF16(val, doReplace);
@@ -967,18 +553,6 @@ private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
         return Arrays.copyOf(dst, dp);
     }
 
-    ////////////////////// for j.u.z.ZipCoder //////////////////////////
-
-    /*
-     * Throws iae, instead of replacing, if malformed or unmappable.
-     */
-    static String newStringUTF8NoRepl(byte[] src, int off, int len) {
-        if (COMPACT_STRINGS && !hasNegatives(src, off, len))
-            return new String(Arrays.copyOfRange(src, off, off + len), LATIN1);
-        Result ret = decodeUTF8_0(src, off, len, false);
-        return new String(ret.value, ret.coder);
-    }
-
     /*
      * Throws iae, instead of replacing, if unmappable.
      */
@@ -992,75 +566,6 @@ private static boolean isASCII(byte[] src) {
         return !hasNegatives(src, 0, src.length);
     }
 
-    private static String newStringLatin1(byte[] src) {
-        if (COMPACT_STRINGS)
-           return new String(src, LATIN1);
-        return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
-    }
-
-    static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
-        try {
-            return newStringNoRepl1(src, cs);
-        } catch (IllegalArgumentException e) {
-            //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause
-            Throwable cause = e.getCause();
-            if (cause instanceof MalformedInputException) {
-                throw (MalformedInputException)cause;
-            }
-            throw (CharacterCodingException)cause;
-        }
-    }
-
-    static String newStringNoRepl1(byte[] src, Charset cs) {
-        if (cs == UTF_8) {
-            if (COMPACT_STRINGS && isASCII(src))
-                return new String(src, LATIN1);
-            Result ret = decodeUTF8_0(src, 0, src.length, false);
-            return new String(ret.value, ret.coder);
-        }
-        if (cs == ISO_8859_1) {
-            return newStringLatin1(src);
-        }
-        if (cs == US_ASCII) {
-            if (isASCII(src)) {
-                return newStringLatin1(src);
-            } else {
-                throwMalformed(src);
-            }
-        }
-
-        CharsetDecoder cd = cs.newDecoder();
-        // ascii fastpath
-        if ((cd instanceof ArrayDecoder) &&
-            ((ArrayDecoder)cd).isASCIICompatible() && isASCII(src)) {
-            return newStringLatin1(src);
-        }
-        int len = src.length;
-        if (len == 0) {
-            return "";
-        }
-        int en = scale(len, cd.maxCharsPerByte());
-        char[] ca = new char[en];
-        if (cs.getClass().getClassLoader0() != null &&
-            System.getSecurityManager() != null) {
-            src = Arrays.copyOf(src, len);
-        }
-        ByteBuffer bb = ByteBuffer.wrap(src);
-        CharBuffer cb = CharBuffer.wrap(ca);
-        try {
-            CoderResult cr = cd.decode(bb, cb, true);
-            if (!cr.isUnderflow())
-                cr.throwException();
-            cr = cd.flush(cb);
-            if (!cr.isUnderflow())
-                cr.throwException();
-        } catch (CharacterCodingException x) {
-            throw new IllegalArgumentException(x);  // todo
-        }
-        Result ret = resultCached.get().with(ca, 0, cb.position());
-        return new String(ret.value, ret.coder);
-    }
-
     /*
      * Throws CCE, instead of replacing, if unmappable.
      */
diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java
index 7d94e041cc3e1..91fbadc4d1bfa 100644
--- a/src/java.base/share/classes/java/lang/System.java
+++ b/src/java.base/share/classes/java/lang/System.java
@@ -2262,7 +2262,7 @@ public Stream<ModuleLayer> layers(ClassLoader loader) {
             }
 
             public String newStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingException  {
-                return StringCoding.newStringNoRepl(bytes, cs);
+                return String.newStringNoRepl(bytes, cs);
             }
 
             public byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
@@ -2270,7 +2270,7 @@ public byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingExcepti
             }
 
             public String newStringUTF8NoRepl(byte[] bytes, int off, int len) {
-                return StringCoding.newStringUTF8NoRepl(bytes, off, len);
+                return String.newStringUTF8NoRepl(bytes, off, len);
             }
 
             public byte[] getBytesUTF8NoRepl(String s) {

From 67067dbcb4598a3d01ecf609b39ddf1d4af7e3d6 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Thu, 14 Jan 2021 17:51:49 +0100
Subject: [PATCH 02/25] Cleanups and small fixes

---
 .../share/classes/java/lang/String.java       | 260 +++++++++---------
 1 file changed, 130 insertions(+), 130 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index ff95f419d8153..980cae12bae9a 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -529,6 +529,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
             if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
                 this.coder = LATIN1;
+                return;
             } else {
                 int sl = offset + length;
                 int dp = 0;
@@ -709,19 +710,19 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
             // ascii fastpath
             if ((cd instanceof ArrayDecoder) &&
                     ((ArrayDecoder)cd).isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
-                if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
+                if (COMPACT_STRINGS) {
                     this.value = Arrays.copyOfRange(bytes, offset, offset + length);
                     this.coder = LATIN1;
-                } else {
-                    byte[] dst = new byte[length << 1];
-                    int dp = 0;
-                    while (dp < length) {
-                        int b = bytes[offset++];
-                        putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
-                    }
-                    this.value = dst;
-                    this.coder = UTF16;
+                    return;
+                }
+                byte[] dst = new byte[length << 1];
+                int dp = 0;
+                while (dp < length) {
+                    int b = bytes[offset++];
+                    putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
                 }
+                this.value = dst;
+                this.coder = UTF16;
                 return;
             }
             // fastpath for always Latin1 decodable single byte
@@ -733,12 +734,12 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                 return;
             }
 
-            int en = StringCoding.scale(length, cd.maxCharsPerByte());
             if (length == 0) {
                 this.value = "".value;
                 this.coder = "".coder;
                 return;
             }
+            int en = StringCoding.scale(length, cd.maxCharsPerByte());
             cd.onMalformedInput(CodingErrorAction.REPLACE)
                     .onUnmappableCharacter(CodingErrorAction.REPLACE)
                     .reset();
@@ -816,11 +817,15 @@ static String newStringNoRepl1(byte[] src, Charset cs) {
             return newStringUTF8NoRepl(src, 0, src.length);
         }
         if (cs == ISO_8859_1) {
-            return new String(src, 0, src.length, ISO_8859_1);
+            if (COMPACT_STRINGS)
+                return new String(src, LATIN1);
+            return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
         }
         if (cs == US_ASCII) {
             if (!StringCoding.hasNegatives(src, 0, src.length)) {
-                return new String(src, 0, src.length, ISO_8859_1);
+                if (COMPACT_STRINGS)
+                    return new String(src, LATIN1);
+                return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
             } else {
                 StringCoding.throwMalformed(src);
             }
@@ -864,137 +869,132 @@ static String newStringNoRepl1(byte[] src, Charset cs) {
      */
     private String(byte[] bytes, int offset, int length, Void throwOnError) {
         checkBoundsOffCount(offset, length, bytes.length);
-        if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
-            this.value = Arrays.copyOfRange(bytes, offset, offset + length);
-            this.coder = LATIN1;
-        } else {
-            int sl = offset + length;
-            int dp = 0;
-            byte[] dst = new byte[length];
-            if (COMPACT_STRINGS) {
-                while (offset < sl) {
-                    int b1 = bytes[offset];
-                    if (b1 >= 0) {
-                        dst[dp++] = (byte)b1;
-                        offset++;
-                        continue;
-                    }
-                    if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
-                            offset + 1 < sl) {
-                        int b2 = bytes[offset + 1];
-                        if (!StringCoding.isNotContinuation(b2)) {
-                            dst[dp++] = (byte)(((b1 << 6) ^ b2)^
-                                    (((byte) 0xC0 << 6) ^
-                                            ((byte) 0x80 << 0)));
-                            offset += 2;
-                            continue;
-                        }
-                    }
-                    // anything not a latin1, including the repl
-                    // we have to go with the utf16
-                    break;
+        int sl = offset + length;
+        int dp = 0;
+        byte[] dst = new byte[length];
+        if (COMPACT_STRINGS) {
+            while (offset < sl) {
+                int b1 = bytes[offset];
+                if (b1 >= 0) {
+                    dst[dp++] = (byte)b1;
+                    offset++;
+                    continue;
                 }
-                if (offset == sl) {
-                    if (dp != dst.length) {
-                        dst = Arrays.copyOf(dst, dp);
+                if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
+                        offset + 1 < sl) {
+                    int b2 = bytes[offset + 1];
+                    if (!StringCoding.isNotContinuation(b2)) {
+                        dst[dp++] = (byte)(((b1 << 6) ^ b2)^
+                                (((byte) 0xC0 << 6) ^
+                                        ((byte) 0x80 << 0)));
+                        offset += 2;
+                        continue;
                     }
-                    this.value = dst;
-                    this.coder = LATIN1;
-                    return;
                 }
+                // anything not a latin1, including the repl
+                // we have to go with the utf16
+                break;
             }
-            if (dp == 0) {
-                dst = new byte[length << 1];
-            } else {
-                byte[] buf = new byte[length << 1];
-                StringLatin1.inflate(dst, 0, buf, 0, dp);
-                dst = buf;
+            if (offset == sl) {
+                if (dp != dst.length) {
+                    dst = Arrays.copyOf(dst, dp);
+                }
+                this.value = dst;
+                this.coder = LATIN1;
+                return;
             }
-            while (offset < sl) {
-                int b1 = bytes[offset++];
-                if (b1 >= 0) {
-                    putChar(dst, dp++, (char) b1);
-                } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
-                    if (offset < sl) {
-                        int b2 = bytes[offset++];
-                        if (StringCoding.isNotContinuation(b2)) {
-                            StringCoding.throwMalformed(offset - 1, 1);
-                        } else {
-                            putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
-                                    (((byte) 0xC0 << 6) ^
-                                            ((byte) 0x80 << 0))));
-                        }
-                        continue;
+        }
+        if (dp == 0) {
+            dst = new byte[length << 1];
+        } else {
+            byte[] buf = new byte[length << 1];
+            StringLatin1.inflate(dst, 0, buf, 0, dp);
+            dst = buf;
+        }
+        while (offset < sl) {
+            int b1 = bytes[offset++];
+            if (b1 >= 0) {
+                putChar(dst, dp++, (char) b1);
+            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
+                if (offset < sl) {
+                    int b2 = bytes[offset++];
+                    if (StringCoding.isNotContinuation(b2)) {
+                        StringCoding.throwMalformed(offset - 1, 1);
+                    } else {
+                        putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
+                                (((byte) 0xC0 << 6) ^
+                                        ((byte) 0x80 << 0))));
                     }
-                    StringCoding.throwMalformed(offset, 1);  // underflow()
-                    break;
-                } else if ((b1 >> 4) == -2) {
-                    if (offset + 1 < sl) {
-                        int b2 = bytes[offset++];
-                        int b3 = bytes[offset++];
-                        if (StringCoding.isMalformed3(b1, b2, b3)) {
+                    continue;
+                }
+                StringCoding.throwMalformed(offset, 1);  // underflow()
+                break;
+            } else if ((b1 >> 4) == -2) {
+                if (offset + 1 < sl) {
+                    int b2 = bytes[offset++];
+                    int b3 = bytes[offset++];
+                    if (StringCoding.isMalformed3(b1, b2, b3)) {
+                        StringCoding.throwMalformed(offset - 3, 3);
+                    } else {
+                        char c = (char)((b1 << 12) ^
+                                (b2 <<  6) ^
+                                (b3 ^
+                                        (((byte) 0xE0 << 12) ^
+                                        ((byte) 0x80 <<  6) ^
+                                        ((byte) 0x80 <<  0))));
+                        if (isSurrogate(c)) {
                             StringCoding.throwMalformed(offset - 3, 3);
                         } else {
-                            char c = (char)((b1 << 12) ^
-                                    (b2 <<  6) ^
-                                    (b3 ^
-                                            (((byte) 0xE0 << 12) ^
-                                            ((byte) 0x80 <<  6) ^
-                                            ((byte) 0x80 <<  0))));
-                            if (isSurrogate(c)) {
-                                StringCoding.throwMalformed(offset - 3, 3);
-                            } else {
-                                putChar(dst, dp++, c);
-                            }
+                            putChar(dst, dp++, c);
                         }
-                        continue;
                     }
-                    if (offset  < sl && StringCoding.isMalformed3_2(b1, bytes[offset])) {
-                        StringCoding.throwMalformed(offset - 1, 2);
-                        continue;
-                    }
-                    StringCoding.throwMalformed(offset, 1);
-                    break;
-                } else if ((b1 >> 3) == -2) {
-                    if (offset + 2 < sl) {
-                        int b2 = bytes[offset++];
-                        int b3 = bytes[offset++];
-                        int b4 = bytes[offset++];
-                        int uc = ((b1 << 18) ^
-                                  (b2 << 12) ^
-                                  (b3 <<  6) ^
-                                  (b4 ^
-                                   (((byte) 0xF0 << 18) ^
-                                   ((byte) 0x80 << 12) ^
-                                   ((byte) 0x80 <<  6) ^
-                                   ((byte) 0x80 <<  0))));
-                        if (StringCoding.isMalformed4(b2, b3, b4) ||
-                                !isSupplementaryCodePoint(uc)) { // shortest form check
-                            StringCoding.throwMalformed(offset - 4, 4);
-                        } else {
-                            putChar(dst, dp++, highSurrogate(uc));
-                            putChar(dst, dp++, lowSurrogate(uc));
-                        }
-                        continue;
-                    }
-                    b1 &= 0xff;
-                    if (b1 > 0xf4 ||
-                        offset < sl && StringCoding.isMalformed4_2(b1, bytes[offset] & 0xff)) {
-                        StringCoding.throwMalformed(offset - 1, 1);  // or 2
-                        continue;
+                    continue;
+                }
+                if (offset  < sl && StringCoding.isMalformed3_2(b1, bytes[offset])) {
+                    StringCoding.throwMalformed(offset - 1, 2);
+                    continue;
+                }
+                StringCoding.throwMalformed(offset, 1);
+                break;
+            } else if ((b1 >> 3) == -2) {
+                if (offset + 2 < sl) {
+                    int b2 = bytes[offset++];
+                    int b3 = bytes[offset++];
+                    int b4 = bytes[offset++];
+                    int uc = ((b1 << 18) ^
+                              (b2 << 12) ^
+                              (b3 <<  6) ^
+                              (b4 ^
+                               (((byte) 0xF0 << 18) ^
+                               ((byte) 0x80 << 12) ^
+                               ((byte) 0x80 <<  6) ^
+                               ((byte) 0x80 <<  0))));
+                    if (StringCoding.isMalformed4(b2, b3, b4) ||
+                            !isSupplementaryCodePoint(uc)) { // shortest form check
+                        StringCoding.throwMalformed(offset - 4, 4);
+                    } else {
+                        putChar(dst, dp++, highSurrogate(uc));
+                        putChar(dst, dp++, lowSurrogate(uc));
                     }
-                    StringCoding.throwMalformed(offset - 1, 1);
-                    break;
-                } else {
-                    StringCoding.throwMalformed(offset - 1, 1);
+                    continue;
                 }
+                b1 &= 0xff;
+                if (b1 > 0xf4 ||
+                    offset < sl && StringCoding.isMalformed4_2(b1, bytes[offset] & 0xff)) {
+                    StringCoding.throwMalformed(offset - 1, 1);  // or 2
+                    continue;
+                }
+                StringCoding.throwMalformed(offset - 1, 1);
+                break;
+            } else {
+                StringCoding.throwMalformed(offset - 1, 1);
             }
-            if (dp != length) {
-                dst = Arrays.copyOf(dst, dp << 1);
-            }
-            this.value = dst;
-            this.coder = UTF16;
         }
+        if (dp != length) {
+            dst = Arrays.copyOf(dst, dp << 1);
+        }
+        this.value = dst;
+        this.coder = UTF16;
     }
 
     /**

From 6335b3bb0d7d8b63debbf06f21b7c90d867731ec Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Thu, 14 Jan 2021 18:34:09 +0100
Subject: [PATCH 03/25] Break bootstrap order issues by statically importing
 Charsets from StringCoding rather than eagerly initializing on String clinit

---
 src/java.base/share/classes/java/lang/String.java       | 7 +++----
 src/java.base/share/classes/java/lang/StringCoding.java | 6 +++---
 2 files changed, 6 insertions(+), 7 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 980cae12bae9a..06f6cc57a5652 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -57,6 +57,9 @@
 
 import static java.lang.Character.*;
 import static java.lang.Character.lowSurrogate;
+import static java.lang.StringCoding.ISO_8859_1;
+import static java.lang.StringCoding.US_ASCII;
+import static java.lang.StringCoding.UTF_8;
 import static java.lang.StringUTF16.putChar;
 import static java.util.function.Predicate.not;
 
@@ -223,10 +226,6 @@ public final class String
         COMPACT_STRINGS = true;
     }
 
-    private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
-    private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
-    private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
-
     private static final char REPL = '\ufffd';
 
     /**
diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index 051404bf6f3f7..827d46ae22848 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -66,9 +66,9 @@ private StringCoding() { }
     private static final ThreadLocal<SoftReference<StringEncoder>> encoder =
         new ThreadLocal<>();
 
-    private static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
-    private static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
-    private static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
+    static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
+    static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
+    static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
 
     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
         SoftReference<T> sr = tl.get();

From a99079a8071d1236265ec34d21d65b5e93e3cfdf Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Thu, 14 Jan 2021 18:51:49 +0100
Subject: [PATCH 04/25] Add simple StringDecode micro

---
 .../openjdk/bench/java/lang/StringDecode.java | 81 +++++++++++++++++++
 1 file changed, 81 insertions(+)
 create mode 100644 test/micro/org/openjdk/bench/java/lang/StringDecode.java

diff --git a/test/micro/org/openjdk/bench/java/lang/StringDecode.java b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
new file mode 100644
index 0000000000000..d9fa17c14e733
--- /dev/null
+++ b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.java.lang;
+
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@Fork(value = 3)
+@Warmup(iterations = 5, time = 1)
+@Measurement(iterations = 5, time = 2)
+@State(Scope.Thread)
+public class StringDecode {
+
+    @Param({"US-ASCII", "ISO_8859_1", "UTF-8", "UTF_16"})
+    private String charsetName;
+
+    private Charset charset;
+
+    private byte[] asciiString;
+    private byte[] utf16String;
+
+    private byte[] asciiDefaultString;
+    private byte[] utf16DefaultString;
+    @Setup
+    public void setup() {
+        charset = Charset.forName(charsetName);
+        asciiString = "ascii string".getBytes(charset);
+        utf16String = "UTF-\uFF11\uFF16 string".getBytes(charset);
+
+        asciiDefaultString = "ascii string".getBytes();
+        utf16DefaultString = "UTF-\uFF11\uFF16 string".getBytes();
+    }
+
+    @Benchmark
+    public String decodeCharsetName(Blackhole bh) throws Exception {
+        bh.consume(new String(asciiString, charsetName));
+        bh.consume(new String(utf16String, charsetName));
+    }
+
+    @Benchmark
+    public String decodeCharset(Blackhole bh) throws Exception {
+        bh.consume(new String(asciiString, charset));
+        bh.consume(new String(utf16String, charset));
+    }
+
+    @Benchmark
+    public String decodeDefault(Blackhole bh) throws Exception {
+        bh.consume(new String(asciiDefaultString, charset));
+        bh.consume(new String(utf16DefaultString, charset));
+    }
+}

From 1a9797c57a0573931a01c346d0abd39f004f3a16 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Thu, 14 Jan 2021 18:55:33 +0100
Subject: [PATCH 05/25] Fix micro

---
 .../org/openjdk/bench/java/lang/StringDecode.java      | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/test/micro/org/openjdk/bench/java/lang/StringDecode.java b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
index d9fa17c14e733..01dcfc3e09958 100644
--- a/test/micro/org/openjdk/bench/java/lang/StringDecode.java
+++ b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
@@ -24,12 +24,16 @@
 
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Measurement;
 import org.openjdk.jmh.annotations.Mode;
 import org.openjdk.jmh.annotations.OutputTimeUnit;
 import org.openjdk.jmh.annotations.Param;
 import org.openjdk.jmh.annotations.Scope;
 import org.openjdk.jmh.annotations.Setup;
 import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
 
 import java.util.concurrent.TimeUnit;
 
@@ -62,19 +66,19 @@ public void setup() {
     }
 
     @Benchmark
-    public String decodeCharsetName(Blackhole bh) throws Exception {
+    public void decodeCharsetName(Blackhole bh) throws Exception {
         bh.consume(new String(asciiString, charsetName));
         bh.consume(new String(utf16String, charsetName));
     }
 
     @Benchmark
-    public String decodeCharset(Blackhole bh) throws Exception {
+    public void decodeCharset(Blackhole bh) throws Exception {
         bh.consume(new String(asciiString, charset));
         bh.consume(new String(utf16String, charset));
     }
 
     @Benchmark
-    public String decodeDefault(Blackhole bh) throws Exception {
+    public void decodeDefault(Blackhole bh) throws Exception {
         bh.consume(new String(asciiDefaultString, charset));
         bh.consume(new String(utf16DefaultString, charset));
     }

From f3b7f74e1e7e5af5f9d28ab8ebdfceb5ba2d8599 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Thu, 14 Jan 2021 19:18:07 +0100
Subject: [PATCH 06/25] More micro fixes

---
 test/micro/org/openjdk/bench/java/lang/StringDecode.java | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/test/micro/org/openjdk/bench/java/lang/StringDecode.java b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
index 01dcfc3e09958..fc048272ba66c 100644
--- a/test/micro/org/openjdk/bench/java/lang/StringDecode.java
+++ b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
@@ -35,6 +35,7 @@
 import org.openjdk.jmh.annotations.Warmup;
 import org.openjdk.jmh.infra.Blackhole;
 
+import java.nio.charset.Charset;
 import java.util.concurrent.TimeUnit;
 
 @BenchmarkMode(Mode.AverageTime)
@@ -55,6 +56,7 @@ public class StringDecode {
 
     private byte[] asciiDefaultString;
     private byte[] utf16DefaultString;
+
     @Setup
     public void setup() {
         charset = Charset.forName(charsetName);

From 14b142bc12b4622c53d3768d1b7dc2dc573c4c4e Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Fri, 15 Jan 2021 01:44:16 +0100
Subject: [PATCH 07/25] Cleanups, minor improvements

---
 .../share/classes/java/lang/String.java       | 17 +++--
 .../share/classes/java/lang/StringCoding.java | 25 --------
 .../openjdk/bench/java/lang/StringDecode.java | 63 +++++++++++--------
 3 files changed, 45 insertions(+), 60 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 06f6cc57a5652..eb962b1dcf7fa 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -714,13 +714,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                     this.coder = LATIN1;
                     return;
                 }
-                byte[] dst = new byte[length << 1];
-                int dp = 0;
-                while (dp < length) {
-                    int b = bytes[offset++];
-                    putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
-                }
-                this.value = dst;
+                this.value = StringLatin1.inflate(bytes, offset, length);
                 this.coder = UTF16;
                 return;
             }
@@ -858,8 +852,13 @@ static String newStringNoRepl1(byte[] src, Charset cs) {
         } catch (CharacterCodingException x) {
             throw new IllegalArgumentException(x);
         }
-        StringCoding.Result ret = new StringCoding.Result().with(ca, 0, cb.position());
-        return new String(ret.value, ret.coder);
+        if (COMPACT_STRINGS) {
+            byte[] bs = StringUTF16.compress(ca, 0, cb.position());
+            if (bs != null) {
+                return new String(bs, LATIN1);
+            }
+        }
+        return new String(StringUTF16.toBytes(ca, 0, cb.position()), UTF16);
     }
 
     /*
diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index 67559a1958ad1..0661ea155d116 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -106,31 +106,6 @@ static Charset lookupCharset(String csn) {
         return null;
     }
 
-    static class Result {
-        byte[] value;
-        byte coder;
-
-        Result with(char[] val, int off, int len) {
-            if (String.COMPACT_STRINGS) {
-                byte[] bs = StringUTF16.compress(val, off, len);
-                if (bs != null) {
-                    value = bs;
-                    coder = LATIN1;
-                    return this;
-                }
-            }
-            coder = UTF16;
-            value = StringUTF16.toBytes(val, off, len);
-            return this;
-        }
-
-        Result with(byte[] val, byte coder) {
-            this.coder = coder;
-            value = val;
-            return this;
-        }
-    }
-
     @IntrinsicCandidate
     public static boolean hasNegatives(byte[] ba, int off, int len) {
         for (int i = off; i < off + len; i++) {
diff --git a/test/micro/org/openjdk/bench/java/lang/StringDecode.java b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
index fc048272ba66c..26235131aec3d 100644
--- a/test/micro/org/openjdk/bench/java/lang/StringDecode.java
+++ b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
@@ -40,48 +40,59 @@
 
 @BenchmarkMode(Mode.AverageTime)
 @OutputTimeUnit(TimeUnit.NANOSECONDS)
-@Fork(value = 3)
-@Warmup(iterations = 5, time = 1)
-@Measurement(iterations = 5, time = 2)
+@Fork(value = 3, jvmArgs = "-Xmx1g")
+@Warmup(iterations = 5, time = 2)
+@Measurement(iterations = 5, time = 3)
 @State(Scope.Thread)
 public class StringDecode {
 
-    @Param({"US-ASCII", "ISO_8859_1", "UTF-8", "UTF_16"})
-    private String charsetName;
+    @BenchmarkMode(Mode.AverageTime)
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    @Fork(value = 3, jvmArgs = "-Xmx1g")
+    @Warmup(iterations = 5, time = 2)
+    @Measurement(iterations = 5, time = 2)
+    @State(Scope.Thread)
+    public static class WithCharset {
 
-    private Charset charset;
+        @Param({"US-ASCII", "ISO_8859_1", "UTF-8", "UTF_16"})
+        private String charsetName;
 
-    private byte[] asciiString;
-    private byte[] utf16String;
+        private Charset charset;
+        private byte[] asciiString;
+        private byte[] utf16String;
+
+        @Setup
+        public void setup() {
+            charset = Charset.forName(charsetName);
+            asciiString = "ascii string".getBytes(charset);
+            utf16String = "UTF-\uFF11\uFF16 string".getBytes(charset);
+        }
+
+        @Benchmark
+        public void decodeCharsetName(Blackhole bh) throws Exception {
+            bh.consume(new String(asciiString, charsetName));
+            bh.consume(new String(utf16String, charsetName));
+        }
+
+        @Benchmark
+        public void decodeCharset(Blackhole bh) throws Exception {
+            bh.consume(new String(asciiString, charset));
+            bh.consume(new String(utf16String, charset));
+        }
+    }
 
     private byte[] asciiDefaultString;
     private byte[] utf16DefaultString;
 
     @Setup
     public void setup() {
-        charset = Charset.forName(charsetName);
-        asciiString = "ascii string".getBytes(charset);
-        utf16String = "UTF-\uFF11\uFF16 string".getBytes(charset);
-
         asciiDefaultString = "ascii string".getBytes();
         utf16DefaultString = "UTF-\uFF11\uFF16 string".getBytes();
     }
 
-    @Benchmark
-    public void decodeCharsetName(Blackhole bh) throws Exception {
-        bh.consume(new String(asciiString, charsetName));
-        bh.consume(new String(utf16String, charsetName));
-    }
-
-    @Benchmark
-    public void decodeCharset(Blackhole bh) throws Exception {
-        bh.consume(new String(asciiString, charset));
-        bh.consume(new String(utf16String, charset));
-    }
-
     @Benchmark
     public void decodeDefault(Blackhole bh) throws Exception {
-        bh.consume(new String(asciiDefaultString, charset));
-        bh.consume(new String(utf16DefaultString, charset));
+        bh.consume(new String(asciiDefaultString));
+        bh.consume(new String(utf16DefaultString));
     }
 }

From d336ac432b13d18ceedb0a4f7a9a0adcbb1e0599 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Fri, 15 Jan 2021 03:19:38 +0100
Subject: [PATCH 08/25] Cleanup includes etc

---
 .../share/classes/java/lang/String.java       | 139 ++++++++++--------
 .../openjdk/bench/java/lang/StringDecode.java |   2 +-
 2 files changed, 79 insertions(+), 62 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index eb962b1dcf7fa..0230f639a7c41 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -33,7 +33,12 @@
 import java.lang.constant.ConstantDesc;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
-import java.nio.charset.*;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
+import java.nio.charset.CoderResult;
+import java.nio.charset.CodingErrorAction;
+import java.nio.charset.MalformedInputException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
@@ -55,8 +60,6 @@
 import jdk.internal.vm.annotation.Stable;
 import sun.nio.cs.ArrayDecoder;
 
-import static java.lang.Character.*;
-import static java.lang.Character.lowSurrogate;
 import static java.lang.StringCoding.ISO_8859_1;
 import static java.lang.StringCoding.US_ASCII;
 import static java.lang.StringCoding.UTF_8;
@@ -524,6 +527,11 @@ public String(byte[] bytes, int offset, int length, String charsetName)
     public String(byte[] bytes, int offset, int length, Charset charset) {
         Objects.requireNonNull(charset);
         checkBoundsOffCount(offset, length, bytes.length);
+        if (length == 0) {
+            this.value = "".value;
+            this.coder = "".coder;
+            return;
+        }
         if (charset == UTF_8) {
             if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
@@ -607,7 +615,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                                                  (((byte) 0xE0 << 12) ^
                                                   ((byte) 0x80 <<  6) ^
                                                   ((byte) 0x80 <<  0))));
-                                if (isSurrogate(c)) {
+                                if (Character.isSurrogate(c)) {
                                     putChar(dst, dp++, REPL);
                                 } else {
                                     putChar(dst, dp++, c);
@@ -635,13 +643,13 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                                         ((byte) 0x80 <<  6) ^
                                         ((byte) 0x80 <<  0))));
                             if (StringCoding.isMalformed4(b2, b3, b4) ||
-                                    !isSupplementaryCodePoint(uc)) { // shortest form check
+                                    !Character.isSupplementaryCodePoint(uc)) { // shortest form check
                                 putChar(dst, dp++, REPL);
                                 offset -= 4;
                                 offset += StringCoding.malformedN(bytes, offset, 4);
                             } else {
-                                putChar(dst, dp++, highSurrogate(uc));
-                                putChar(dst, dp++, lowSurrogate(uc));
+                                putChar(dst, dp++, Character.highSurrogate(uc));
+                                putChar(dst, dp++, Character.lowSurrogate(uc));
                             }
                             continue;
                         }
@@ -706,39 +714,35 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
             // but it then can be argued that the SM is null when the operation
             // is started...
             CharsetDecoder cd = charset.newDecoder();
-            // ascii fastpath
-            if ((cd instanceof ArrayDecoder) &&
-                    ((ArrayDecoder)cd).isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
-                if (COMPACT_STRINGS) {
-                    this.value = Arrays.copyOfRange(bytes, offset, offset + length);
+            // ArrayDecoder fastpaths
+            if (cd instanceof ArrayDecoder ad) {
+                // ascii
+                if (ad.isASCIICompatible() && !StringCoding.hasNegatives(bytes, offset, length)) {
+                    if (COMPACT_STRINGS) {
+                        this.value = Arrays.copyOfRange(bytes, offset, offset + length);
+                        this.coder = LATIN1;
+                        return;
+                    }
+                    this.value = StringLatin1.inflate(bytes, offset, length);
+                    this.coder = UTF16;
+                    return;
+                }
+
+                // fastpath for always Latin1 decodable single byte
+                if (COMPACT_STRINGS && ad.isLatin1Decodable()) {
+                    byte[] dst = new byte[length];
+                    ad.decodeToLatin1(bytes, offset, length, dst);
+                    this.value = dst;
                     this.coder = LATIN1;
                     return;
                 }
-                this.value = StringLatin1.inflate(bytes, offset, length);
-                this.coder = UTF16;
-                return;
-            }
-            // fastpath for always Latin1 decodable single byte
-            if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
-                byte[] dst = new byte[length];
-                ((ArrayDecoder)cd).decodeToLatin1(bytes, offset, length, dst);
-                this.value = dst;
-                this.coder = LATIN1;
-                return;
-            }
 
-            if (length == 0) {
-                this.value = "".value;
-                this.coder = "".coder;
-                return;
-            }
-            int en = StringCoding.scale(length, cd.maxCharsPerByte());
-            cd.onMalformedInput(CodingErrorAction.REPLACE)
-                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
-                    .reset();
-            char[] ca = new char[en];
-            if (cd instanceof ArrayDecoder) {
-                int clen = ((ArrayDecoder)cd).decode(bytes, offset, length, ca);
+                int en = StringCoding.scale(length, cd.maxCharsPerByte());
+                cd.onMalformedInput(CodingErrorAction.REPLACE)
+                        .onUnmappableCharacter(CodingErrorAction.REPLACE)
+                        .reset();
+                char[] ca = new char[en];
+                int clen = ad.decode(bytes, offset, length, ca);
                 if (COMPACT_STRINGS) {
                     byte[] bs = StringUTF16.compress(ca, 0, clen);
                     if (bs != null) {
@@ -751,27 +755,22 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                 value = StringUTF16.toBytes(ca, 0, clen);
                 return;
             }
+
+            // decode using CharsetDecoder
+            int en = StringCoding.scale(length, cd.maxCharsPerByte());
+            cd.onMalformedInput(CodingErrorAction.REPLACE)
+                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
+                    .reset();
+            char[] ca = new char[en];
             if (charset.getClass().getClassLoader0() != null &&
                     System.getSecurityManager() != null) {
                 bytes = Arrays.copyOfRange(bytes, offset, offset + length);
                 offset = 0;
             }
-            ByteBuffer bb = ByteBuffer.wrap(bytes, offset, length);
-            CharBuffer cb = CharBuffer.wrap(ca);
-            try {
-                CoderResult cr = cd.decode(bb, cb, true);
-                if (!cr.isUnderflow())
-                    cr.throwException();
-                cr = cd.flush(cb);
-                if (!cr.isUnderflow())
-                    cr.throwException();
-            } catch (CharacterCodingException x) {
-                // Substitution is always enabled,
-                // so this shouldn't happen
-                throw new Error(x);
-            }
+
+            int caLen = decode(cd, ca, bytes, offset, length);
             if (COMPACT_STRINGS) {
-                byte[] bs = StringUTF16.compress(ca, 0, cb.position());
+                byte[] bs = StringUTF16.compress(ca, 0, caLen);
                 if (bs != null) {
                     value = bs;
                     coder = LATIN1;
@@ -779,8 +778,26 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                 }
             }
             coder = UTF16;
-            value = StringUTF16.toBytes(ca, 0, cb.position());
+            value = StringUTF16.toBytes(ca, 0, caLen);
+        }
+    }
+
+    private static int decode(CharsetDecoder cd, char[] dst, byte[] src, int offset, int length) {
+        ByteBuffer bb = ByteBuffer.wrap(src, offset, length);
+        CharBuffer cb = CharBuffer.wrap(dst, 0, dst.length);
+        try {
+            CoderResult cr = cd.decode(bb, cb, true);
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = cd.flush(cb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            // Substitution is always enabled,
+            // so this shouldn't happen
+            throw new Error(x);
         }
+        return cb.position();
     }
 
     ////////////////////// for j.u.z.ZipCoder //////////////////////////
@@ -798,8 +815,8 @@ static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingExce
         } catch (IllegalArgumentException e) {
             //newStringNoRepl1 throws IAE with MalformedInputException or CCE as the cause
             Throwable cause = e.getCause();
-            if (cause instanceof MalformedInputException) {
-                throw (MalformedInputException)cause;
+            if (cause instanceof MalformedInputException mie) {
+                throw mie;
             }
             throw (CharacterCodingException)cause;
         }
@@ -826,8 +843,8 @@ static String newStringNoRepl1(byte[] src, Charset cs) {
 
         CharsetDecoder cd = cs.newDecoder();
         // ascii fastpath
-        if ((cd instanceof ArrayDecoder) &&
-                ((ArrayDecoder)cd).isASCIICompatible() && !StringCoding.hasNegatives(src, 0, src.length)) {
+        if (cd instanceof ArrayDecoder ad &&
+                ad.isASCIICompatible() && !StringCoding.hasNegatives(src, 0, src.length)) {
             return new String(src, 0, src.length, ISO_8859_1);
         }
         int len = src.length;
@@ -940,7 +957,7 @@ private String(byte[] bytes, int offset, int length, Void throwOnError) {
                                         (((byte) 0xE0 << 12) ^
                                         ((byte) 0x80 <<  6) ^
                                         ((byte) 0x80 <<  0))));
-                        if (isSurrogate(c)) {
+                        if (Character.isSurrogate(c)) {
                             StringCoding.throwMalformed(offset - 3, 3);
                         } else {
                             putChar(dst, dp++, c);
@@ -968,11 +985,11 @@ private String(byte[] bytes, int offset, int length, Void throwOnError) {
                                ((byte) 0x80 <<  6) ^
                                ((byte) 0x80 <<  0))));
                     if (StringCoding.isMalformed4(b2, b3, b4) ||
-                            !isSupplementaryCodePoint(uc)) { // shortest form check
+                            !Character.isSupplementaryCodePoint(uc)) { // shortest form check
                         StringCoding.throwMalformed(offset - 4, 4);
                     } else {
-                        putChar(dst, dp++, highSurrogate(uc));
-                        putChar(dst, dp++, lowSurrogate(uc));
+                        putChar(dst, dp++, Character.highSurrogate(uc));
+                        putChar(dst, dp++, Character.lowSurrogate(uc));
                     }
                     continue;
                 }
@@ -1073,7 +1090,7 @@ public String(byte bytes[], Charset charset) {
      *
      * @since  1.1
      */
-    public String(byte bytes[], int offset, int length) {
+    public String(byte[] bytes, int offset, int length) {
         this(bytes, offset, length, Charset.defaultCharset());
     }
 
diff --git a/test/micro/org/openjdk/bench/java/lang/StringDecode.java b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
index 26235131aec3d..04ffdf2a84e6a 100644
--- a/test/micro/org/openjdk/bench/java/lang/StringDecode.java
+++ b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
@@ -54,7 +54,7 @@ public class StringDecode {
     @State(Scope.Thread)
     public static class WithCharset {
 
-        @Param({"US-ASCII", "ISO_8859_1", "UTF-8", "UTF_16"})
+        @Param({"US-ASCII", "ISO_8859_1", "UTF-8", "MS932", "ISO_8859_6"})
         private String charsetName;
 
         private Charset charset;

From a5f610ee3897c10c5ad69f303fd011435c9724be Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Fri, 15 Jan 2021 14:48:50 +0100
Subject: [PATCH 09/25] Refactor charset lookups to ensure expected exceptions
 are thrown for null and for unsupported or illegal charset names (e.g. "foo")

---
 .../share/classes/java/lang/String.java       | 23 +++++++++++++------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 0230f639a7c41..ee484735c805d 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -33,12 +33,7 @@
 import java.lang.constant.ConstantDesc;
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.Charset;
-import java.nio.charset.CharsetDecoder;
-import java.nio.charset.CoderResult;
-import java.nio.charset.CodingErrorAction;
-import java.nio.charset.MalformedInputException;
+import java.nio.charset.*;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Comparator;
@@ -491,7 +486,21 @@ public String(byte ascii[], int hibyte) {
      */
     public String(byte[] bytes, int offset, int length, String charsetName)
             throws UnsupportedEncodingException {
-        this(bytes, offset, length, StringCoding.lookupCharset(Objects.requireNonNull(charsetName)));
+        this(bytes, offset, length, lookupCharset(charsetName));
+    }
+
+    private static Charset lookupCharset(String charsetName)
+            throws UnsupportedEncodingException {
+        Objects.requireNonNull(charsetName);
+        try {
+            Charset cs = StringCoding.lookupCharset(charsetName);
+            if (cs == null) {
+                throw new UnsupportedEncodingException(charsetName);
+            }
+            return cs;
+        } catch (IllegalCharsetNameException ics) {
+            throw new UnsupportedEncodingException(charsetName);
+        }
     }
 
     /**

From dfce627bf394d656ea277a48d2099e66c1b9fef0 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Fri, 15 Jan 2021 15:16:20 +0100
Subject: [PATCH 10/25] Fix charset names in micro settings

---
 src/java.base/share/classes/java/lang/String.java        | 6 ++----
 test/micro/org/openjdk/bench/java/lang/StringDecode.java | 2 +-
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index ee484735c805d..658eb2ccc62df 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -748,8 +748,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
 
                 int en = StringCoding.scale(length, cd.maxCharsPerByte());
                 cd.onMalformedInput(CodingErrorAction.REPLACE)
-                        .onUnmappableCharacter(CodingErrorAction.REPLACE)
-                        .reset();
+                        .onUnmappableCharacter(CodingErrorAction.REPLACE);
                 char[] ca = new char[en];
                 int clen = ad.decode(bytes, offset, length, ca);
                 if (COMPACT_STRINGS) {
@@ -768,8 +767,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
             // decode using CharsetDecoder
             int en = StringCoding.scale(length, cd.maxCharsPerByte());
             cd.onMalformedInput(CodingErrorAction.REPLACE)
-                    .onUnmappableCharacter(CodingErrorAction.REPLACE)
-                    .reset();
+                    .onUnmappableCharacter(CodingErrorAction.REPLACE);
             char[] ca = new char[en];
             if (charset.getClass().getClassLoader0() != null &&
                     System.getSecurityManager() != null) {
diff --git a/test/micro/org/openjdk/bench/java/lang/StringDecode.java b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
index 04ffdf2a84e6a..22cc920ead50a 100644
--- a/test/micro/org/openjdk/bench/java/lang/StringDecode.java
+++ b/test/micro/org/openjdk/bench/java/lang/StringDecode.java
@@ -54,7 +54,7 @@ public class StringDecode {
     @State(Scope.Thread)
     public static class WithCharset {
 
-        @Param({"US-ASCII", "ISO_8859_1", "UTF-8", "MS932", "ISO_8859_6"})
+        @Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6"})
         private String charsetName;
 
         private Charset charset;

From f14826b11ddfc42c10dbba427edc53aa97e6ed34 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Fri, 15 Jan 2021 15:43:27 +0100
Subject: [PATCH 11/25] Remove unused imports

---
 src/java.base/share/classes/java/lang/StringCoding.java | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index 0661ea155d116..275b70b0aba62 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -30,7 +30,6 @@
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
-import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.CoderResult;
@@ -42,17 +41,10 @@
 import java.util.Arrays;
 import jdk.internal.vm.annotation.IntrinsicCandidate;
 import sun.nio.cs.HistoricallyNamedCharset;
-import sun.nio.cs.ArrayDecoder;
 import sun.nio.cs.ArrayEncoder;
 
 import static java.lang.String.LATIN1;
 import static java.lang.String.UTF16;
-import static java.lang.String.COMPACT_STRINGS;
-import static java.lang.Character.isSurrogate;
-import static java.lang.Character.highSurrogate;
-import static java.lang.Character.lowSurrogate;
-import static java.lang.Character.isSupplementaryCodePoint;
-import static java.lang.StringUTF16.putChar;
 
 /**
  * Utility class for string encoding and decoding.

From c8899e1539eead1f101a23cb28bff8d292f82529 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Fri, 15 Jan 2021 22:56:56 +0100
Subject: [PATCH 12/25] Outline much of the decode logic back into StringCoding

---
 .../share/classes/java/lang/String.java       | 184 +-----------------
 .../share/classes/java/lang/StringCoding.java | 137 +++++++++++++
 2 files changed, 143 insertions(+), 178 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 658eb2ccc62df..2117d509e63bc 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -58,7 +58,6 @@
 import static java.lang.StringCoding.ISO_8859_1;
 import static java.lang.StringCoding.US_ASCII;
 import static java.lang.StringCoding.UTF_8;
-import static java.lang.StringUTF16.putChar;
 import static java.util.function.Predicate.not;
 
 /**
@@ -224,8 +223,6 @@ public final class String
         COMPACT_STRINGS = true;
     }
 
-    private static final char REPL = '\ufffd';
-
     /**
      * Class String is special cased within the Serialization Stream Protocol.
      *
@@ -563,9 +560,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                                 offset + 1 < sl) {
                             int b2 = bytes[offset + 1];
                             if (!StringCoding.isNotContinuation(b2)) {
-                                dst[dp++] = (byte)(((b1 << 6) ^ b2)^
-                                        (((byte) 0xC0 << 6) ^
-                                        ((byte) 0x80 << 0)));
+                                dst[dp++] = (byte)StringCoding.decode2(b1, b2);
                                 offset += 2;
                                 continue;
                             }
@@ -590,94 +585,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                     StringLatin1.inflate(dst, 0, buf, 0, dp);
                     dst = buf;
                 }
-                while (offset < sl) {
-                    int b1 = bytes[offset++];
-                    if (b1 >= 0) {
-                        putChar(dst, dp++, (char) b1);
-                    } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
-                        if (offset < sl) {
-                            int b2 = bytes[offset++];
-                            if (StringCoding.isNotContinuation(b2)) {
-                                putChar(dst, dp++, REPL);
-                                offset--;
-                            } else {
-                                putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
-                                        (((byte) 0xC0 << 6) ^
-                                        ((byte) 0x80 << 0))));
-                            }
-                            continue;
-                        }
-                        putChar(dst, dp++, REPL);
-                        break;
-                    } else if ((b1 >> 4) == -2) {
-                        if (offset + 1 < sl) {
-                            int b2 = bytes[offset++];
-                            int b3 = bytes[offset++];
-                            if (StringCoding.isMalformed3(b1, b2, b3)) {
-                                putChar(dst, dp++, REPL);
-                                offset -= 3;
-                                offset += StringCoding.malformedN(bytes, offset, 3);
-                            } else {
-                                char c = (char)((b1 << 12) ^
-                                                (b2 <<  6) ^
-                                                (b3 ^
-                                                 (((byte) 0xE0 << 12) ^
-                                                  ((byte) 0x80 <<  6) ^
-                                                  ((byte) 0x80 <<  0))));
-                                if (Character.isSurrogate(c)) {
-                                    putChar(dst, dp++, REPL);
-                                } else {
-                                    putChar(dst, dp++, c);
-                                }
-                            }
-                            continue;
-                        }
-                        if (offset  < sl && StringCoding.isMalformed3_2(b1, bytes[offset])) {
-                            putChar(dst, dp++, REPL);
-                            continue;
-                        }
-                        putChar(dst, dp++, REPL);
-                        break;
-                    } else if ((b1 >> 3) == -2) {
-                        if (offset + 2 < sl) {
-                            int b2 = bytes[offset++];
-                            int b3 = bytes[offset++];
-                            int b4 = bytes[offset++];
-                            int uc = ((b1 << 18) ^
-                                      (b2 << 12) ^
-                                      (b3 <<  6) ^
-                                      (b4 ^
-                                       (((byte) 0xF0 << 18) ^
-                                        ((byte) 0x80 << 12) ^
-                                        ((byte) 0x80 <<  6) ^
-                                        ((byte) 0x80 <<  0))));
-                            if (StringCoding.isMalformed4(b2, b3, b4) ||
-                                    !Character.isSupplementaryCodePoint(uc)) { // shortest form check
-                                putChar(dst, dp++, REPL);
-                                offset -= 4;
-                                offset += StringCoding.malformedN(bytes, offset, 4);
-                            } else {
-                                putChar(dst, dp++, Character.highSurrogate(uc));
-                                putChar(dst, dp++, Character.lowSurrogate(uc));
-                            }
-                            continue;
-                        }
-                        b1 &= 0xff;
-                        if (b1 > 0xf4 ||
-                                offset  < sl && StringCoding.isMalformed4_2(b1, bytes[offset] & 0xff)) {
-                            putChar(dst, dp++, REPL);
-                            continue;
-                        }
-                        offset++;
-                        putChar(dst, dp++, REPL);
-                        if (offset  < sl && StringCoding.isMalformed4_3(bytes[offset])) {
-                            continue;
-                        }
-                        break;
-                    } else {
-                        putChar(dst, dp++, REPL);
-                    }
-                }
+                dp = StringCoding.decodeUTF8_UTF16(bytes, offset, sl, dst, dp, true);
                 if (dp != length) {
                     dst = Arrays.copyOf(dst, dp << 1);
                 }
@@ -701,7 +609,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                 int dp = 0;
                 while (dp < length) {
                     int b = bytes[offset++];
-                    putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
+                    StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : StringCoding.REPL);
                 }
                 this.value = dst;
                 this.coder = UTF16;
@@ -906,14 +814,12 @@ private String(byte[] bytes, int offset, int length, Void throwOnError) {
                         offset + 1 < sl) {
                     int b2 = bytes[offset + 1];
                     if (!StringCoding.isNotContinuation(b2)) {
-                        dst[dp++] = (byte)(((b1 << 6) ^ b2)^
-                                (((byte) 0xC0 << 6) ^
-                                        ((byte) 0x80 << 0)));
+                        dst[dp++] = (byte)StringCoding.decode2(b1, b2);
                         offset += 2;
                         continue;
                     }
                 }
-                // anything not a latin1, including the repl
+                // anything not a latin1, including the REPL
                 // we have to go with the utf16
                 break;
             }
@@ -933,85 +839,7 @@ private String(byte[] bytes, int offset, int length, Void throwOnError) {
             StringLatin1.inflate(dst, 0, buf, 0, dp);
             dst = buf;
         }
-        while (offset < sl) {
-            int b1 = bytes[offset++];
-            if (b1 >= 0) {
-                putChar(dst, dp++, (char) b1);
-            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
-                if (offset < sl) {
-                    int b2 = bytes[offset++];
-                    if (StringCoding.isNotContinuation(b2)) {
-                        StringCoding.throwMalformed(offset - 1, 1);
-                    } else {
-                        putChar(dst, dp++, (char)(((b1 << 6) ^ b2)^
-                                (((byte) 0xC0 << 6) ^
-                                        ((byte) 0x80 << 0))));
-                    }
-                    continue;
-                }
-                StringCoding.throwMalformed(offset, 1);  // underflow()
-                break;
-            } else if ((b1 >> 4) == -2) {
-                if (offset + 1 < sl) {
-                    int b2 = bytes[offset++];
-                    int b3 = bytes[offset++];
-                    if (StringCoding.isMalformed3(b1, b2, b3)) {
-                        StringCoding.throwMalformed(offset - 3, 3);
-                    } else {
-                        char c = (char)((b1 << 12) ^
-                                (b2 <<  6) ^
-                                (b3 ^
-                                        (((byte) 0xE0 << 12) ^
-                                        ((byte) 0x80 <<  6) ^
-                                        ((byte) 0x80 <<  0))));
-                        if (Character.isSurrogate(c)) {
-                            StringCoding.throwMalformed(offset - 3, 3);
-                        } else {
-                            putChar(dst, dp++, c);
-                        }
-                    }
-                    continue;
-                }
-                if (offset  < sl && StringCoding.isMalformed3_2(b1, bytes[offset])) {
-                    StringCoding.throwMalformed(offset - 1, 2);
-                    continue;
-                }
-                StringCoding.throwMalformed(offset, 1);
-                break;
-            } else if ((b1 >> 3) == -2) {
-                if (offset + 2 < sl) {
-                    int b2 = bytes[offset++];
-                    int b3 = bytes[offset++];
-                    int b4 = bytes[offset++];
-                    int uc = ((b1 << 18) ^
-                              (b2 << 12) ^
-                              (b3 <<  6) ^
-                              (b4 ^
-                               (((byte) 0xF0 << 18) ^
-                               ((byte) 0x80 << 12) ^
-                               ((byte) 0x80 <<  6) ^
-                               ((byte) 0x80 <<  0))));
-                    if (StringCoding.isMalformed4(b2, b3, b4) ||
-                            !Character.isSupplementaryCodePoint(uc)) { // shortest form check
-                        StringCoding.throwMalformed(offset - 4, 4);
-                    } else {
-                        putChar(dst, dp++, Character.highSurrogate(uc));
-                        putChar(dst, dp++, Character.lowSurrogate(uc));
-                    }
-                    continue;
-                }
-                b1 &= 0xff;
-                if (b1 > 0xf4 ||
-                    offset < sl && StringCoding.isMalformed4_2(b1, bytes[offset] & 0xff)) {
-                    StringCoding.throwMalformed(offset - 1, 1);  // or 2
-                    continue;
-                }
-                StringCoding.throwMalformed(offset - 1, 1);
-                break;
-            } else {
-                StringCoding.throwMalformed(offset - 1, 1);
-            }
-        }
+        dp = StringCoding.decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
         if (dp != length) {
             dst = Arrays.copyOf(dst, dp << 1);
         }
diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index 275b70b0aba62..6385d536cb2ff 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -45,6 +45,7 @@
 
 import static java.lang.String.LATIN1;
 import static java.lang.String.UTF16;
+import static java.lang.StringUTF16.putChar;
 
 /**
  * Utility class for string encoding and decoding.
@@ -62,6 +63,8 @@ private StringCoding() { }
     static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
     static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
 
+    static final char REPL = '\ufffd';
+
     private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
         SoftReference<T> sr = tl.get();
         if (sr == null)
@@ -399,6 +402,140 @@ static boolean isMalformed4_3(int b3) {
         return (b3 & 0xc0) != 0x80;
     }
 
+    static char decode2(int b1, int b2) {
+        return (char)(((b1 << 6) ^ b2)^
+                (((byte) 0xC0 << 6) ^
+                ((byte) 0x80 << 0)));
+    }
+
+    static char decode3(int b1, int b2, int b3) {
+        return (char)((b1 << 12) ^
+                        (b2 <<  6) ^
+                        (b3 ^
+                         (((byte) 0xE0 << 12) ^
+                          ((byte) 0x80 <<  6) ^
+                          ((byte) 0x80 <<  0))));
+    }
+
+    static int decode4(int b1, int b2, int b3, int b4) {
+        return ((b1 << 18) ^
+                (b2 << 12) ^
+                (b3 <<  6) ^
+                (b4 ^
+                 (((byte) 0xF0 << 18) ^
+                  ((byte) 0x80 << 12) ^
+                  ((byte) 0x80 <<  6) ^
+                  ((byte) 0x80 <<  0))));
+    }
+
+    static int decodeUTF8_UTF16(byte[] bytes, int offset, int sl, byte[] dst, int dp, boolean doReplace) {
+        while (offset < sl) {
+            int b1 = bytes[offset++];
+            if (b1 >= 0) {
+                putChar(dst, dp++, (char) b1);
+            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
+                if (offset < sl) {
+                    int b2 = bytes[offset++];
+                    if (StringCoding.isNotContinuation(b2)) {
+                        if (!doReplace) {
+                            throwMalformed(offset - 1, 1);
+                        }
+                        putChar(dst, dp++, REPL);
+                        offset--;
+                    } else {
+                        putChar(dst, dp++, decode2(b1, b2));
+                    }
+                    continue;
+                }
+                if (!doReplace) {
+                    throwMalformed(offset, 1);  // underflow()
+                }
+                putChar(dst, dp++, REPL);
+                break;
+            } else if ((b1 >> 4) == -2) {
+                if (offset + 1 < sl) {
+                    int b2 = bytes[offset++];
+                    int b3 = bytes[offset++];
+                    if (isMalformed3(b1, b2, b3)) {
+                        if (!doReplace) {
+                            throwMalformed(offset - 3, 3);
+                        }
+                        putChar(dst, dp++, REPL);
+                        offset -= 3;
+                        offset += malformedN(bytes, offset, 3);
+                    } else {
+                        char c = decode3(b1, b2, b3);
+                        if (Character.isSurrogate(c)) {
+                            if (!doReplace) {
+                                throwMalformed(offset - 3, 3);
+                            }
+                            putChar(dst, dp++, REPL);
+                        } else {
+                            putChar(dst, dp++, c);
+                        }
+                    }
+                    continue;
+                }
+                if (offset < sl && isMalformed3_2(b1, bytes[offset])) {
+                    if (!doReplace) {
+                        throwMalformed(offset - 1, 2);
+                    }
+                    putChar(dst, dp++, REPL);
+                    continue;
+                }
+                if (!doReplace) {
+                    throwMalformed(offset, 1);
+                }
+                putChar(dst, dp++, REPL);
+                break;
+            } else if ((b1 >> 3) == -2) {
+                if (offset + 2 < sl) {
+                    int b2 = bytes[offset++];
+                    int b3 = bytes[offset++];
+                    int b4 = bytes[offset++];
+                    int uc = decode4(b1, b2, b3, b4);
+                    if (isMalformed4(b2, b3, b4) ||
+                            !Character.isSupplementaryCodePoint(uc)) { // shortest form check
+                        if (!doReplace) {
+                            throwMalformed(offset - 4, 4);
+                        }
+                        putChar(dst, dp++, REPL);
+                        offset -= 4;
+                        offset += StringCoding.malformedN(bytes, offset, 4);
+                    } else {
+                        putChar(dst, dp++, Character.highSurrogate(uc));
+                        putChar(dst, dp++, Character.lowSurrogate(uc));
+                    }
+                    continue;
+                }
+                b1 &= 0xff;
+                if (b1 > 0xf4 ||
+                        offset  < sl && StringCoding.isMalformed4_2(b1, bytes[offset] & 0xff)) {
+                    if (!doReplace) {
+                        throwMalformed(offset - 1, 1);  // or 2
+                    }
+                    putChar(dst, dp++, REPL);
+                    continue;
+                }
+                if (!doReplace) {
+                    throwMalformed(offset - 1, 1);
+                }
+                offset++;
+                putChar(dst, dp++, REPL);
+                if (offset < sl && StringCoding.isMalformed4_3(bytes[offset])) {
+                    continue;
+                }
+                break;
+            } else {
+                if (!doReplace) {
+                    throwMalformed(offset - 1, 1);
+                }
+                putChar(dst, dp++, REPL);
+            }
+        }
+        return dp;
+    }
+
     // for nb == 3/4
     static int malformedN(byte[] src, int sp, int nb) {
         if (nb == 3) {

From f328f4516541170c23ffc8d6966b7572c38c939d Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Sat, 16 Jan 2021 00:37:26 +0100
Subject: [PATCH 13/25] Copyrights

---
 src/java.base/share/classes/java/lang/String.java | 2 +-
 src/java.base/share/classes/java/lang/System.java | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 2117d509e63bc..7e4077d16fb59 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1994, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1994, 2021, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it
diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java
index 91fbadc4d1bfa..e536818fbf6e9 100644
--- a/src/java.base/share/classes/java/lang/System.java
+++ b/src/java.base/share/classes/java/lang/System.java
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1994, 2020, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1994, 2021, Oracle and/or its affiliates. All rights reserved.
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
  *
  * This code is free software; you can redistribute it and/or modify it

From eb9c8507ab00ef57aed7e509ea81c5c3657401f7 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Sat, 16 Jan 2021 02:10:31 +0100
Subject: [PATCH 14/25] Further simplifications

---
 .../share/classes/java/lang/String.java       | 140 ++++++------------
 .../share/classes/java/lang/StringCoding.java |  18 +++
 2 files changed, 67 insertions(+), 91 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 7e4077d16fb59..3a6415fb938f4 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -683,7 +683,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                 offset = 0;
             }
 
-            int caLen = decode(cd, ca, bytes, offset, length);
+            int caLen = StringCoding.decodeWithDecoder(cd, ca, bytes, offset, length);
             if (COMPACT_STRINGS) {
                 byte[] bs = StringUTF16.compress(ca, 0, caLen);
                 if (bs != null) {
@@ -697,31 +697,54 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
         }
     }
 
-    private static int decode(CharsetDecoder cd, char[] dst, byte[] src, int offset, int length) {
-        ByteBuffer bb = ByteBuffer.wrap(src, offset, length);
-        CharBuffer cb = CharBuffer.wrap(dst, 0, dst.length);
-        try {
-            CoderResult cr = cd.decode(bb, cb, true);
-            if (!cr.isUnderflow())
-                cr.throwException();
-            cr = cd.flush(cb);
-            if (!cr.isUnderflow())
-                cr.throwException();
-        } catch (CharacterCodingException x) {
-            // Substitution is always enabled,
-            // so this shouldn't happen
-            throw new Error(x);
-        }
-        return cb.position();
-    }
-
-    ////////////////////// for j.u.z.ZipCoder //////////////////////////
-
     /*
      * Throws iae, instead of replacing, if malformed or unmappable.
      */
-    static String newStringUTF8NoRepl(byte[] src, int off, int len) {
-        return new String(src, off, len, (Void)null);
+    static String newStringUTF8NoRepl(byte[] bytes, int offset, int length) {
+        checkBoundsOffCount(offset, length, bytes.length);
+        int sl = offset + length;
+        int dp = 0;
+        byte[] dst = new byte[length];
+        if (COMPACT_STRINGS) {
+            while (offset < sl) {
+                int b1 = bytes[offset];
+                if (b1 >= 0) {
+                    dst[dp++] = (byte)b1;
+                    offset++;
+                    continue;
+                }
+                if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
+                        offset + 1 < sl) {
+                    int b2 = bytes[offset + 1];
+                    if (!StringCoding.isNotContinuation(b2)) {
+                        dst[dp++] = (byte)StringCoding.decode2(b1, b2);
+                        offset += 2;
+                        continue;
+                    }
+                }
+                // anything not a latin1, including the REPL
+                // we have to go with the utf16
+                break;
+            }
+            if (offset == sl) {
+                if (dp != dst.length) {
+                    dst = Arrays.copyOf(dst, dp);
+                }
+                return new String(dst, LATIN1);
+            }
+        }
+        if (dp == 0) {
+            dst = new byte[length << 1];
+        } else {
+            byte[] buf = new byte[length << 1];
+            StringLatin1.inflate(dst, 0, buf, 0, dp);
+            dst = buf;
+        }
+        dp = StringCoding.decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
+        if (dp != length) {
+            dst = Arrays.copyOf(dst, dp << 1);
+        }
+        return new String(dst, UTF16);
     }
 
     static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {
@@ -772,79 +795,14 @@ static String newStringNoRepl1(byte[] src, Charset cs) {
                 System.getSecurityManager() != null) {
             src = Arrays.copyOf(src, len);
         }
-        ByteBuffer bb = ByteBuffer.wrap(src);
-        CharBuffer cb = CharBuffer.wrap(ca);
-        try {
-            CoderResult cr = cd.decode(bb, cb, true);
-            if (!cr.isUnderflow())
-                cr.throwException();
-            cr = cd.flush(cb);
-            if (!cr.isUnderflow())
-                cr.throwException();
-        } catch (CharacterCodingException x) {
-            throw new IllegalArgumentException(x);
-        }
+        int caLen = StringCoding.decodeWithDecoder(cd, ca, src, 0, src.length);
         if (COMPACT_STRINGS) {
-            byte[] bs = StringUTF16.compress(ca, 0, cb.position());
+            byte[] bs = StringUTF16.compress(ca, 0, caLen);
             if (bs != null) {
                 return new String(bs, LATIN1);
             }
         }
-        return new String(StringUTF16.toBytes(ca, 0, cb.position()), UTF16);
-    }
-
-    /*
-     * Private constructor for doing UTF-8 decode, but throwing iae on malformed or
-     * unmappable characters
-     */
-    private String(byte[] bytes, int offset, int length, Void throwOnError) {
-        checkBoundsOffCount(offset, length, bytes.length);
-        int sl = offset + length;
-        int dp = 0;
-        byte[] dst = new byte[length];
-        if (COMPACT_STRINGS) {
-            while (offset < sl) {
-                int b1 = bytes[offset];
-                if (b1 >= 0) {
-                    dst[dp++] = (byte)b1;
-                    offset++;
-                    continue;
-                }
-                if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
-                        offset + 1 < sl) {
-                    int b2 = bytes[offset + 1];
-                    if (!StringCoding.isNotContinuation(b2)) {
-                        dst[dp++] = (byte)StringCoding.decode2(b1, b2);
-                        offset += 2;
-                        continue;
-                    }
-                }
-                // anything not a latin1, including the REPL
-                // we have to go with the utf16
-                break;
-            }
-            if (offset == sl) {
-                if (dp != dst.length) {
-                    dst = Arrays.copyOf(dst, dp);
-                }
-                this.value = dst;
-                this.coder = LATIN1;
-                return;
-            }
-        }
-        if (dp == 0) {
-            dst = new byte[length << 1];
-        } else {
-            byte[] buf = new byte[length << 1];
-            StringLatin1.inflate(dst, 0, buf, 0, dp);
-            dst = buf;
-        }
-        dp = StringCoding.decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
-        if (dp != length) {
-            dst = Arrays.copyOf(dst, dp << 1);
-        }
-        this.value = dst;
-        this.coder = UTF16;
+        return new String(StringUTF16.toBytes(ca, 0, caLen), UTF16);
     }
 
     /**
diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index 6385d536cb2ff..f869852d536fb 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -536,6 +536,24 @@ static int decodeUTF8_UTF16(byte[] bytes, int offset, int sl, byte[] dst, int dp
         return dp;
     }
 
+    static int decodeWithDecoder(CharsetDecoder cd, char[] dst, byte[] src, int offset, int length) {
+        ByteBuffer bb = ByteBuffer.wrap(src, offset, length);
+        CharBuffer cb = CharBuffer.wrap(dst, 0, dst.length);
+        try {
+            CoderResult cr = cd.decode(bb, cb, true);
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = cd.flush(cb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            // Substitution is always enabled,
+            // so this shouldn't happen
+            throw new Error(x);
+        }
+        return cb.position();
+    }
+
     // for nb == 3/4
     static int malformedN(byte[] src, int sp, int nb) {
         if (nb == 3) {

From 4c8eacd1d78cec422ca236c0f1f6aafbe3bc11c5 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Sat, 16 Jan 2021 02:14:23 +0100
Subject: [PATCH 15/25] Add missing import (who needs IDEs?)

---
 src/java.base/share/classes/java/lang/StringCoding.java | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index f869852d536fb..9c0029939fe64 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -30,6 +30,7 @@
 import java.nio.ByteBuffer;
 import java.nio.CharBuffer;
 import java.nio.charset.Charset;
+import java.nio.charset.CharsetDecoder;
 import java.nio.charset.CharsetEncoder;
 import java.nio.charset.CharacterCodingException;
 import java.nio.charset.CoderResult;

From 790e746342826ae18bcc169cbb9c7d024a592937 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Sun, 17 Jan 2021 13:03:47 +0100
Subject: [PATCH 16/25] Simplify lookupCharset

---
 .../share/classes/java/lang/String.java       | 16 +------
 .../share/classes/java/lang/StringCoding.java | 47 ++++++++-----------
 2 files changed, 20 insertions(+), 43 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 3a6415fb938f4..c133b995b87ac 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -483,21 +483,7 @@ public String(byte ascii[], int hibyte) {
      */
     public String(byte[] bytes, int offset, int length, String charsetName)
             throws UnsupportedEncodingException {
-        this(bytes, offset, length, lookupCharset(charsetName));
-    }
-
-    private static Charset lookupCharset(String charsetName)
-            throws UnsupportedEncodingException {
-        Objects.requireNonNull(charsetName);
-        try {
-            Charset cs = StringCoding.lookupCharset(charsetName);
-            if (cs == null) {
-                throw new UnsupportedEncodingException(charsetName);
-            }
-            return cs;
-        } catch (IllegalCharsetNameException ics) {
-            throw new UnsupportedEncodingException(charsetName);
-        }
+        this(bytes, offset, length, StringCoding.lookupCharset(charsetName));
     }
 
     /**
diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index 9c0029939fe64..5602fd8e1c155 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -40,6 +40,8 @@
 import java.nio.charset.UnmappableCharacterException;
 import java.nio.charset.UnsupportedCharsetException;
 import java.util.Arrays;
+import java.util.Objects;
+
 import jdk.internal.vm.annotation.IntrinsicCandidate;
 import sun.nio.cs.HistoricallyNamedCharset;
 import sun.nio.cs.ArrayEncoder;
@@ -91,15 +93,13 @@ static int scale(int len, float expansionFactor) {
         return (int)(len * (double)expansionFactor);
     }
 
-    static Charset lookupCharset(String csn) {
-        if (Charset.isSupported(csn)) {
-            try {
-                return Charset.forName(csn);
-            } catch (UnsupportedCharsetException x) {
-                throw new Error(x);
-            }
+    static Charset lookupCharset(String csn) throws UnsupportedEncodingException {
+        Objects.requireNonNull(csn);
+        try {
+            return Charset.forName(csn);
+        } catch (UnsupportedCharsetException | IllegalCharsetNameException x) {
+            throw new UnsupportedEncodingException(csn);
         }
-        return null;
     }
 
     @IntrinsicCandidate
@@ -181,32 +181,23 @@ byte[] encode(byte coder, byte[] val) {
         }
     }
 
-    static byte[] encode(String charsetName, byte coder, byte[] val)
+    static byte[] encode(String csn, byte coder, byte[] val)
         throws UnsupportedEncodingException
     {
         StringEncoder se = deref(encoder);
-        String csn = (charsetName == null) ? "ISO-8859-1" : charsetName;
         if ((se == null) || !(csn.equals(se.requestedCharsetName())
                               || csn.equals(se.charsetName()))) {
-            se = null;
-            try {
-                Charset cs = lookupCharset(csn);
-                if (cs != null) {
-                    if (cs == UTF_8) {
-                        return encodeUTF8(coder, val, true);
-                    }
-                    if (cs == ISO_8859_1) {
-                        return encode8859_1(coder, val);
-                    }
-                    if (cs == US_ASCII) {
-                        return encodeASCII(coder, val);
-                    }
-                    se = new StringEncoder(cs, csn);
-                }
-            } catch (IllegalCharsetNameException x) {}
-            if (se == null) {
-                throw new UnsupportedEncodingException (csn);
+            Charset cs = lookupCharset(csn);
+            if (cs == UTF_8) {
+                return encodeUTF8(coder, val, true);
+            }
+            if (cs == ISO_8859_1) {
+                return encode8859_1(coder, val);
+            }
+            if (cs == US_ASCII) {
+                return encodeASCII(coder, val);
             }
+            se = new StringEncoder(cs, csn);
             set(encoder, se);
         }
         return se.encode(coder, val);

From e870b3bb8b4a6ee45878078a0c2fb8bda8c9a140 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Sun, 17 Jan 2021 23:38:46 +0100
Subject: [PATCH 17/25] Add missing ASCII fast-path for UTF-8 NoRepl methods

---
 .../share/classes/java/lang/String.java       | 85 ++++++++++---------
 1 file changed, 45 insertions(+), 40 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index c133b995b87ac..b96884f82cb99 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -532,9 +532,9 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
             } else {
                 int sl = offset + length;
                 int dp = 0;
-                byte[] dst = new byte[length];
-
+                byte[] dst = null;
                 if (COMPACT_STRINGS) {
+                    dst = new byte[length];
                     while (offset < sl) {
                         int b1 = bytes[offset];
                         if (b1 >= 0) {
@@ -564,7 +564,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                         return;
                     }
                 }
-                if (dp == 0) {
+                if (dp == 0 || dst == null) {
                     dst = new byte[length << 1];
                 } else {
                     byte[] buf = new byte[length << 1];
@@ -688,49 +688,54 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
      */
     static String newStringUTF8NoRepl(byte[] bytes, int offset, int length) {
         checkBoundsOffCount(offset, length, bytes.length);
-        int sl = offset + length;
-        int dp = 0;
-        byte[] dst = new byte[length];
-        if (COMPACT_STRINGS) {
-            while (offset < sl) {
-                int b1 = bytes[offset];
-                if (b1 >= 0) {
-                    dst[dp++] = (byte)b1;
-                    offset++;
-                    continue;
-                }
-                if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
-                        offset + 1 < sl) {
-                    int b2 = bytes[offset + 1];
-                    if (!StringCoding.isNotContinuation(b2)) {
-                        dst[dp++] = (byte)StringCoding.decode2(b1, b2);
-                        offset += 2;
+        if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
+            return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
+        } else {
+            int sl = offset + length;
+            int dp = 0;
+            byte[] dst = null;
+            if (COMPACT_STRINGS) {
+                dst = new byte[length];
+                while (offset < sl) {
+                    int b1 = bytes[offset];
+                    if (b1 >= 0) {
+                        dst[dp++] = (byte) b1;
+                        offset++;
                         continue;
                     }
+                    if ((b1 == (byte) 0xc2 || b1 == (byte) 0xc3) &&
+                            offset + 1 < sl) {
+                        int b2 = bytes[offset + 1];
+                        if (!StringCoding.isNotContinuation(b2)) {
+                            dst[dp++] = (byte) StringCoding.decode2(b1, b2);
+                            offset += 2;
+                            continue;
+                        }
+                    }
+                    // anything not a latin1, including the REPL
+                    // we have to go with the utf16
+                    break;
                 }
-                // anything not a latin1, including the REPL
-                // we have to go with the utf16
-                break;
-            }
-            if (offset == sl) {
-                if (dp != dst.length) {
-                    dst = Arrays.copyOf(dst, dp);
+                if (offset == sl) {
+                    if (dp != dst.length) {
+                        dst = Arrays.copyOf(dst, dp);
+                    }
+                    return new String(dst, LATIN1);
                 }
-                return new String(dst, LATIN1);
             }
+            if (dp == 0 || dst == null) {
+                dst = new byte[length << 1];
+            } else {
+                byte[] buf = new byte[length << 1];
+                StringLatin1.inflate(dst, 0, buf, 0, dp);
+                dst = buf;
+            }
+            dp = StringCoding.decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
+            if (dp != length) {
+                dst = Arrays.copyOf(dst, dp << 1);
+            }
+            return new String(dst, UTF16);
         }
-        if (dp == 0) {
-            dst = new byte[length << 1];
-        } else {
-            byte[] buf = new byte[length << 1];
-            StringLatin1.inflate(dst, 0, buf, 0, dp);
-            dst = buf;
-        }
-        dp = StringCoding.decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
-        if (dp != length) {
-            dst = Arrays.copyOf(dst, dp << 1);
-        }
-        return new String(dst, UTF16);
     }
 
     static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingException {

From ba279a7e9ea840c3e9539bbfe3aaccd93f0e2748 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Mon, 18 Jan 2021 10:22:43 +0100
Subject: [PATCH 18/25] Harmonize empty string checking in newString methods

---
 src/java.base/share/classes/java/lang/String.java | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index b96884f82cb99..b7289f4222859 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -688,6 +688,9 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
      */
     static String newStringUTF8NoRepl(byte[] bytes, int offset, int length) {
         checkBoundsOffCount(offset, length, bytes.length);
+        if (length == 0) {
+            return "";
+        }
         if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
             return new String(Arrays.copyOfRange(bytes, offset, offset + length), LATIN1);
         } else {
@@ -752,6 +755,10 @@ static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingExce
     }
 
     static String newStringNoRepl1(byte[] src, Charset cs) {
+        int len = src.length;
+        if (len == 0) {
+            return "";
+        }
         if (cs == UTF_8) {
             return newStringUTF8NoRepl(src, 0, src.length);
         }
@@ -776,10 +783,6 @@ static String newStringNoRepl1(byte[] src, Charset cs) {
                 ad.isASCIICompatible() && !StringCoding.hasNegatives(src, 0, src.length)) {
             return new String(src, 0, src.length, ISO_8859_1);
         }
-        int len = src.length;
-        if (len == 0) {
-            return "";
-        }
         int en = StringCoding.scale(len, cd.maxCharsPerByte());
         char[] ca = new char[en];
         if (cs.getClass().getClassLoader0() != null &&

From c37aa600d54edf11a3f571b72693de8acaf3fecb Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Tue, 19 Jan 2021 22:06:03 +0100
Subject: [PATCH 19/25] Make more StringCoding helpers private

---
 .../share/classes/java/lang/StringCoding.java | 29 ++++++++++---------
 1 file changed, 15 insertions(+), 14 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index 5602fd8e1c155..32d72b9823305 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -365,51 +365,52 @@ private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
 
     //////////////////////////////// utf8 ////////////////////////////////////
 
+
     static boolean isNotContinuation(int b) {
         return (b & 0xc0) != 0x80;
     }
 
-    static boolean isMalformed3(int b1, int b2, int b3) {
+    private static boolean isMalformed3(int b1, int b2, int b3) {
         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
                (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
     }
 
-    static boolean isMalformed3_2(int b1, int b2) {
+    private static boolean isMalformed3_2(int b1, int b2) {
         return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
                (b2 & 0xc0) != 0x80;
     }
 
-    static boolean isMalformed4(int b2, int b3, int b4) {
+    private static boolean isMalformed4(int b2, int b3, int b4) {
         return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
                (b4 & 0xc0) != 0x80;
     }
 
-    static boolean isMalformed4_2(int b1, int b2) {
+    private static boolean isMalformed4_2(int b1, int b2) {
         return (b1 == 0xf0 && (b2  < 0x90 || b2 > 0xbf)) ||
                (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
                (b2 & 0xc0) != 0x80;
     }
 
-    static boolean isMalformed4_3(int b3) {
+    private static boolean isMalformed4_3(int b3) {
         return (b3 & 0xc0) != 0x80;
     }
 
     static char decode2(int b1, int b2) {
-        return (char)(((b1 << 6) ^ b2)^
+        return (char)(((b1 << 6) ^ b2) ^
                 (((byte) 0xC0 << 6) ^
-                ((byte) 0x80 << 0)));
+                 ((byte) 0x80 << 0)));
     }
 
-    static char decode3(int b1, int b2, int b3) {
+    private static char decode3(int b1, int b2, int b3) {
         return (char)((b1 << 12) ^
-                        (b2 <<  6) ^
-                        (b3 ^
-                         (((byte) 0xE0 << 12) ^
-                          ((byte) 0x80 <<  6) ^
-                          ((byte) 0x80 <<  0))));
+                      (b2 <<  6) ^
+                      (b3 ^
+                       (((byte) 0xE0 << 12) ^
+                        ((byte) 0x80 <<  6) ^
+                        ((byte) 0x80 <<  0))));
     }
 
-    static int decode4(int b1, int b2, int b3, int b4) {
+    private static int decode4(int b1, int b2, int b3, int b4) {
         return ((b1 << 18) ^
                 (b2 << 12) ^
                 (b3 <<  6) ^

From 869bc109be2fa6b04b20dd7a3401f0ce6ab000f9 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Thu, 21 Jan 2021 12:36:07 +0100
Subject: [PATCH 20/25] More cleanups, make all things private that can be

---
 .../share/classes/java/lang/String.java       |   9 +-
 .../share/classes/java/lang/StringCoding.java | 100 +++++++++---------
 2 files changed, 50 insertions(+), 59 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index b7289f4222859..7608452b8d1d7 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -609,13 +609,6 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
             // (2)The defensive copy of the input byte/char[] has a big performance
             // impact, as well as the outgoing result byte/char[]. Need to do the
             // optimization check of (sm==null && classLoader0==null) for both.
-            // (3)There might be a timing gap in isTrusted setting. getClassLoader0()
-            // is only checked (and then isTrusted gets set) when (SM==null). It is
-            // possible that the SM==null for now but then SM is NOT null later
-            // when safeTrim() is invoked...the "safe" way to do is to redundant
-            // check (... && (isTrusted || SM == null || getClassLoader0())) in trim
-            // but it then can be argued that the SM is null when the operation
-            // is started...
             CharsetDecoder cd = charset.newDecoder();
             // ArrayDecoder fastpaths
             if (cd instanceof ArrayDecoder ad) {
@@ -754,7 +747,7 @@ static String newStringNoRepl(byte[] src, Charset cs) throws CharacterCodingExce
         }
     }
 
-    static String newStringNoRepl1(byte[] src, Charset cs) {
+    private static String newStringNoRepl1(byte[] src, Charset cs) {
         int len = src.length;
         if (len == 0) {
             return "";
diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index 32d72b9823305..b455e167bf7fc 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -131,17 +131,17 @@ private StringEncoder(Charset cs, String rcn) {
                     ((ArrayEncoder)ce).isASCIICompatible();
         }
 
-        String charsetName() {
+        private String charsetName() {
             if (cs instanceof HistoricallyNamedCharset)
                 return ((HistoricallyNamedCharset)cs).historicalName();
             return cs.name();
         }
 
-        final String requestedCharsetName() {
+        private final String requestedCharsetName() {
             return requestedCharsetName;
         }
 
-        byte[] encode(byte coder, byte[] val) {
+        private byte[] encode(byte coder, byte[] val) {
             // fastpath for ascii compatible
             if (coder == LATIN1 && isASCIICompatible &&
                 !hasNegatives(val, 0, val.length)) {
@@ -346,7 +346,7 @@ private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
             dp = dp + ret;
             if (ret != len) {
                 if (!doReplace) {
-                    throwUnmappable(sp, 1);
+                    throwUnmappable(sp);
                 }
                 char c = StringUTF16.getChar(val, sp++);
                 if (Character.isHighSurrogate(c) && sp < sl &&
@@ -386,7 +386,7 @@ private static boolean isMalformed4(int b2, int b3, int b4) {
     }
 
     private static boolean isMalformed4_2(int b1, int b2) {
-        return (b1 == 0xf0 && (b2  < 0x90 || b2 > 0xbf)) ||
+        return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
                (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
                (b2 & 0xc0) != 0x80;
     }
@@ -397,8 +397,8 @@ private static boolean isMalformed4_3(int b3) {
 
     static char decode2(int b1, int b2) {
         return (char)(((b1 << 6) ^ b2) ^
-                (((byte) 0xC0 << 6) ^
-                 ((byte) 0x80 << 0)));
+                      (((byte) 0xC0 << 6) ^
+                       ((byte) 0x80 << 0)));
     }
 
     private static char decode3(int b1, int b2, int b3) {
@@ -421,46 +421,46 @@ private static int decode4(int b1, int b2, int b3, int b4) {
                   ((byte) 0x80 <<  0))));
     }
 
-    static int decodeUTF8_UTF16(byte[] bytes, int offset, int sl, byte[] dst, int dp, boolean doReplace) {
-        while (offset < sl) {
-            int b1 = bytes[offset++];
+    static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp, boolean doReplace) {
+        while (sp < sl) {
+            int b1 = src[sp++];
             if (b1 >= 0) {
                 putChar(dst, dp++, (char) b1);
             } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
-                if (offset < sl) {
-                    int b2 = bytes[offset++];
+                if (sp < sl) {
+                    int b2 = src[sp++];
                     if (StringCoding.isNotContinuation(b2)) {
                         if (!doReplace) {
-                            throwMalformed(offset - 1, 1);
+                            throwMalformed(sp - 1, 1);
                         }
                         putChar(dst, dp++, REPL);
-                        offset--;
+                        sp--;
                     } else {
                         putChar(dst, dp++, decode2(b1, b2));
                     }
                     continue;
                 }
                 if (!doReplace) {
-                    throwMalformed(offset, 1);  // underflow()
+                    throwMalformed(sp, 1);  // underflow()
                 }
                 putChar(dst, dp++, REPL);
                 break;
             } else if ((b1 >> 4) == -2) {
-                if (offset + 1 < sl) {
-                    int b2 = bytes[offset++];
-                    int b3 = bytes[offset++];
+                if (sp + 1 < sl) {
+                    int b2 = src[sp++];
+                    int b3 = src[sp++];
                     if (isMalformed3(b1, b2, b3)) {
                         if (!doReplace) {
-                            throwMalformed(offset - 3, 3);
+                            throwMalformed(sp - 3, 3);
                         }
                         putChar(dst, dp++, REPL);
-                        offset -= 3;
-                        offset += malformedN(bytes, offset, 3);
+                        sp -= 3;
+                        sp += malformedN(src, sp, 3);
                     } else {
                         char c = decode3(b1, b2, b3);
                         if (Character.isSurrogate(c)) {
                             if (!doReplace) {
-                                throwMalformed(offset - 3, 3);
+                                throwMalformed(sp - 3, 3);
                             }
                             putChar(dst, dp++, REPL);
                         } else {
@@ -469,32 +469,32 @@ static int decodeUTF8_UTF16(byte[] bytes, int offset, int sl, byte[] dst, int dp
                     }
                     continue;
                 }
-                if (offset < sl && isMalformed3_2(b1, bytes[offset])) {
+                if (sp < sl && isMalformed3_2(b1, src[sp])) {
                     if (!doReplace) {
-                        throwMalformed(offset - 1, 2);
+                        throwMalformed(sp - 1, 2);
                     }
                     putChar(dst, dp++, REPL);
                     continue;
                 }
                 if (!doReplace) {
-                    throwMalformed(offset, 1);
+                    throwMalformed(sp, 1);
                 }
                 putChar(dst, dp++, REPL);
                 break;
             } else if ((b1 >> 3) == -2) {
-                if (offset + 2 < sl) {
-                    int b2 = bytes[offset++];
-                    int b3 = bytes[offset++];
-                    int b4 = bytes[offset++];
+                if (sp + 2 < sl) {
+                    int b2 = src[sp++];
+                    int b3 = src[sp++];
+                    int b4 = src[sp++];
                     int uc = decode4(b1, b2, b3, b4);
                     if (isMalformed4(b2, b3, b4) ||
                             !Character.isSupplementaryCodePoint(uc)) { // shortest form check
                         if (!doReplace) {
-                            throwMalformed(offset - 4, 4);
+                            throwMalformed(sp - 4, 4);
                         }
                         putChar(dst, dp++, REPL);
-                        offset -= 4;
-                        offset += StringCoding.malformedN(bytes, offset, 4);
+                        sp -= 4;
+                        sp += StringCoding.malformedN(src, sp, 4);
                     } else {
                         putChar(dst, dp++, Character.highSurrogate(uc));
                         putChar(dst, dp++, Character.lowSurrogate(uc));
@@ -502,26 +502,25 @@ static int decodeUTF8_UTF16(byte[] bytes, int offset, int sl, byte[] dst, int dp
                     continue;
                 }
                 b1 &= 0xff;
-                if (b1 > 0xf4 ||
-                        offset  < sl && StringCoding.isMalformed4_2(b1, bytes[offset] & 0xff)) {
+                if (b1 > 0xf4 || sp < sl && StringCoding.isMalformed4_2(b1, src[sp] & 0xff)) {
                     if (!doReplace) {
-                        throwMalformed(offset - 1, 1);  // or 2
+                        throwMalformed(sp - 1, 1);  // or 2
                     }
                     putChar(dst, dp++, REPL);
                     continue;
                 }
                 if (!doReplace) {
-                    throwMalformed(offset - 1, 1);
+                    throwMalformed(sp - 1, 1);
                 }
-                offset++;
+                sp++;
                 putChar(dst, dp++, REPL);
-                if (offset < sl && StringCoding.isMalformed4_3(bytes[offset])) {
+                if (sp < sl && StringCoding.isMalformed4_3(src[sp])) {
                     continue;
                 }
                 break;
             } else {
                 if (!doReplace) {
-                    throwMalformed(offset - 1, 1);
+                    throwMalformed(sp - 1, 1);
                 }
                 putChar(dst, dp++, REPL);
             }
@@ -551,7 +550,7 @@ static int decodeWithDecoder(CharsetDecoder cd, char[] dst, byte[] src, int offs
     static int malformedN(byte[] src, int sp, int nb) {
         if (nb == 3) {
             int b1 = src[sp++];
-            int b2 = src[sp++];    // no need to lookup b3
+            int b2 = src[sp];    // no need to lookup b3
             return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
                     isNotContinuation(b2)) ? 1 : 2;
         } else if (nb == 4) { // we don't care the speed here
@@ -562,7 +561,7 @@ static int malformedN(byte[] src, int sp, int nb) {
                 (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
                 isNotContinuation(b2))
                 return 1;
-            if (isNotContinuation(src[sp++]))
+            if (isNotContinuation(src[sp]))
                 return 2;
             return 3;
         }
@@ -570,7 +569,7 @@ static int malformedN(byte[] src, int sp, int nb) {
         return -1;
     }
 
-    static void throwMalformed(int off, int nb) {
+    private static void throwMalformed(int off, int nb) {
         String msg = "malformed input off : " + off + ", length : " + nb;
         throw new IllegalArgumentException(msg, new MalformedInputException(nb));
     }
@@ -581,15 +580,15 @@ static void throwMalformed(byte[] val) {
         throwMalformed(dp, 1);
     }
 
-    static void throwUnmappable(int off, int nb) {
-        String msg = "malformed input off : " + off + ", length : " + nb;
-        throw new IllegalArgumentException(msg, new UnmappableCharacterException(nb));
+    private static void throwUnmappable(int off) {
+        String msg = "malformed input off : " + off + ", length : 1";
+        throw new IllegalArgumentException(msg, new UnmappableCharacterException(1));
     }
 
     static void throwUnmappable(byte[] val) {
         int dp = 0;
         while (dp < val.length && val[dp] >=0) { dp++; }
-        throwUnmappable(dp, 1);
+        throwUnmappable(dp);
     }
 
     private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
@@ -601,11 +600,10 @@ private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
 
         int dp = 0;
         byte[] dst = new byte[val.length << 1];
-        for (int sp = 0; sp < val.length; sp++) {
-            byte c = val[sp];
+        for (byte c : val) {
             if (c < 0) {
-                dst[dp++] = (byte)(0xc0 | ((c & 0xff) >> 6));
-                dst[dp++] = (byte)(0x80 | (c & 0x3f));
+                dst[dp++] = (byte) (0xc0 | ((c & 0xff) >> 6));
+                dst[dp++] = (byte) (0x80 | (c & 0x3f));
             } else {
                 dst[dp++] = c;
             }
@@ -644,7 +642,7 @@ private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
                     if (doReplace) {
                         dst[dp++] = '?';
                     } else {
-                        throwUnmappable(sp - 1, 1); // or 2, does not matter here
+                        throwUnmappable(sp - 1);
                     }
                 } else {
                     dst[dp++] = (byte)(0xf0 | ((uc >> 18)));

From a45b761df1e5f3e2caf078e21f020c10c453b443 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Thu, 21 Jan 2021 20:48:23 +0100
Subject: [PATCH 21/25] Move most of the encode/decode code to String, remove
 StringEncoder and the ThreadLocal encoder facility.

---
 .../share/classes/java/lang/String.java       | 593 ++++++++++++++-
 .../share/classes/java/lang/StringCoding.java | 698 +-----------------
 .../share/classes/java/lang/System.java       |   4 +-
 .../openjdk/bench/java/lang/StringEncode.java |  88 +++
 4 files changed, 667 insertions(+), 716 deletions(-)
 create mode 100644 test/micro/org/openjdk/bench/java/lang/StringEncode.java

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 7608452b8d1d7..1ed1bedb6c080 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -54,11 +54,11 @@
 import jdk.internal.vm.annotation.IntrinsicCandidate;
 import jdk.internal.vm.annotation.Stable;
 import sun.nio.cs.ArrayDecoder;
+import sun.nio.cs.ArrayEncoder;
 
 import static java.lang.StringCoding.ISO_8859_1;
 import static java.lang.StringCoding.US_ASCII;
 import static java.lang.StringCoding.UTF_8;
-import static java.util.function.Predicate.not;
 
 /**
  * The {@code String} class represents character strings. All
@@ -483,7 +483,7 @@ public String(byte ascii[], int hibyte) {
      */
     public String(byte[] bytes, int offset, int length, String charsetName)
             throws UnsupportedEncodingException {
-        this(bytes, offset, length, StringCoding.lookupCharset(charsetName));
+        this(bytes, offset, length, lookupCharset(charsetName));
     }
 
     /**
@@ -522,13 +522,10 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
         if (length == 0) {
             this.value = "".value;
             this.coder = "".coder;
-            return;
-        }
-        if (charset == UTF_8) {
+        } else if (charset == UTF_8) {
             if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
                 this.coder = LATIN1;
-                return;
             } else {
                 int sl = offset + length;
                 int dp = 0;
@@ -545,8 +542,8 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                         if ((b1 == (byte)0xc2 || b1 == (byte)0xc3) &&
                                 offset + 1 < sl) {
                             int b2 = bytes[offset + 1];
-                            if (!StringCoding.isNotContinuation(b2)) {
-                                dst[dp++] = (byte)StringCoding.decode2(b1, b2);
+                            if (!isNotContinuation(b2)) {
+                                dst[dp++] = (byte)decode2(b1, b2);
                                 offset += 2;
                                 continue;
                             }
@@ -571,7 +568,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                     StringLatin1.inflate(dst, 0, buf, 0, dp);
                     dst = buf;
                 }
-                dp = StringCoding.decodeUTF8_UTF16(bytes, offset, sl, dst, dp, true);
+                dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, true);
                 if (dp != length) {
                     dst = Arrays.copyOf(dst, dp << 1);
                 }
@@ -595,7 +592,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                 int dp = 0;
                 while (dp < length) {
                     int b = bytes[offset++];
-                    StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : StringCoding.REPL);
+                    StringUTF16.putChar(dst, dp++, (b >= 0) ? (char) b : REPL);
                 }
                 this.value = dst;
                 this.coder = UTF16;
@@ -633,7 +630,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                     return;
                 }
 
-                int en = StringCoding.scale(length, cd.maxCharsPerByte());
+                int en = scale(length, cd.maxCharsPerByte());
                 cd.onMalformedInput(CodingErrorAction.REPLACE)
                         .onUnmappableCharacter(CodingErrorAction.REPLACE);
                 char[] ca = new char[en];
@@ -652,7 +649,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
             }
 
             // decode using CharsetDecoder
-            int en = StringCoding.scale(length, cd.maxCharsPerByte());
+            int en = scale(length, cd.maxCharsPerByte());
             cd.onMalformedInput(CodingErrorAction.REPLACE)
                     .onUnmappableCharacter(CodingErrorAction.REPLACE);
             char[] ca = new char[en];
@@ -662,7 +659,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                 offset = 0;
             }
 
-            int caLen = StringCoding.decodeWithDecoder(cd, ca, bytes, offset, length);
+            int caLen = decodeWithDecoder(cd, ca, bytes, offset, length);
             if (COMPACT_STRINGS) {
                 byte[] bs = StringUTF16.compress(ca, 0, caLen);
                 if (bs != null) {
@@ -702,8 +699,8 @@ static String newStringUTF8NoRepl(byte[] bytes, int offset, int length) {
                     if ((b1 == (byte) 0xc2 || b1 == (byte) 0xc3) &&
                             offset + 1 < sl) {
                         int b2 = bytes[offset + 1];
-                        if (!StringCoding.isNotContinuation(b2)) {
-                            dst[dp++] = (byte) StringCoding.decode2(b1, b2);
+                        if (!isNotContinuation(b2)) {
+                            dst[dp++] = (byte) decode2(b1, b2);
                             offset += 2;
                             continue;
                         }
@@ -726,7 +723,7 @@ static String newStringUTF8NoRepl(byte[] bytes, int offset, int length) {
                 StringLatin1.inflate(dst, 0, buf, 0, dp);
                 dst = buf;
             }
-            dp = StringCoding.decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
+            dp = decodeUTF8_UTF16(bytes, offset, sl, dst, dp, false);
             if (dp != length) {
                 dst = Arrays.copyOf(dst, dp << 1);
             }
@@ -766,23 +763,24 @@ private static String newStringNoRepl1(byte[] src, Charset cs) {
                     return new String(src, LATIN1);
                 return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
             } else {
-                StringCoding.throwMalformed(src);
+                throwMalformed(src);
             }
         }
 
         CharsetDecoder cd = cs.newDecoder();
         // ascii fastpath
         if (cd instanceof ArrayDecoder ad &&
-                ad.isASCIICompatible() && !StringCoding.hasNegatives(src, 0, src.length)) {
+                ad.isASCIICompatible() &&
+                !StringCoding.hasNegatives(src, 0, src.length)) {
             return new String(src, 0, src.length, ISO_8859_1);
         }
-        int en = StringCoding.scale(len, cd.maxCharsPerByte());
+        int en = scale(len, cd.maxCharsPerByte());
         char[] ca = new char[en];
         if (cs.getClass().getClassLoader0() != null &&
                 System.getSecurityManager() != null) {
             src = Arrays.copyOf(src, len);
         }
-        int caLen = StringCoding.decodeWithDecoder(cd, ca, src, 0, src.length);
+        int caLen = decodeWithDecoder(cd, ca, src, 0, src.length);
         if (COMPACT_STRINGS) {
             byte[] bs = StringUTF16.compress(ca, 0, caLen);
             if (bs != null) {
@@ -792,6 +790,555 @@ private static String newStringNoRepl1(byte[] src, Charset cs) {
         return new String(StringUTF16.toBytes(ca, 0, caLen), UTF16);
     }
 
+    private static final char REPL = '\ufffd';  // U+FFFD, stored in place of malformed input when replacement is enabled
+
+    // Trim the given byte array to the given length; ba itself is returned only when no copy is required
+    private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
+        if (len == ba.length && (isTrusted || System.getSecurityManager() == null)) {
+            return ba;  // already exactly len long, and trusted or no SecurityManager installed
+        } else {
+            return Arrays.copyOf(ba, len);  // otherwise hand back a fresh array of length len
+        }
+    }
+
+    private static int scale(int len, float expansionFactor) {  // len * expansionFactor, converted to int
+        // We need to perform double, not float, arithmetic; otherwise
+        // we lose low order bits when len is larger than 2**24.
+        return (int)(len * (double)expansionFactor);
+    }
+
+    private static Charset lookupCharset(String csn) throws UnsupportedEncodingException {  // resolves a charset by name
+        Objects.requireNonNull(csn);
+        try {
+            return Charset.forName(csn);
+        } catch (UnsupportedCharsetException | IllegalCharsetNameException x) {
+            throw new UnsupportedEncodingException(csn);  // both lookup failures surface as the checked exception
+        }
+    }
+
+    private static byte[] encode(String csn, byte coder, byte[] val)  // encodes val with the charset named csn
+            throws UnsupportedEncodingException
+    {
+        return encode(lookupCharset(csn), coder, val);  // resolve the name, then delegate to the Charset overload
+    }
+
+    private static byte[] encode(Charset cs, byte coder, byte[] val) {  // encodes val (in the given coder) to cs, replacing on error
+        if (val.length == 0) {
+            return "".value();  // NOTE(review): returns the empty literal's own array, not a fresh one - confirm sharing is intended
+        }
+        if (cs == UTF_8) {
+            return encodeUTF8(coder, val, true);  // true = replace rather than throw
+        }
+        if (cs == ISO_8859_1) {
+            return encode8859_1(coder, val);
+        }
+        if (cs == US_ASCII) {
+            return encodeASCII(coder, val);
+        }
+        CharsetEncoder ce = cs.newEncoder();  // any other charset goes through its own encoder
+        int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
+        int en = scale(len, ce.maxBytesPerChar());  // upper bound used to size the output array
+        if (ce instanceof ArrayEncoder ae) {
+            // fastpath for ascii compatible
+            if (coder == LATIN1 &&
+                    ae.isASCIICompatible() &&
+                    !StringCoding.hasNegatives(val, 0, val.length)) {
+                return Arrays.copyOf(val, val.length);  // no byte has its high bit set: output equals input
+            }
+            byte[] ba = new byte[en];
+            ce.onMalformedInput(CodingErrorAction.REPLACE)
+                    .onUnmappableCharacter(CodingErrorAction.REPLACE);
+
+            int blen = (coder == LATIN1) ? ae.encodeFromLatin1(val, 0, len, ba)
+                    : ae.encodeFromUTF16(val, 0, len, ba);
+            if (blen != -1) {  // a result of -1 falls through to the buffer-based path below
+                return safeTrim(ba, blen, true);
+            }
+        }
+
+        byte[] ba = new byte[en];
+        ce.onMalformedInput(CodingErrorAction.REPLACE)
+                .onUnmappableCharacter(CodingErrorAction.REPLACE);
+        char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
+                : StringUTF16.toChars(val);
+        ByteBuffer bb = ByteBuffer.wrap(ba);
+        CharBuffer cb = CharBuffer.wrap(ca, 0, len);
+        try {
+            CoderResult cr = ce.encode(cb, bb, true);  // true = this is the final chunk of input
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = ce.flush(bb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            throw new Error(x);  // both error actions are REPLACE above, so this is not expected to occur
+        }
+        return safeTrim(ba, bb.position(), cs.getClass().getClassLoader0() == null);  // trusted iff the charset class has a null class loader
+    }
+
+    private static byte[] encode(byte coder, byte[] val) {  // encodes val using Charset.defaultCharset()
+        return encode(Charset.defaultCharset(), coder, val);
+    }
+
+    private static byte[] encodeASCII(byte coder, byte[] val) {  // US-ASCII encoding; anything >= 0x80 becomes '?'
+        if (coder == LATIN1) {
+            byte[] dst = new byte[val.length];  // one output byte per input byte
+            for (int i = 0; i < val.length; i++) {
+                if (val[i] < 0) {  // high bit set, i.e. not ASCII
+                    dst[i] = '?';
+                } else {
+                    dst[i] = val[i];
+                }
+            }
+            return dst;
+        }
+        int len = val.length >> 1;  // number of chars: two bytes per char in the UTF16 coder
+        byte[] dst = new byte[len];
+        int dp = 0;
+        for (int i = 0; i < len; i++) {
+            char c = StringUTF16.getChar(val, i);
+            if (c < 0x80) {
+                dst[dp++] = (byte)c;
+                continue;
+            }
+            if (Character.isHighSurrogate(c) && i + 1 < len &&
+                    Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
+                i++;  // a valid surrogate pair is consumed as a unit and yields a single '?'
+            }
+            dst[dp++] = '?';
+        }
+        if (len == dp) {
+            return dst;  // no pair was collapsed, so dst is exactly full
+        }
+        return Arrays.copyOf(dst, dp);  // shrink to the number of bytes actually written
+    }
+
+    private static byte[] encode8859_1(byte coder, byte[] val) {  // replacing variant of the three-argument overload
+        return encode8859_1(coder, val, true);
+    }
+
+    private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {  // ISO-8859-1 encoding; '?' or throw on unmappable
+        if (coder == LATIN1) {
+            return Arrays.copyOf(val, val.length);  // LATIN1-coded bytes are returned as a plain copy
+        }
+        int len = val.length >> 1;  // chars still to encode (two bytes per char)
+        byte[] dst = new byte[len];
+        int dp = 0;
+        int sp = 0;
+        int sl = len;  // fixed end index; len itself shrinks as input is consumed
+        while (sp < sl) {
+            int ret = StringCoding.implEncodeISOArray(val, sp, dst, dp, len);  // presumably the count of chars encoded before stopping
+            sp = sp + ret;
+            dp = dp + ret;
+            if (ret != len) {  // stopped early: the char at sp could not be encoded
+                if (!doReplace) {
+                    throwUnmappable(sp);
+                }
+                char c = StringUTF16.getChar(val, sp++);
+                if (Character.isHighSurrogate(c) && sp < sl &&
+                        Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
+                    sp++;  // skip the low half so a surrogate pair yields one '?'
+                }
+                dst[dp++] = '?';
+                len = sl - sp;  // remaining chars for the next bulk call
+            }
+        }
+        if (dp == dst.length) {
+            return dst;
+        }
+        return Arrays.copyOf(dst, dp);  // shorter than allocated when surrogate pairs were collapsed
+    }
+
+    //////////////////////////////// utf8 ////////////////////////////////////
+
+
+    private static boolean isNotContinuation(int b) {
+        return (b & 0xc0) != 0x80;  // true unless the byte has the form 10xxxxxx
+    }
+
+    private static boolean isMalformed3(int b1, int b2, int b3) {  // checks the three bytes of a 3-byte sequence
+        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||  // lead E0 followed by a byte in 80..9F
+                (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;  // or b2 / b3 not of the form 10xxxxxx
+    }
+
+    private static boolean isMalformed3_2(int b1, int b2) {  // same checks as isMalformed3, for the first two bytes only
+        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||  // lead E0 followed by a byte in 80..9F
+                (b2 & 0xc0) != 0x80;  // or b2 not of the form 10xxxxxx
+    }
+
+    private static boolean isMalformed4(int b2, int b3, int b4) {  // true if any trailing byte is not 10xxxxxx
+        return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
+                (b4 & 0xc0) != 0x80;
+    }
+
+    private static boolean isMalformed4_2(int b1, int b2) {  // compares against unsigned constants; decodeUTF8_UTF16 masks both with 0xff
+        return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||  // after F0, b2 must lie in 90..BF
+                (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||  // after F4, b2 must lie in 80..8F
+                (b2 & 0xc0) != 0x80;  // in every case b2 must be 10xxxxxx
+    }
+
+    private static boolean isMalformed4_3(int b3) {
+        return (b3 & 0xc0) != 0x80;  // true unless b3 has the form 10xxxxxx
+    }
+
+    private static char decode2(int b1, int b2) {  // combines the two bytes of a 2-byte sequence into one char
+        return (char)(((b1 << 6) ^ b2) ^
+                (((byte) 0xC0 << 6) ^  // xor with the identically shifted C0 / 80 constants cancels the
+                        ((byte) 0x80 << 0)));  // marker bits and their sign extension, leaving the payload
+    }
+
+    private static char decode3(int b1, int b2, int b3) {  // combines the three bytes of a 3-byte sequence into one char
+        return (char)((b1 << 12) ^
+                (b2 <<  6) ^
+                (b3 ^
+                        (((byte) 0xE0 << 12) ^  // constants mirror the shifts above so that the E0 / 80 / 80
+                                ((byte) 0x80 <<  6) ^  // marker bits (and sign extension) are xor-ed away
+                                ((byte) 0x80 <<  0))));
+    }
+
+    private static int decode4(int b1, int b2, int b3, int b4) {  // combines a 4-byte sequence into an int (may exceed char range)
+        return ((b1 << 18) ^
+                (b2 << 12) ^
+                (b3 <<  6) ^
+                (b4 ^
+                        (((byte) 0xF0 << 18) ^  // constants mirror the shifts above so that the F0 / 80 / 80 / 80
+                                ((byte) 0x80 << 12) ^  // marker bits (and sign extension) are xor-ed away
+                                ((byte) 0x80 <<  6) ^
+                                ((byte) 0x80 <<  0))));
+    }
+
+    private static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp, boolean doReplace) {  // decodes src[sp..sl) into dst from char index dp; returns the new dp
+        while (sp < sl) {
+            int b1 = src[sp++];  // sign-extended, so bytes >= 0x80 are negative here
+            if (b1 >= 0) {  // single byte (ASCII)
+                StringUTF16.putChar(dst, dp++, (char) b1);
+            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {  // 110xxxxx lead byte, excluding C0 and C1
+                if (sp < sl) {
+                    int b2 = src[sp++];
+                    if (isNotContinuation(b2)) {
+                        if (!doReplace) {
+                            throwMalformed(sp - 1, 1);
+                        }
+                        StringUTF16.putChar(dst, dp++, REPL);
+                        sp--;  // b2 is re-examined as the start of the next sequence
+                    } else {
+                        StringUTF16.putChar(dst, dp++, decode2(b1, b2));
+                    }
+                    continue;
+                }
+                if (!doReplace) {
+                    throwMalformed(sp, 1);  // underflow()
+                }
+                StringUTF16.putChar(dst, dp++, REPL);
+                break;  // input ended after the lead byte
+            } else if ((b1 >> 4) == -2) {  // 1110xxxx lead byte (3-byte sequence)
+                if (sp + 1 < sl) {  // both trailing bytes are available
+                    int b2 = src[sp++];
+                    int b3 = src[sp++];
+                    if (isMalformed3(b1, b2, b3)) {
+                        if (!doReplace) {
+                            throwMalformed(sp - 3, 3);
+                        }
+                        StringUTF16.putChar(dst, dp++, REPL);
+                        sp -= 3;  // rewind to the lead byte ...
+                        sp += malformed3(src, sp);  // ... then skip only the malformed prefix
+                    } else {
+                        char c = decode3(b1, b2, b3);
+                        if (Character.isSurrogate(c)) {  // a 3-byte form that decodes to a surrogate is rejected
+                            if (!doReplace) {
+                                throwMalformed(sp - 3, 3);
+                            }
+                            StringUTF16.putChar(dst, dp++, REPL);
+                        } else {
+                            StringUTF16.putChar(dst, dp++, c);
+                        }
+                    }
+                    continue;
+                }
+                if (sp < sl && isMalformed3_2(b1, src[sp])) {  // one byte left and it cannot follow b1
+                    if (!doReplace) {
+                        throwMalformed(sp - 1, 2);
+                    }
+                    StringUTF16.putChar(dst, dp++, REPL);
+                    continue;  // sp still points at that byte, so it is decoded on its own next
+                }
+                if (!doReplace) {
+                    throwMalformed(sp, 1);
+                }
+                StringUTF16.putChar(dst, dp++, REPL);
+                break;  // truncated sequence at the end of input
+            } else if ((b1 >> 3) == -2) {  // 11110xxx lead byte (4-byte sequence)
+                if (sp + 2 < sl) {  // all three trailing bytes are available
+                    int b2 = src[sp++];
+                    int b3 = src[sp++];
+                    int b4 = src[sp++];
+                    int uc = decode4(b1, b2, b3, b4);
+                    if (isMalformed4(b2, b3, b4) ||
+                            !Character.isSupplementaryCodePoint(uc)) { // shortest form check
+                        if (!doReplace) {
+                            throwMalformed(sp - 4, 4);
+                        }
+                        StringUTF16.putChar(dst, dp++, REPL);
+                        sp -= 4;  // rewind to the lead byte ...
+                        sp += malformed4(src, sp);  // ... then skip only the malformed prefix
+                    } else {
+                        StringUTF16.putChar(dst, dp++, Character.highSurrogate(uc));  // supplementary code point: stored as
+                        StringUTF16.putChar(dst, dp++, Character.lowSurrogate(uc));  // a surrogate pair (two chars)
+                    }
+                    continue;
+                }
+                b1 &= 0xff;  // truncated tail: switch to the unsigned value for the range checks
+                if (b1 > 0xf4 || sp < sl && isMalformed4_2(b1, src[sp] & 0xff)) {
+                    if (!doReplace) {
+                        throwMalformed(sp - 1, 1);  // or 2
+                    }
+                    StringUTF16.putChar(dst, dp++, REPL);
+                    continue;
+                }
+                if (!doReplace) {
+                    throwMalformed(sp - 1, 1);
+                }
+                sp++;  // step over the second byte
+                StringUTF16.putChar(dst, dp++, REPL);
+                if (sp < sl && isMalformed4_3(src[sp])) {
+                    continue;  // the remaining byte is not a continuation: decode it separately
+                }
+                break;
+            } else {  // any other byte value, e.g. a stray 10xxxxxx byte
+                if (!doReplace) {
+                    throwMalformed(sp - 1, 1);
+                }
+                StringUTF16.putChar(dst, dp++, REPL);
+            }
+        }
+        return dp;  // char index just past the last char written
+    }
+
+    private static int decodeWithDecoder(CharsetDecoder cd, char[] dst, byte[] src, int offset, int length) {  // decodes src[offset, offset+length) into dst; returns chars written
+        ByteBuffer bb = ByteBuffer.wrap(src, offset, length);
+        CharBuffer cb = CharBuffer.wrap(dst, 0, dst.length);
+        try {
+            CoderResult cr = cd.decode(bb, cb, true);  // true = this is the final chunk of input
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = cd.flush(cb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            // Substitution is always enabled,
+            // so this shouldn't happen
+            throw new Error(x);
+        }
+        return cb.position();
+    }
+
+    private static int malformed3(byte[] src, int sp) {  // how many bytes to consume for a malformed 3-byte sequence at sp
+        int b1 = src[sp++];
+        int b2 = src[sp];    // no need to lookup b3
+        return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
+                isNotContinuation(b2)) ? 1 : 2;  // 1 when b2 is already invalid after b1, otherwise 2
+    }
+
+    private static int malformed4(byte[] src, int sp) {  // how many bytes to consume for a malformed 4-byte sequence at sp
+        // we don't care the speed here
+        int b1 = src[sp++] & 0xff;  // unsigned values, to match the hex range checks below
+        int b2 = src[sp++] & 0xff;
+        if (b1 > 0xf4 ||
+                (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
+                (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
+                isNotContinuation(b2))
+            return 1;  // the lead byte alone is bad, or b2 cannot follow it
+        if (isNotContinuation(src[sp]))
+            return 2;  // b1 b2 are fine but the third byte is not a continuation
+        return 3;
+    }
+
+    private static void throwMalformed(int off, int nb) {  // always throws: IAE whose cause is MalformedInputException(nb)
+        String msg = "malformed input off : " + off + ", length : " + nb;
+        throw new IllegalArgumentException(msg, new MalformedInputException(nb));
+    }
+
+    private static void throwMalformed(byte[] val) {  // always throws, reporting the first negative byte (or val.length if none)
+        int dp = 0;
+        while (dp < val.length && val[dp] >=0) { dp++; }  // advance past the leading run of non-negative bytes
+        throwMalformed(dp, 1);
+    }
+
+    private static void throwUnmappable(int off) {  // always throws: IAE whose cause is UnmappableCharacterException(1)
+        String msg = "malformed input off : " + off + ", length : 1";  // NOTE(review): text says "malformed" although the cause is unmappable - confirm intended
+        throw new IllegalArgumentException(msg, new UnmappableCharacterException(1));
+    }
+
+    private static void throwUnmappable(byte[] val) {  // always throws, reporting the first negative byte (or val.length if none)
+        int dp = 0;
+        while (dp < val.length && val[dp] >=0) { dp++; }  // advance past the leading run of non-negative bytes
+        throwUnmappable(dp);
+    }
+
+    private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {  // UTF-8 encodes val, which is in the given coder
+        if (coder == UTF16)
+            return encodeUTF8_UTF16(val, doReplace);
+
+        if (!StringCoding.hasNegatives(val, 0, val.length))
+            return Arrays.copyOf(val, val.length);  // LATIN1 with no high-bit bytes: the output is a plain copy
+
+        int dp = 0;
+        byte[] dst = new byte[val.length << 1];  // each LATIN1 byte produces at most two output bytes
+        for (byte c : val) {
+            if (c < 0) {  // 0x80..0xFF is written as a 2-byte sequence
+                dst[dp++] = (byte) (0xc0 | ((c & 0xff) >> 6));
+                dst[dp++] = (byte) (0x80 | (c & 0x3f));
+            } else {
+                dst[dp++] = c;
+            }
+        }
+        if (dp == dst.length)
+            return dst;
+        return Arrays.copyOf(dst, dp);  // shrink to the number of bytes actually written
+    }
+
+    private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {  // UTF-8 encodes UTF16-coded val
+        int dp = 0;
+        int sp = 0;
+        int sl = val.length >> 1;  // number of chars (two bytes per char)
+        byte[] dst = new byte[sl * 3];  // the branches below emit at most 3 bytes per input char
+        char c;
+        while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
+            // ascii fast loop;
+            dst[dp++] = (byte)c;
+            sp++;
+        }
+        while (sp < sl) {
+            c = StringUTF16.getChar(val, sp++);
+            if (c < 0x80) {
+                dst[dp++] = (byte)c;
+            } else if (c < 0x800) {  // two bytes
+                dst[dp++] = (byte)(0xc0 | (c >> 6));
+                dst[dp++] = (byte)(0x80 | (c & 0x3f));
+            } else if (Character.isSurrogate(c)) {
+                int uc = -1;  // stays negative unless a valid high/low pair is found
+                char c2;
+                if (Character.isHighSurrogate(c) && sp < sl &&
+                        Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
+                    uc = Character.toCodePoint(c, c2);
+                }
+                if (uc < 0) {  // unpaired surrogate
+                    if (doReplace) {
+                        dst[dp++] = '?';
+                    } else {
+                        throwUnmappable(sp - 1);  // sp - 1 is the index of the offending char
+                    }
+                } else {
+                    dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
+                    dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
+                    dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
+                    dst[dp++] = (byte)(0x80 | (uc & 0x3f));
+                    sp++;  // 2 chars
+                }
+            } else {
+                // 3 bytes, 16 bits
+                dst[dp++] = (byte)(0xe0 | ((c >> 12)));
+                dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
+                dst[dp++] = (byte)(0x80 | (c & 0x3f));
+            }
+        }
+        if (dp == dst.length) {
+            return dst;
+        }
+        return Arrays.copyOf(dst, dp);  // shrink to the number of bytes actually written
+    }
+
+    /*
+     * Encodes s as UTF-8. Throws iae, instead of replacing, if unmappable (e.g. an unpaired surrogate).
+     */
+    static byte[] getBytesUTF8NoRepl(String s) {
+        return encodeUTF8(s.coder(), s.value(), false);  // false = throw instead of writing '?'
+    }
+
+    ////////////////////// for j.n.f.Files //////////////////////////
+
+    private static boolean isASCII(byte[] src) {  // true when no byte of src is negative (i.e. has its high bit set)
+        return !StringCoding.hasNegatives(src, 0, src.length);
+    }
+
+    /*
+     * Encodes s in cs. Throws CCE, instead of replacing, if unmappable; any other IAE propagates unchanged.
+     */
+    static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
+        try {
+            return getBytesNoRepl1(s, cs);
+        } catch (IllegalArgumentException e) {
+            // getBytesNoRepl1 reports coding failures as an IAE with a CCE (incl. unmappable) as cause
+            if (e.getCause() instanceof CharacterCodingException cce) {
+                throw cce;
+            }
+            // no CCE attached: rethrow as is rather than fail with ClassCastException / throw null
+            throw e;
+        }
+    }
+
+    private static byte[] getBytesNoRepl1(String s, Charset cs) {  // encodes s in cs; coding failures surface as IAE
+        byte[] val = s.value();
+        byte coder = s.coder();
+        if (cs == UTF_8) {
+            if (coder == LATIN1 && isASCII(val)) {
+                return val;  // no copy: presumably the string's own array - NOTE(review): confirm callers never modify it
+            }
+            return encodeUTF8(coder, val, false);  // false = throw instead of replacing
+        }
+        if (cs == ISO_8859_1) {
+            if (coder == LATIN1) {
+                return val;  // no copy, as above
+            }
+            return encode8859_1(coder, val, false);
+        }
+        if (cs == US_ASCII) {
+            if (coder == LATIN1) {
+                if (isASCII(val)) {
+                    return val;  // no copy, as above
+                } else {
+                    throwUnmappable(val);  // always throws
+                }
+            }
+        }  // US_ASCII with the UTF16 coder falls through to the generic encoder path
+        CharsetEncoder ce = cs.newEncoder();  // error actions are not changed here, unlike in encode(Charset, ...)
+        // fastpath for ascii compatible
+        if (coder == LATIN1 &&
+                ce instanceof ArrayEncoder ae &&
+                ae.isASCIICompatible() &&
+                isASCII(val)) {
+            return val;  // no copy, as above
+        }
+        int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
+        int en = scale(len, ce.maxBytesPerChar());  // upper bound used to size the output array
+        byte[] ba = new byte[en];
+        if (len == 0) {
+            return ba;
+        }
+        if (ce instanceof ArrayEncoder ae) {
+            int blen = (coder == LATIN1 ) ? ae.encodeFromLatin1(val, 0, len, ba)
+                                          : ae.encodeFromUTF16(val, 0, len, ba);
+            if (blen != -1) {  // a result of -1 falls through to the buffer-based path below
+                return safeTrim(ba, blen, true);
+            }
+        }
+        char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
+                                       : StringUTF16.toChars(val);
+        ByteBuffer bb = ByteBuffer.wrap(ba);
+        CharBuffer cb = CharBuffer.wrap(ca, 0, len);
+        try {
+            CoderResult cr = ce.encode(cb, bb, true);  // true = this is the final chunk of input
+            if (!cr.isUnderflow())
+                cr.throwException();
+            cr = ce.flush(bb);
+            if (!cr.isUnderflow())
+                cr.throwException();
+        } catch (CharacterCodingException x) {
+            throw new IllegalArgumentException(x);  // getBytesNoRepl unwraps this cause again
+        }
+        return safeTrim(ba, bb.position(), cs.getClass().getClassLoader0() == null );  // trusted iff the charset class has a null class loader
+    }
+
     /**
      * Constructs a new {@code String} by decoding the specified array of bytes
      * using the specified {@linkplain java.nio.charset.Charset charset}.  The
@@ -1219,7 +1766,7 @@ public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) {
     public byte[] getBytes(String charsetName)
             throws UnsupportedEncodingException {
         if (charsetName == null) throw new NullPointerException();
-        return StringCoding.encode(charsetName, coder(), value);
+        return encode(charsetName, coder(), value);
     }
 
     /**
@@ -1242,7 +1789,7 @@ public byte[] getBytes(String charsetName)
      */
     public byte[] getBytes(Charset charset) {
         if (charset == null) throw new NullPointerException();
-        return StringCoding.encode(charset, coder(), value);
+        return encode(charset, coder(), value);
      }
 
     /**
@@ -1259,7 +1806,7 @@ public byte[] getBytes(Charset charset) {
      * @since      1.1
      */
     public byte[] getBytes() {
-        return StringCoding.encode(coder(), value);
+        return encode(coder(), value);
     }
 
     /**
diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index b455e167bf7fc..22439ed6ea30b 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -25,255 +25,22 @@
 
 package java.lang;
 
-import java.io.UnsupportedEncodingException;
-import java.lang.ref.SoftReference;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
 import java.nio.charset.Charset;
-import java.nio.charset.CharsetDecoder;
-import java.nio.charset.CharsetEncoder;
-import java.nio.charset.CharacterCodingException;
-import java.nio.charset.CoderResult;
-import java.nio.charset.CodingErrorAction;
-import java.nio.charset.IllegalCharsetNameException;
-import java.nio.charset.MalformedInputException;
-import java.nio.charset.UnmappableCharacterException;
-import java.nio.charset.UnsupportedCharsetException;
-import java.util.Arrays;
-import java.util.Objects;
 
 import jdk.internal.vm.annotation.IntrinsicCandidate;
-import sun.nio.cs.HistoricallyNamedCharset;
-import sun.nio.cs.ArrayEncoder;
 
-import static java.lang.String.LATIN1;
-import static java.lang.String.UTF16;
-import static java.lang.StringUTF16.putChar;
 
 /**
  * Utility class for string encoding and decoding.
  */
-
 class StringCoding {
 
     private StringCoding() { }
 
-    /** The cached coders for each thread */
-    private static final ThreadLocal<SoftReference<StringEncoder>> encoder =
-        new ThreadLocal<>();
-
     static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
     static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
     static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
 
-    static final char REPL = '\ufffd';
-
-    private static <T> T deref(ThreadLocal<SoftReference<T>> tl) {
-        SoftReference<T> sr = tl.get();
-        if (sr == null)
-            return null;
-        return sr.get();
-    }
-
-    private static <T> void set(ThreadLocal<SoftReference<T>> tl, T ob) {
-        tl.set(new SoftReference<>(ob));
-    }
-
-    // Trim the given byte array to the given length
-    private static byte[] safeTrim(byte[] ba, int len, boolean isTrusted) {
-        if (len == ba.length && (isTrusted || System.getSecurityManager() == null))
-            return ba;
-        else
-            return Arrays.copyOf(ba, len);
-    }
-
-    static int scale(int len, float expansionFactor) {
-        // We need to perform double, not float, arithmetic; otherwise
-        // we lose low order bits when len is larger than 2**24.
-        return (int)(len * (double)expansionFactor);
-    }
-
-    static Charset lookupCharset(String csn) throws UnsupportedEncodingException {
-        Objects.requireNonNull(csn);
-        try {
-            return Charset.forName(csn);
-        } catch (UnsupportedCharsetException | IllegalCharsetNameException x) {
-            throw new UnsupportedEncodingException(csn);
-        }
-    }
-
-    @IntrinsicCandidate
-    public static boolean hasNegatives(byte[] ba, int off, int len) {
-        for (int i = off; i < off + len; i++) {
-            if (ba[i] < 0) {
-                return true;
-            }
-        }
-        return false;
-    }
-
-    // -- Encoding --
-    private static class StringEncoder {
-        private Charset cs;
-        private CharsetEncoder ce;
-        private final boolean isASCIICompatible;
-        private final String requestedCharsetName;
-        private final boolean isTrusted;
-
-        private StringEncoder(Charset cs, String rcn) {
-            this.requestedCharsetName = rcn;
-            this.cs = cs;
-            this.ce = cs.newEncoder()
-                .onMalformedInput(CodingErrorAction.REPLACE)
-                .onUnmappableCharacter(CodingErrorAction.REPLACE);
-            this.isTrusted = (cs.getClass().getClassLoader0() == null);
-            this.isASCIICompatible = (ce instanceof ArrayEncoder) &&
-                    ((ArrayEncoder)ce).isASCIICompatible();
-        }
-
-        private String charsetName() {
-            if (cs instanceof HistoricallyNamedCharset)
-                return ((HistoricallyNamedCharset)cs).historicalName();
-            return cs.name();
-        }
-
-        private final String requestedCharsetName() {
-            return requestedCharsetName;
-        }
-
-        private byte[] encode(byte coder, byte[] val) {
-            // fastpath for ascii compatible
-            if (coder == LATIN1 && isASCIICompatible &&
-                !hasNegatives(val, 0, val.length)) {
-                return Arrays.copyOf(val, val.length);
-            }
-            int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
-            int en = scale(len, ce.maxBytesPerChar());
-            byte[] ba = new byte[en];
-            if (len == 0) {
-                return ba;
-            }
-            if (ce instanceof ArrayEncoder) {
-                int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
-                                              : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
-                if (blen != -1) {
-                    return safeTrim(ba, blen, isTrusted);
-                }
-            }
-            char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
-                                           : StringUTF16.toChars(val);
-            ce.reset();
-            ByteBuffer bb = ByteBuffer.wrap(ba);
-            CharBuffer cb = CharBuffer.wrap(ca, 0, len);
-            try {
-                CoderResult cr = ce.encode(cb, bb, true);
-                if (!cr.isUnderflow())
-                    cr.throwException();
-                cr = ce.flush(bb);
-                if (!cr.isUnderflow())
-                    cr.throwException();
-            } catch (CharacterCodingException x) {
-                // Substitution is always enabled,
-                // so this shouldn't happen
-                throw new Error(x);
-            }
-            return safeTrim(ba, bb.position(), isTrusted);
-        }
-    }
-
-    static byte[] encode(String csn, byte coder, byte[] val)
-        throws UnsupportedEncodingException
-    {
-        StringEncoder se = deref(encoder);
-        if ((se == null) || !(csn.equals(se.requestedCharsetName())
-                              || csn.equals(se.charsetName()))) {
-            Charset cs = lookupCharset(csn);
-            if (cs == UTF_8) {
-                return encodeUTF8(coder, val, true);
-            }
-            if (cs == ISO_8859_1) {
-                return encode8859_1(coder, val);
-            }
-            if (cs == US_ASCII) {
-                return encodeASCII(coder, val);
-            }
-            se = new StringEncoder(cs, csn);
-            set(encoder, se);
-        }
-        return se.encode(coder, val);
-    }
-
-    static byte[] encode(Charset cs, byte coder, byte[] val) {
-        if (cs == UTF_8) {
-            return encodeUTF8(coder, val, true);
-        }
-        if (cs == ISO_8859_1) {
-            return encode8859_1(coder, val);
-        }
-        if (cs == US_ASCII) {
-            return encodeASCII(coder, val);
-        }
-        CharsetEncoder ce = cs.newEncoder();
-        // fastpath for ascii compatible
-        if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
-                                 ((ArrayEncoder)ce).isASCIICompatible() &&
-                                 !hasNegatives(val, 0, val.length)))) {
-            return Arrays.copyOf(val, val.length);
-        }
-        int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
-        int en = scale(len, ce.maxBytesPerChar());
-        byte[] ba = new byte[en];
-        if (len == 0) {
-            return ba;
-        }
-        ce.onMalformedInput(CodingErrorAction.REPLACE)
-          .onUnmappableCharacter(CodingErrorAction.REPLACE)
-          .reset();
-        if (ce instanceof ArrayEncoder) {
-            int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
-                                          : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
-            if (blen != -1) {
-                return safeTrim(ba, blen, true);
-            }
-        }
-        boolean isTrusted = cs.getClass().getClassLoader0() == null ||
-                            System.getSecurityManager() == null;
-        char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
-                                       : StringUTF16.toChars(val);
-        ByteBuffer bb = ByteBuffer.wrap(ba);
-        CharBuffer cb = CharBuffer.wrap(ca, 0, len);
-        try {
-            CoderResult cr = ce.encode(cb, bb, true);
-            if (!cr.isUnderflow())
-                cr.throwException();
-            cr = ce.flush(bb);
-            if (!cr.isUnderflow())
-                cr.throwException();
-        } catch (CharacterCodingException x) {
-            throw new Error(x);
-        }
-        return safeTrim(ba, bb.position(), isTrusted);
-    }
-
-    static byte[] encode(byte coder, byte[] val) {
-        Charset cs = Charset.defaultCharset();
-        if (cs == UTF_8) {
-            return encodeUTF8(coder, val, true);
-        }
-        if (cs == ISO_8859_1) {
-            return encode8859_1(coder, val);
-        }
-        if (cs == US_ASCII) {
-            return encodeASCII(coder, val);
-        }
-        StringEncoder se = deref(encoder);
-        if (se == null || !cs.name().equals(se.cs.name())) {
-            se = new StringEncoder(cs, cs.name());
-            set(encoder, se);
-        }
-        return se.encode(coder, val);
-    }
-
     /**
      *  Print a message directly to stderr, bypassing all character conversion
      *  methods.
@@ -281,41 +48,18 @@ static byte[] encode(byte coder, byte[] val) {
      */
     private static native void err(String msg);
 
-    private static byte[] encodeASCII(byte coder, byte[] val) {
-        if (coder == LATIN1) {
-            byte[] dst = new byte[val.length];
-            for (int i = 0; i < val.length; i++) {
-                if (val[i] < 0) {
-                    dst[i] = '?';
-                } else {
-                    dst[i] = val[i];
-                }
-            }
-            return dst;
-        }
-        int len = val.length >> 1;
-        byte[] dst = new byte[len];
-        int dp = 0;
-        for (int i = 0; i < len; i++) {
-            char c = StringUTF16.getChar(val, i);
-            if (c < 0x80) {
-                dst[dp++] = (byte)c;
-                continue;
-            }
-            if (Character.isHighSurrogate(c) && i + 1 < len &&
-                Character.isLowSurrogate(StringUTF16.getChar(val, i + 1))) {
-                i++;
+    @IntrinsicCandidate
+    public static boolean hasNegatives(byte[] ba, int off, int len) {
+        for (int i = off; i < off + len; i++) {
+            if (ba[i] < 0) {
+                return true;
             }
-            dst[dp++] = '?';
         }
-        if (len == dp) {
-            return dst;
-        }
-        return Arrays.copyOf(dst, dp);
+        return false;
     }
 
     @IntrinsicCandidate
-    private static int implEncodeISOArray(byte[] sa, int sp,
+    public static int implEncodeISOArray(byte[] sa, int sp,
                                           byte[] da, int dp, int len) {
         int i = 0;
         for (; i < len; i++) {
@@ -327,432 +71,4 @@ private static int implEncodeISOArray(byte[] sa, int sp,
         return i;
     }
 
-    private static byte[] encode8859_1(byte coder, byte[] val) {
-        return encode8859_1(coder, val, true);
-    }
-
-    private static byte[] encode8859_1(byte coder, byte[] val, boolean doReplace) {
-        if (coder == LATIN1) {
-            return Arrays.copyOf(val, val.length);
-        }
-        int len = val.length >> 1;
-        byte[] dst = new byte[len];
-        int dp = 0;
-        int sp = 0;
-        int sl = len;
-        while (sp < sl) {
-            int ret = implEncodeISOArray(val, sp, dst, dp, len);
-            sp = sp + ret;
-            dp = dp + ret;
-            if (ret != len) {
-                if (!doReplace) {
-                    throwUnmappable(sp);
-                }
-                char c = StringUTF16.getChar(val, sp++);
-                if (Character.isHighSurrogate(c) && sp < sl &&
-                    Character.isLowSurrogate(StringUTF16.getChar(val, sp))) {
-                    sp++;
-                }
-                dst[dp++] = '?';
-                len = sl - sp;
-            }
-        }
-        if (dp == dst.length) {
-            return dst;
-        }
-        return Arrays.copyOf(dst, dp);
-    }
-
-    //////////////////////////////// utf8 ////////////////////////////////////
-
-
-    static boolean isNotContinuation(int b) {
-        return (b & 0xc0) != 0x80;
-    }
-
-    private static boolean isMalformed3(int b1, int b2, int b3) {
-        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
-               (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80;
-    }
-
-    private static boolean isMalformed3_2(int b1, int b2) {
-        return (b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
-               (b2 & 0xc0) != 0x80;
-    }
-
-    private static boolean isMalformed4(int b2, int b3, int b4) {
-        return (b2 & 0xc0) != 0x80 || (b3 & 0xc0) != 0x80 ||
-               (b4 & 0xc0) != 0x80;
-    }
-
-    private static boolean isMalformed4_2(int b1, int b2) {
-        return (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
-               (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
-               (b2 & 0xc0) != 0x80;
-    }
-
-    private static boolean isMalformed4_3(int b3) {
-        return (b3 & 0xc0) != 0x80;
-    }
-
-    static char decode2(int b1, int b2) {
-        return (char)(((b1 << 6) ^ b2) ^
-                      (((byte) 0xC0 << 6) ^
-                       ((byte) 0x80 << 0)));
-    }
-
-    private static char decode3(int b1, int b2, int b3) {
-        return (char)((b1 << 12) ^
-                      (b2 <<  6) ^
-                      (b3 ^
-                       (((byte) 0xE0 << 12) ^
-                        ((byte) 0x80 <<  6) ^
-                        ((byte) 0x80 <<  0))));
-    }
-
-    private static int decode4(int b1, int b2, int b3, int b4) {
-        return ((b1 << 18) ^
-                (b2 << 12) ^
-                (b3 <<  6) ^
-                (b4 ^
-                 (((byte) 0xF0 << 18) ^
-                  ((byte) 0x80 << 12) ^
-                  ((byte) 0x80 <<  6) ^
-                  ((byte) 0x80 <<  0))));
-    }
-
-    static int decodeUTF8_UTF16(byte[] src, int sp, int sl, byte[] dst, int dp, boolean doReplace) {
-        while (sp < sl) {
-            int b1 = src[sp++];
-            if (b1 >= 0) {
-                putChar(dst, dp++, (char) b1);
-            } else if ((b1 >> 5) == -2 && (b1 & 0x1e) != 0) {
-                if (sp < sl) {
-                    int b2 = src[sp++];
-                    if (StringCoding.isNotContinuation(b2)) {
-                        if (!doReplace) {
-                            throwMalformed(sp - 1, 1);
-                        }
-                        putChar(dst, dp++, REPL);
-                        sp--;
-                    } else {
-                        putChar(dst, dp++, decode2(b1, b2));
-                    }
-                    continue;
-                }
-                if (!doReplace) {
-                    throwMalformed(sp, 1);  // underflow()
-                }
-                putChar(dst, dp++, REPL);
-                break;
-            } else if ((b1 >> 4) == -2) {
-                if (sp + 1 < sl) {
-                    int b2 = src[sp++];
-                    int b3 = src[sp++];
-                    if (isMalformed3(b1, b2, b3)) {
-                        if (!doReplace) {
-                            throwMalformed(sp - 3, 3);
-                        }
-                        putChar(dst, dp++, REPL);
-                        sp -= 3;
-                        sp += malformedN(src, sp, 3);
-                    } else {
-                        char c = decode3(b1, b2, b3);
-                        if (Character.isSurrogate(c)) {
-                            if (!doReplace) {
-                                throwMalformed(sp - 3, 3);
-                            }
-                            putChar(dst, dp++, REPL);
-                        } else {
-                            putChar(dst, dp++, c);
-                        }
-                    }
-                    continue;
-                }
-                if (sp < sl && isMalformed3_2(b1, src[sp])) {
-                    if (!doReplace) {
-                        throwMalformed(sp - 1, 2);
-                    }
-                    putChar(dst, dp++, REPL);
-                    continue;
-                }
-                if (!doReplace) {
-                    throwMalformed(sp, 1);
-                }
-                putChar(dst, dp++, REPL);
-                break;
-            } else if ((b1 >> 3) == -2) {
-                if (sp + 2 < sl) {
-                    int b2 = src[sp++];
-                    int b3 = src[sp++];
-                    int b4 = src[sp++];
-                    int uc = decode4(b1, b2, b3, b4);
-                    if (isMalformed4(b2, b3, b4) ||
-                            !Character.isSupplementaryCodePoint(uc)) { // shortest form check
-                        if (!doReplace) {
-                            throwMalformed(sp - 4, 4);
-                        }
-                        putChar(dst, dp++, REPL);
-                        sp -= 4;
-                        sp += StringCoding.malformedN(src, sp, 4);
-                    } else {
-                        putChar(dst, dp++, Character.highSurrogate(uc));
-                        putChar(dst, dp++, Character.lowSurrogate(uc));
-                    }
-                    continue;
-                }
-                b1 &= 0xff;
-                if (b1 > 0xf4 || sp < sl && StringCoding.isMalformed4_2(b1, src[sp] & 0xff)) {
-                    if (!doReplace) {
-                        throwMalformed(sp - 1, 1);  // or 2
-                    }
-                    putChar(dst, dp++, REPL);
-                    continue;
-                }
-                if (!doReplace) {
-                    throwMalformed(sp - 1, 1);
-                }
-                sp++;
-                putChar(dst, dp++, REPL);
-                if (sp < sl && StringCoding.isMalformed4_3(src[sp])) {
-                    continue;
-                }
-                break;
-            } else {
-                if (!doReplace) {
-                    throwMalformed(sp - 1, 1);
-                }
-                putChar(dst, dp++, REPL);
-            }
-        }
-        return dp;
-    }
-
-    static int decodeWithDecoder(CharsetDecoder cd, char[] dst, byte[] src, int offset, int length) {
-        ByteBuffer bb = ByteBuffer.wrap(src, offset, length);
-        CharBuffer cb = CharBuffer.wrap(dst, 0, dst.length);
-        try {
-            CoderResult cr = cd.decode(bb, cb, true);
-            if (!cr.isUnderflow())
-                cr.throwException();
-            cr = cd.flush(cb);
-            if (!cr.isUnderflow())
-                cr.throwException();
-        } catch (CharacterCodingException x) {
-            // Substitution is always enabled,
-            // so this shouldn't happen
-            throw new Error(x);
-        }
-        return cb.position();
-    }
-
-    // for nb == 3/4
-    static int malformedN(byte[] src, int sp, int nb) {
-        if (nb == 3) {
-            int b1 = src[sp++];
-            int b2 = src[sp];    // no need to lookup b3
-            return ((b1 == (byte)0xe0 && (b2 & 0xe0) == 0x80) ||
-                    isNotContinuation(b2)) ? 1 : 2;
-        } else if (nb == 4) { // we don't care the speed here
-            int b1 = src[sp++] & 0xff;
-            int b2 = src[sp++] & 0xff;
-            if (b1 > 0xf4 ||
-                (b1 == 0xf0 && (b2 < 0x90 || b2 > 0xbf)) ||
-                (b1 == 0xf4 && (b2 & 0xf0) != 0x80) ||
-                isNotContinuation(b2))
-                return 1;
-            if (isNotContinuation(src[sp]))
-                return 2;
-            return 3;
-        }
-        assert false;
-        return -1;
-    }
-
-    private static void throwMalformed(int off, int nb) {
-        String msg = "malformed input off : " + off + ", length : " + nb;
-        throw new IllegalArgumentException(msg, new MalformedInputException(nb));
-    }
-
-    static void throwMalformed(byte[] val) {
-        int dp = 0;
-        while (dp < val.length && val[dp] >=0) { dp++; }
-        throwMalformed(dp, 1);
-    }
-
-    private static void throwUnmappable(int off) {
-        String msg = "malformed input off : " + off + ", length : 1";
-        throw new IllegalArgumentException(msg, new UnmappableCharacterException(1));
-    }
-
-    static void throwUnmappable(byte[] val) {
-        int dp = 0;
-        while (dp < val.length && val[dp] >=0) { dp++; }
-        throwUnmappable(dp);
-    }
-
-    private static byte[] encodeUTF8(byte coder, byte[] val, boolean doReplace) {
-        if (coder == UTF16)
-            return encodeUTF8_UTF16(val, doReplace);
-
-        if (!hasNegatives(val, 0, val.length))
-            return Arrays.copyOf(val, val.length);
-
-        int dp = 0;
-        byte[] dst = new byte[val.length << 1];
-        for (byte c : val) {
-            if (c < 0) {
-                dst[dp++] = (byte) (0xc0 | ((c & 0xff) >> 6));
-                dst[dp++] = (byte) (0x80 | (c & 0x3f));
-            } else {
-                dst[dp++] = c;
-            }
-        }
-        if (dp == dst.length)
-            return dst;
-        return Arrays.copyOf(dst, dp);
-    }
-
-    private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
-        int dp = 0;
-        int sp = 0;
-        int sl = val.length >> 1;
-        byte[] dst = new byte[sl * 3];
-        char c;
-        while (sp < sl && (c = StringUTF16.getChar(val, sp)) < '\u0080') {
-            // ascii fast loop;
-            dst[dp++] = (byte)c;
-            sp++;
-        }
-        while (sp < sl) {
-            c = StringUTF16.getChar(val, sp++);
-            if (c < 0x80) {
-                dst[dp++] = (byte)c;
-            } else if (c < 0x800) {
-                dst[dp++] = (byte)(0xc0 | (c >> 6));
-                dst[dp++] = (byte)(0x80 | (c & 0x3f));
-            } else if (Character.isSurrogate(c)) {
-                int uc = -1;
-                char c2;
-                if (Character.isHighSurrogate(c) && sp < sl &&
-                    Character.isLowSurrogate(c2 = StringUTF16.getChar(val, sp))) {
-                    uc = Character.toCodePoint(c, c2);
-                }
-                if (uc < 0) {
-                    if (doReplace) {
-                        dst[dp++] = '?';
-                    } else {
-                        throwUnmappable(sp - 1);
-                    }
-                } else {
-                    dst[dp++] = (byte)(0xf0 | ((uc >> 18)));
-                    dst[dp++] = (byte)(0x80 | ((uc >> 12) & 0x3f));
-                    dst[dp++] = (byte)(0x80 | ((uc >>  6) & 0x3f));
-                    dst[dp++] = (byte)(0x80 | (uc & 0x3f));
-                    sp++;  // 2 chars
-                }
-            } else {
-                // 3 bytes, 16 bits
-                dst[dp++] = (byte)(0xe0 | ((c >> 12)));
-                dst[dp++] = (byte)(0x80 | ((c >>  6) & 0x3f));
-                dst[dp++] = (byte)(0x80 | (c & 0x3f));
-            }
-        }
-        if (dp == dst.length) {
-            return dst;
-        }
-        return Arrays.copyOf(dst, dp);
-    }
-
-    /*
-     * Throws iae, instead of replacing, if unmappable.
-     */
-    static byte[] getBytesUTF8NoRepl(String s) {
-        return encodeUTF8(s.coder(), s.value(), false);
-    }
-
-    ////////////////////// for j.n.f.Files //////////////////////////
-
-    private static boolean isASCII(byte[] src) {
-        return !hasNegatives(src, 0, src.length);
-    }
-
-    /*
-     * Throws CCE, instead of replacing, if unmappable.
-     */
-    static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
-        try {
-            return getBytesNoRepl1(s, cs);
-        } catch (IllegalArgumentException e) {
-            //getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause
-            Throwable cause = e.getCause();
-            if (cause instanceof UnmappableCharacterException) {
-                throw (UnmappableCharacterException)cause;
-            }
-            throw (CharacterCodingException)cause;
-        }
-    }
-
-    static byte[] getBytesNoRepl1(String s, Charset cs) {
-        byte[] val = s.value();
-        byte coder = s.coder();
-        if (cs == UTF_8) {
-            if (coder == LATIN1 && isASCII(val)) {
-                return val;
-            }
-            return encodeUTF8(coder, val, false);
-        }
-        if (cs == ISO_8859_1) {
-            if (coder == LATIN1) {
-                return val;
-            }
-            return encode8859_1(coder, val, false);
-        }
-        if (cs == US_ASCII) {
-            if (coder == LATIN1) {
-                if (isASCII(val)) {
-                    return val;
-                } else {
-                    throwUnmappable(val);
-                }
-            }
-        }
-        CharsetEncoder ce = cs.newEncoder();
-        // fastpath for ascii compatible
-        if (coder == LATIN1 && (((ce instanceof ArrayEncoder) &&
-                                 ((ArrayEncoder)ce).isASCIICompatible() &&
-                                 isASCII(val)))) {
-            return val;
-        }
-        int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
-        int en = scale(len, ce.maxBytesPerChar());
-        byte[] ba = new byte[en];
-        if (len == 0) {
-            return ba;
-        }
-        if (ce instanceof ArrayEncoder) {
-            int blen = (coder == LATIN1 ) ? ((ArrayEncoder)ce).encodeFromLatin1(val, 0, len, ba)
-                                          : ((ArrayEncoder)ce).encodeFromUTF16(val, 0, len, ba);
-            if (blen != -1) {
-                return safeTrim(ba, blen, true);
-            }
-        }
-        boolean isTrusted = cs.getClass().getClassLoader0() == null ||
-                            System.getSecurityManager() == null;
-        char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
-                                       : StringUTF16.toChars(val);
-        ByteBuffer bb = ByteBuffer.wrap(ba);
-        CharBuffer cb = CharBuffer.wrap(ca, 0, len);
-        try {
-            CoderResult cr = ce.encode(cb, bb, true);
-            if (!cr.isUnderflow())
-                cr.throwException();
-            cr = ce.flush(bb);
-            if (!cr.isUnderflow())
-                cr.throwException();
-        } catch (CharacterCodingException x) {
-            throw new IllegalArgumentException(x);
-        }
-        return safeTrim(ba, bb.position(), isTrusted);
-    }
 }
diff --git a/src/java.base/share/classes/java/lang/System.java b/src/java.base/share/classes/java/lang/System.java
index e536818fbf6e9..db1b20b3fd703 100644
--- a/src/java.base/share/classes/java/lang/System.java
+++ b/src/java.base/share/classes/java/lang/System.java
@@ -2266,7 +2266,7 @@ public String newStringNoRepl(byte[] bytes, Charset cs) throws CharacterCodingEx
             }
 
             public byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
-                return StringCoding.getBytesNoRepl(s, cs);
+                return String.getBytesNoRepl(s, cs);
             }
 
             public String newStringUTF8NoRepl(byte[] bytes, int off, int len) {
@@ -2274,7 +2274,7 @@ public String newStringUTF8NoRepl(byte[] bytes, int off, int len) {
             }
 
             public byte[] getBytesUTF8NoRepl(String s) {
-                return StringCoding.getBytesUTF8NoRepl(s);
+                return String.getBytesUTF8NoRepl(s);
             }
 
             public void setCause(Throwable t, Throwable cause) {
diff --git a/test/micro/org/openjdk/bench/java/lang/StringEncode.java b/test/micro/org/openjdk/bench/java/lang/StringEncode.java
new file mode 100644
index 0000000000000..4cf5032a0dad3
--- /dev/null
+++ b/test/micro/org/openjdk/bench/java/lang/StringEncode.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright (c) 2021, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+package org.openjdk.bench.java.lang;
+
+import org.openjdk.jmh.annotations.*;
+import org.openjdk.jmh.infra.Blackhole;
+
+import java.nio.charset.Charset;
+import java.util.concurrent.TimeUnit;
+
+@BenchmarkMode(Mode.AverageTime)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@Fork(value = 3, jvmArgs = "-Xmx1g")
+@Warmup(iterations = 5, time = 2)
+@Measurement(iterations = 5, time = 3)
+@State(Scope.Thread)
+public class StringEncode {
+
+    @BenchmarkMode(Mode.AverageTime)
+    @OutputTimeUnit(TimeUnit.NANOSECONDS)
+    @Fork(value = 3, jvmArgs = "-Xmx1g")
+    @Warmup(iterations = 5, time = 2)
+    @Measurement(iterations = 5, time = 2)
+    @State(Scope.Thread)
+    public static class WithCharset {
+
+        @Param({"US-ASCII", "ISO-8859-1", "UTF-8", "MS932", "ISO-8859-6"})
+        private String charsetName;
+
+        private Charset charset;
+        private String asciiString;
+        private String utf16String;
+
+        @Setup
+        public void setup() {
+            charset = Charset.forName(charsetName);
+            asciiString = "ascii string";
+            utf16String = "UTF-\uFF11\uFF16 string";
+        }
+
+        @Benchmark
+        public void encodeCharsetName(Blackhole bh) throws Exception {
+            bh.consume(asciiString.getBytes(charsetName));
+            bh.consume(utf16String.getBytes(charsetName));
+        }
+
+        @Benchmark
+        public void encodeCharset(Blackhole bh) throws Exception {
+            bh.consume(asciiString.getBytes(charset));
+            bh.consume(utf16String.getBytes(charset));
+        }
+    }
+
+    private String asciiDefaultString;
+    private String utf16DefaultString;
+
+    @Setup
+    public void setup() {
+        asciiDefaultString = "ascii string";
+        utf16DefaultString = "UTF-\uFF11\uFF16 string";
+    }
+
+    @Benchmark
+    public void encodeDefault(Blackhole bh) throws Exception {
+        bh.consume(asciiDefaultString.getBytes());
+        bh.consume(utf16DefaultString.getBytes());
+    }
+}

From 6808d4dbd72f6c3e4ba50c0e800c88fc6e469da8 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Thu, 21 Jan 2021 21:24:10 +0100
Subject: [PATCH 22/25] Simplify getBytes -> encode

---
 .../share/classes/java/lang/String.java       | 23 +++++++------------
 1 file changed, 8 insertions(+), 15 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index 1ed1bedb6c080..dd6936471172f 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -816,16 +816,7 @@ private static Charset lookupCharset(String csn) throws UnsupportedEncodingExcep
         }
     }
 
-    private static byte[] encode(String csn, byte coder, byte[] val)
-            throws UnsupportedEncodingException
-    {
-        return encode(lookupCharset(csn), coder, val);
-    }
-
     private static byte[] encode(Charset cs, byte coder, byte[] val) {
-        if (val.length == 0) {
-            return "".value();
-        }
         if (cs == UTF_8) {
             return encodeUTF8(coder, val, true);
         }
@@ -846,6 +837,9 @@ private static byte[] encode(Charset cs, byte coder, byte[] val) {
                 return Arrays.copyOf(val, val.length);
             }
             byte[] ba = new byte[en];
+            if (len == 0) {
+                return ba;
+            }
             ce.onMalformedInput(CodingErrorAction.REPLACE)
                     .onUnmappableCharacter(CodingErrorAction.REPLACE);
 
@@ -857,6 +851,9 @@ private static byte[] encode(Charset cs, byte coder, byte[] val) {
         }
 
         byte[] ba = new byte[en];
+        if (len == 0) {
+            return ba;
+        }
         ce.onMalformedInput(CodingErrorAction.REPLACE)
                 .onUnmappableCharacter(CodingErrorAction.REPLACE);
         char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
@@ -876,10 +873,6 @@ private static byte[] encode(Charset cs, byte coder, byte[] val) {
         return safeTrim(ba, bb.position(), cs.getClass().getClassLoader0() == null);
     }
 
-    private static byte[] encode(byte coder, byte[] val) {
-        return encode(Charset.defaultCharset(), coder, val);
-    }
-
     private static byte[] encodeASCII(byte coder, byte[] val) {
         if (coder == LATIN1) {
             byte[] dst = new byte[val.length];
@@ -1766,7 +1759,7 @@ public void getBytes(int srcBegin, int srcEnd, byte dst[], int dstBegin) {
     public byte[] getBytes(String charsetName)
             throws UnsupportedEncodingException {
         if (charsetName == null) throw new NullPointerException();
-        return encode(charsetName, coder(), value);
+        return encode(lookupCharset(charsetName), coder(), value);
     }
 
     /**
@@ -1806,7 +1799,7 @@ public byte[] getBytes(Charset charset) {
      * @since      1.1
      */
     public byte[] getBytes() {
-        return encode(coder(), value);
+        return encode(Charset.defaultCharset(), coder(), value);
     }
 
     /**

From 2143cb3e59e1062ea8465720714dbd6cd17edafc Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Thu, 21 Jan 2021 21:43:45 +0100
Subject: [PATCH 23/25] Reduce code duplication in getBytes/getBytesNoRepl

---
 .../share/classes/java/lang/String.java       | 175 ++++++++----------
 1 file changed, 74 insertions(+), 101 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index dd6936471172f..f9c830bda1890 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -826,6 +826,10 @@ private static byte[] encode(Charset cs, byte coder, byte[] val) {
         if (cs == US_ASCII) {
             return encodeASCII(coder, val);
         }
+        return encodeWithEncoder(cs, coder, val, true);
+    }
+
+    private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, boolean doReplace) {
         CharsetEncoder ce = cs.newEncoder();
         int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
         int en = scale(len, ce.maxBytesPerChar());
@@ -840,8 +844,10 @@ private static byte[] encode(Charset cs, byte coder, byte[] val) {
             if (len == 0) {
                 return ba;
             }
-            ce.onMalformedInput(CodingErrorAction.REPLACE)
-                    .onUnmappableCharacter(CodingErrorAction.REPLACE);
+            if (doReplace) {
+                ce.onMalformedInput(CodingErrorAction.REPLACE)
+                        .onUnmappableCharacter(CodingErrorAction.REPLACE);
+            }
 
             int blen = (coder == LATIN1) ? ae.encodeFromLatin1(val, 0, len, ba)
                     : ae.encodeFromUTF16(val, 0, len, ba);
@@ -854,8 +860,10 @@ private static byte[] encode(Charset cs, byte coder, byte[] val) {
         if (len == 0) {
             return ba;
         }
-        ce.onMalformedInput(CodingErrorAction.REPLACE)
-                .onUnmappableCharacter(CodingErrorAction.REPLACE);
+        if (doReplace) {
+            ce.onMalformedInput(CodingErrorAction.REPLACE)
+                    .onUnmappableCharacter(CodingErrorAction.REPLACE);
+        }
         char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
                 : StringUTF16.toChars(val);
         ByteBuffer bb = ByteBuffer.wrap(ba);
@@ -868,19 +876,75 @@ private static byte[] encode(Charset cs, byte coder, byte[] val) {
             if (!cr.isUnderflow())
                 cr.throwException();
         } catch (CharacterCodingException x) {
-            throw new Error(x);
+            if (!doReplace) {   // no-replacement callers unwrap this IAE's cause
+                throw new IllegalArgumentException(x);
+            } else {            // replacement is enabled, so this shouldn't happen
+                throw new Error(x);
+            }
         }
         return safeTrim(ba, bb.position(), cs.getClass().getClassLoader0() == null);
     }
 
+    /*
+     * Throws iae, instead of replacing, if unmappable.
+     */
+    static byte[] getBytesUTF8NoRepl(String s) {
+        return encodeUTF8(s.coder(), s.value(), false);
+    }
+
+    private static boolean isASCII(byte[] src) {
+        return !StringCoding.hasNegatives(src, 0, src.length);
+    }
+
+    /*
+     * Throws CCE, instead of replacing, if unmappable.
+     */
+    static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
+        try {
+            return getBytesNoRepl1(s, cs);
+        } catch (IllegalArgumentException e) {
+            //getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause
+            Throwable cause = e.getCause();
+            if (cause instanceof UnmappableCharacterException) {
+                throw (UnmappableCharacterException)cause;
+            }
+            throw (CharacterCodingException)cause;
+        }
+    }
+
+    private static byte[] getBytesNoRepl1(String s, Charset cs) {
+        byte[] val = s.value();
+        byte coder = s.coder();
+        if (cs == UTF_8) {
+            if (coder == LATIN1 && isASCII(val)) {
+                return val;
+            }
+            return encodeUTF8(coder, val, false);
+        }
+        if (cs == ISO_8859_1) {
+            if (coder == LATIN1) {
+                return val;
+            }
+            return encode8859_1(coder, val, false);
+        }
+        if (cs == US_ASCII) {
+            if (coder == LATIN1) {
+                if (isASCII(val)) {
+                    return val;
+                } else {
+                    throwUnmappable(val);
+                }
+            }
+        }
+        return encodeWithEncoder(cs, coder, val, false);
+    }
+
     private static byte[] encodeASCII(byte coder, byte[] val) {
         if (coder == LATIN1) {
-            byte[] dst = new byte[val.length];
-            for (int i = 0; i < val.length; i++) {
-                if (val[i] < 0) {
+            byte[] dst = Arrays.copyOf(val, val.length);
+            for (int i = 0; i < dst.length; i++) {
+                if (dst[i] < 0) {
                     dst[i] = '?';
-                } else {
-                    dst[i] = val[i];
                 }
             }
             return dst;
@@ -1241,97 +1305,6 @@ private static byte[] encodeUTF8_UTF16(byte[] val, boolean doReplace) {
         return Arrays.copyOf(dst, dp);
     }
 
-    /*
-     * Throws iae, instead of replacing, if unmappable.
-     */
-    static byte[] getBytesUTF8NoRepl(String s) {
-        return encodeUTF8(s.coder(), s.value(), false);
-    }
-
-    ////////////////////// for j.n.f.Files //////////////////////////
-
-    private static boolean isASCII(byte[] src) {
-        return !StringCoding.hasNegatives(src, 0, src.length);
-    }
-
-    /*
-     * Throws CCE, instead of replacing, if unmappable.
-     */
-    static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingException {
-        try {
-            return getBytesNoRepl1(s, cs);
-        } catch (IllegalArgumentException e) {
-            //getBytesNoRepl1 throws IAE with UnmappableCharacterException or CCE as the cause
-            Throwable cause = e.getCause();
-            if (cause instanceof UnmappableCharacterException) {
-                throw (UnmappableCharacterException)cause;
-            }
-            throw (CharacterCodingException)cause;
-        }
-    }
-
-    private static byte[] getBytesNoRepl1(String s, Charset cs) {
-        byte[] val = s.value();
-        byte coder = s.coder();
-        if (cs == UTF_8) {
-            if (coder == LATIN1 && isASCII(val)) {
-                return val;
-            }
-            return encodeUTF8(coder, val, false);
-        }
-        if (cs == ISO_8859_1) {
-            if (coder == LATIN1) {
-                return val;
-            }
-            return encode8859_1(coder, val, false);
-        }
-        if (cs == US_ASCII) {
-            if (coder == LATIN1) {
-                if (isASCII(val)) {
-                    return val;
-                } else {
-                    throwUnmappable(val);
-                }
-            }
-        }
-        CharsetEncoder ce = cs.newEncoder();
-        // fastpath for ascii compatible
-        if (coder == LATIN1 &&
-                ce instanceof ArrayEncoder ae &&
-                ae.isASCIICompatible() &&
-                isASCII(val)) {
-            return val;
-        }
-        int len = val.length >> coder;  // assume LATIN1=0/UTF16=1;
-        int en = scale(len, ce.maxBytesPerChar());
-        byte[] ba = new byte[en];
-        if (len == 0) {
-            return ba;
-        }
-        if (ce instanceof ArrayEncoder ae) {
-            int blen = (coder == LATIN1 ) ? ae.encodeFromLatin1(val, 0, len, ba)
-                                          : ae.encodeFromUTF16(val, 0, len, ba);
-            if (blen != -1) {
-                return safeTrim(ba, blen, true);
-            }
-        }
-        char[] ca = (coder == LATIN1 ) ? StringLatin1.toChars(val)
-                                       : StringUTF16.toChars(val);
-        ByteBuffer bb = ByteBuffer.wrap(ba);
-        CharBuffer cb = CharBuffer.wrap(ca, 0, len);
-        try {
-            CoderResult cr = ce.encode(cb, bb, true);
-            if (!cr.isUnderflow())
-                cr.throwException();
-            cr = ce.flush(bb);
-            if (!cr.isUnderflow())
-                cr.throwException();
-        } catch (CharacterCodingException x) {
-            throw new IllegalArgumentException(x);
-        }
-        return safeTrim(ba, bb.position(), cs.getClass().getClassLoader0() == null );
-    }
-
     /**
      * Constructs a new {@code String} by decoding the specified array of bytes
      * using the specified {@linkplain java.nio.charset.Charset charset}.  The

From feb8201dff898dd800161f83fcff0d001322780f Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Fri, 22 Jan 2021 00:14:47 +0100
Subject: [PATCH 24/25] Remove StringCoding Charset constants

---
 .../share/classes/java/lang/String.java       | 32 +++++++++----------
 .../share/classes/java/lang/StringCoding.java |  7 ----
 2 files changed, 16 insertions(+), 23 deletions(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index f9c830bda1890..cbdb297e5970b 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -56,9 +56,9 @@
 import sun.nio.cs.ArrayDecoder;
 import sun.nio.cs.ArrayEncoder;
 
-import static java.lang.StringCoding.ISO_8859_1;
-import static java.lang.StringCoding.US_ASCII;
-import static java.lang.StringCoding.UTF_8;
+import sun.nio.cs.ISO_8859_1;
+import sun.nio.cs.US_ASCII;
+import sun.nio.cs.UTF_8;
 
 /**
  * The {@code String} class represents character strings. All
@@ -522,7 +522,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
         if (length == 0) {
             this.value = "".value;
             this.coder = "".coder;
-        } else if (charset == UTF_8) {
+        } else if (charset == UTF_8.INSTANCE) {
             if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
                 this.coder = LATIN1;
@@ -575,7 +575,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                 this.value = dst;
                 this.coder = UTF16;
             }
-        } else if (charset == ISO_8859_1) {
+        } else if (charset == ISO_8859_1.INSTANCE) {
             if (COMPACT_STRINGS) {
                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
                 this.coder = LATIN1;
@@ -583,7 +583,7 @@ public String(byte[] bytes, int offset, int length, Charset charset) {
                 this.value = StringLatin1.inflate(bytes, offset, length);
                 this.coder = UTF16;
             }
-        } else if (charset == US_ASCII) {
+        } else if (charset == US_ASCII.INSTANCE) {
             if (COMPACT_STRINGS && !StringCoding.hasNegatives(bytes, offset, length)) {
                 this.value = Arrays.copyOfRange(bytes, offset, offset + length);
                 this.coder = LATIN1;
@@ -749,15 +749,15 @@ private static String newStringNoRepl1(byte[] src, Charset cs) {
         if (len == 0) {
             return "";
         }
-        if (cs == UTF_8) {
+        if (cs == UTF_8.INSTANCE) {
             return newStringUTF8NoRepl(src, 0, src.length);
         }
-        if (cs == ISO_8859_1) {
+        if (cs == ISO_8859_1.INSTANCE) {
             if (COMPACT_STRINGS)
                 return new String(src, LATIN1);
             return new String(StringLatin1.inflate(src, 0, src.length), UTF16);
         }
-        if (cs == US_ASCII) {
+        if (cs == US_ASCII.INSTANCE) {
             if (!StringCoding.hasNegatives(src, 0, src.length)) {
                 if (COMPACT_STRINGS)
                     return new String(src, LATIN1);
@@ -772,7 +772,7 @@ private static String newStringNoRepl1(byte[] src, Charset cs) {
         if (cd instanceof ArrayDecoder ad &&
                 ad.isASCIICompatible() &&
                 !StringCoding.hasNegatives(src, 0, src.length)) {
-            return new String(src, 0, src.length, ISO_8859_1);
+            return new String(src, 0, src.length, ISO_8859_1.INSTANCE);
         }
         int en = scale(len, cd.maxCharsPerByte());
         char[] ca = new char[en];
@@ -817,13 +817,13 @@ private static Charset lookupCharset(String csn) throws UnsupportedEncodingExcep
     }
 
     private static byte[] encode(Charset cs, byte coder, byte[] val) {
-        if (cs == UTF_8) {
+        if (cs == UTF_8.INSTANCE) {
             return encodeUTF8(coder, val, true);
         }
-        if (cs == ISO_8859_1) {
+        if (cs == ISO_8859_1.INSTANCE) {
             return encode8859_1(coder, val);
         }
-        if (cs == US_ASCII) {
+        if (cs == US_ASCII.INSTANCE) {
             return encodeASCII(coder, val);
         }
         return encodeWithEncoder(cs, coder, val, true);
@@ -915,19 +915,19 @@ static byte[] getBytesNoRepl(String s, Charset cs) throws CharacterCodingExcepti
     private static byte[] getBytesNoRepl1(String s, Charset cs) {
         byte[] val = s.value();
         byte coder = s.coder();
-        if (cs == UTF_8) {
+        if (cs == UTF_8.INSTANCE) {
             if (coder == LATIN1 && isASCII(val)) {
                 return val;
             }
             return encodeUTF8(coder, val, false);
         }
-        if (cs == ISO_8859_1) {
+        if (cs == ISO_8859_1.INSTANCE) {
             if (coder == LATIN1) {
                 return val;
             }
             return encode8859_1(coder, val, false);
         }
-        if (cs == US_ASCII) {
+        if (cs == US_ASCII.INSTANCE) {
             if (coder == LATIN1) {
                 if (isASCII(val)) {
                     return val;
diff --git a/src/java.base/share/classes/java/lang/StringCoding.java b/src/java.base/share/classes/java/lang/StringCoding.java
index 22439ed6ea30b..4efa1a19c1d8b 100644
--- a/src/java.base/share/classes/java/lang/StringCoding.java
+++ b/src/java.base/share/classes/java/lang/StringCoding.java
@@ -25,11 +25,8 @@
 
 package java.lang;
 
-import java.nio.charset.Charset;
-
 import jdk.internal.vm.annotation.IntrinsicCandidate;
 
-
 /**
  * Utility class for string encoding and decoding.
  */
@@ -37,10 +34,6 @@ class StringCoding {
 
     private StringCoding() { }
 
-    static final Charset ISO_8859_1 = sun.nio.cs.ISO_8859_1.INSTANCE;
-    static final Charset US_ASCII = sun.nio.cs.US_ASCII.INSTANCE;
-    static final Charset UTF_8 = sun.nio.cs.UTF_8.INSTANCE;
-
     /**
      *  Print a message directly to stderr, bypassing all character conversion
      *  methods.

From 14928bfd9254422e56987f4bd1d4d1f79a7a5b62 Mon Sep 17 00:00:00 2001
From: Claes Redestad <claes.redestad@oracle.com>
Date: Fri, 22 Jan 2021 00:15:35 +0100
Subject: [PATCH 25/25] Logic error in exception handling in encodeWithEncoder

---
 src/java.base/share/classes/java/lang/String.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/java.base/share/classes/java/lang/String.java b/src/java.base/share/classes/java/lang/String.java
index cbdb297e5970b..5c175b125dcb0 100644
--- a/src/java.base/share/classes/java/lang/String.java
+++ b/src/java.base/share/classes/java/lang/String.java
@@ -876,7 +876,7 @@ private static byte[] encodeWithEncoder(Charset cs, byte coder, byte[] val, bool
             if (!cr.isUnderflow())
                 cr.throwException();
         } catch (CharacterCodingException x) {
-            if (doReplace) {
+            if (!doReplace) {
                 throw new IllegalArgumentException(x);
             } else {
                 throw new Error(x);