Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 23 additions & 6 deletions src/java.base/share/classes/java/nio/charset/Charset.java
Original file line number Diff line number Diff line change
Expand Up @@ -168,37 +168,54 @@
* <tr><th scope="row" style="vertical-align:top">{@code UTF-16}</th>
* <td>Sixteen-bit UCS Transformation Format,
* byte&nbsp;order identified by an optional byte-order mark</td></tr>
* <tr><th scope="row" style="vertical-align:top">{@code UTF-32BE}</th>
* <td>Thirty-two-bit UCS Transformation Format,
* big-endian byte&nbsp;order</td></tr>
* <tr><th scope="row" style="vertical-align:top">{@code UTF-32LE}</th>
* <td>Thirty-two-bit UCS Transformation Format,
* little-endian byte&nbsp;order</td></tr>
* <tr><th scope="row" style="vertical-align:top">{@code UTF-32}</th>
* <td>Thirty-two-bit UCS Transformation Format,
* byte&nbsp;order identified by an optional byte-order mark</td></tr>
* </tbody>
* </table></blockquote>
*
* <p> The {@code UTF-8} charset is specified by <a
* href="http://www.ietf.org/rfc/rfc2279.txt"><i>RFC&nbsp;2279</i></a>; the
* transformation format upon which it is based is specified in
* Amendment&nbsp;2 of ISO&nbsp;10646-1 and is also described in the <a
* ISO&nbsp;10646-1 and is also described in the <a
* href="http://www.unicode.org/standard/standard.html"><i>Unicode
* Standard</i></a>.
*
* <p> The {@code UTF-16} charsets are specified by <a
* href="http://www.ietf.org/rfc/rfc2781.txt"><i>RFC&nbsp;2781</i></a>; the
* transformation formats upon which they are based are specified in
* Amendment&nbsp;1 of ISO&nbsp;10646-1 and are also described in the <a
* ISO&nbsp;10646-1 and are also described in the <a
* href="http://www.unicode.org/standard/standard.html"><i>Unicode
* Standard</i></a>.
*
* <p> The {@code UTF-32} charsets are based upon transformation formats
* which are specified in
* ISO&nbsp;10646-1 and are also described in the <a
* href="http://www.unicode.org/standard/standard.html"><i>Unicode
* Standard</i></a>.
*
* <p> The {@code UTF-16} charsets use sixteen-bit quantities and are
* <p> The {@code UTF-16} and {@code UTF-32} charsets use sixteen-bit and thirty-two-bit
* quantities respectively, and are
* therefore sensitive to byte order. In these encodings the byte order of a
* stream may be indicated by an initial <i>byte-order mark</i> represented by
* the Unicode character <code>'&#92;uFEFF'</code>. Byte-order marks are handled
* the Unicode character {@code U+FEFF}. Byte-order marks are handled
* as follows:
*
* <ul>
*
* <li><p> When decoding, the {@code UTF-16BE} and {@code UTF-16LE}
* <li><p> When decoding, the {@code UTF-16BE}, {@code UTF-16LE},
* {@code UTF-32BE}, and {@code UTF-32LE}
* charsets interpret an initial byte-order mark as a <small>ZERO-WIDTH
* NON-BREAKING SPACE</small>; when encoding, they do not write
* byte-order marks. </p></li>
*
* <li><p> When decoding, the {@code UTF-16} charset interprets the
* <li><p> When decoding, the {@code UTF-16} and {@code UTF-32} charsets interpret the
* byte-order mark at the beginning of the input stream to indicate the
* byte-order of the stream but default to big-endian if there is no
* byte-order mark; when encoding, they use big-endian byte order and write
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -74,4 +74,23 @@ private StandardCharsets() {
* optional byte-order mark.
*/
public static final Charset UTF_16 = new sun.nio.cs.UTF_16();

/**
* Thirty-two-bit UCS Transformation Format, big-endian byte order.
* @since 22
*/
public static final Charset UTF_32BE = new sun.nio.cs.UTF_32BE();

/**
* Thirty-two-bit UCS Transformation Format, little-endian byte order.
* @since 22
*/
public static final Charset UTF_32LE = new sun.nio.cs.UTF_32LE();

/**
* Thirty-two-bit UCS Transformation Format, byte order identified by an
* optional byte-order mark.
* @since 22
*/
public static final Charset UTF_32 = new sun.nio.cs.UTF_32();
}
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,9 @@ public class StandardCharsets extends CharsetProvider {
map.put("utf-16", java.nio.charset.StandardCharsets.UTF_16);
map.put("utf-16be", java.nio.charset.StandardCharsets.UTF_16BE);
map.put("utf-16le", java.nio.charset.StandardCharsets.UTF_16LE);
map.put("utf-32", java.nio.charset.StandardCharsets.UTF_32);
map.put("utf-32be", java.nio.charset.StandardCharsets.UTF_32BE);
map.put("utf-32le", java.nio.charset.StandardCharsets.UTF_32LE);
cache = map;
}
return map;
Expand Down
17 changes: 14 additions & 3 deletions test/jdk/java/nio/charset/StandardCharsets/Standard.java
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2011, 2014, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2023, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand All @@ -23,7 +23,7 @@

/*
* @test
* @bug 4884238
* @bug 4884238 8310047
* @summary Test standard charset name constants.
* @author Mike Duigou
* @run main Standard
Expand All @@ -41,7 +41,9 @@ public class Standard {

private final static String standardCharsets[] = {
"US-ASCII", "ISO-8859-1", "UTF-8",
"UTF-16BE", "UTF-16LE", "UTF-16" };
"UTF-16BE", "UTF-16LE", "UTF-16",
"UTF-32BE", "UTF-32LE", "UTF-32",
};

public static void realMain(String[] args) {
check(StandardCharsets.US_ASCII instanceof Charset);
Expand All @@ -50,20 +52,29 @@ public static void realMain(String[] args) {
check(StandardCharsets.UTF_16BE instanceof Charset);
check(StandardCharsets.UTF_16LE instanceof Charset);
check(StandardCharsets.UTF_16 instanceof Charset);
check(StandardCharsets.UTF_32BE instanceof Charset);
check(StandardCharsets.UTF_32LE instanceof Charset);
check(StandardCharsets.UTF_32 instanceof Charset);

check("US-ASCII".equals(StandardCharsets.US_ASCII.name()));
check("ISO-8859-1".equals(StandardCharsets.ISO_8859_1.name()));
check("UTF-8".equals(StandardCharsets.UTF_8.name()));
check("UTF-16BE".equals(StandardCharsets.UTF_16BE.name()));
check("UTF-16LE".equals(StandardCharsets.UTF_16LE.name()));
check("UTF-16".equals(StandardCharsets.UTF_16.name()));
check("UTF-32BE".equals(StandardCharsets.UTF_32BE.name()));
check("UTF-32LE".equals(StandardCharsets.UTF_32LE.name()));
check("UTF-32".equals(StandardCharsets.UTF_32.name()));

check(Charset.forName("US-ASCII") == StandardCharsets.US_ASCII);
check(Charset.forName("ISO-8859-1") == StandardCharsets.ISO_8859_1);
check(Charset.forName("UTF-8") == StandardCharsets.UTF_8);
check(Charset.forName("UTF-16BE") == StandardCharsets.UTF_16BE);
check(Charset.forName("UTF-16LE") == StandardCharsets.UTF_16LE);
check(Charset.forName("UTF-16") == StandardCharsets.UTF_16);
check(Charset.forName("UTF-32BE") == StandardCharsets.UTF_32BE);
check(Charset.forName("UTF-32LE") == StandardCharsets.UTF_32LE);
check(Charset.forName("UTF-32") == StandardCharsets.UTF_32);

Set<String> charsets = new HashSet<>();
Field standardCharsetFields[] = StandardCharsets.class.getFields();
Expand Down