Skip to content
Closed
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 19 additions & 12 deletions jdk/test/sun/nio/cs/TestCharsetMapping.java
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,9 @@ private static String plural(int n) {
// EBCDIC encodings, which indicates the need of adding/
// removing the shift bytes.
private boolean shiftHackDBCS = false;
// 8u does not have JDK-8186803 so leHackIBM is true for
// 8u does not have JDK-8186803 so ebcdicLFHack is true for
// IBM1140-1149 charsets that map U+000A to 0x25.
private boolean leHackIBM = false;
private boolean ebcdicLFHack = false;

private TestCharsetMapping(CharsetInfo csinfo) throws Exception {
this.csinfo = csinfo;
Expand Down Expand Up @@ -248,7 +248,7 @@ private boolean encode(ByteBuffer refBytes, CharBuffer refChars)
+ printBytes(rbs));
}
if (!eq) {
if (leHackIBM && cp == 0xA) {
if (ebcdicLFHack && cp == 0xA) {
log.println(printCodePoint(cp) + " --> "
+ printBytes(obs) +
" allowed for IBM0114x");
Expand Down Expand Up @@ -387,7 +387,7 @@ private boolean testStringConv() throws Exception {
private boolean run() throws Exception {
boolean rv = true;
shiftHackDBCS = csinfo.type.equals("ebcdic"); // isStateful;
leHackIBM = csinfo.csName.startsWith("IBM0114"); // Maps LF to 0x25, JDK-8186803
ebcdicLFHack = csinfo.csName.startsWith("IBM0114"); // Maps LF to 0x25, JDK-8186803

// (1) new String()/String.getBytes()
rv &= testStringConv();
Expand Down Expand Up @@ -472,11 +472,6 @@ private static class CharsetInfo {
CharsetInfo(String csName, String clzName) {
this.csName = csName;
this.clzName = clzName;
if (csName.endsWith("_Solaris") ||
csName.endsWith("_MS5022X") ||
csName.endsWith("_MS932")) {
isInternal = true;
}
}

private Entry parse(Matcher m) {
Expand Down Expand Up @@ -539,14 +534,26 @@ private static Set<CharsetInfo> charsets(Path cslist, boolean sbcs) throws IOExc
if (tokens.length < 5) {
continue;
}
CharsetInfo cs = new CharsetInfo(tokens[1], tokens[0]);
// dbcs format (we ignore the fields after pkg)
// clzName csName hisName dbtype pkg ascii b1min b1max b2min b2max
// sbcs format
// clzName csName hisName containASCII pkg
CharsetInfo cs = new CharsetInfo(/* csName */ tokens[1],
/* clzName */ tokens[0]);
cs.hisName = tokens[2];
cs.pkgName = tokens[4];
if (sbcs) {
cs.type = "sbcs";
} else {
cs.type = tokens[3];
}
Copy link
Member

@tstuefe tstuefe Jun 22, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Mentally parsing:

Format 1, dbcs:
clzName csName hisName dbtype pkg ascii b1min b1max b2min b2max

hisName = tokens[2]
pkgName = tokens[4]
type = tokens[3] (the dbtype column)

Format 2, sbcs and extsbcs:
clzName csName hisName containASCII pkg

hisName = tokens[2]
pkgName = tokens[4]
type = "sbcs" (fixed string; the containASCII column at tokens[3] is ignored)

Okay, checks out.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could you add a small comment here, listing these two formats for easier code reading:

// dbcs format
// clzName csName hisName dbtype pkg ascii b1min b1max b2min b2max
// sbcs format
//  clzName csName hisName containASCII pkg

?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe also add some sanity checks:

  • pkgName should start with "sun.nio"
  • type should be one of "basic", "ebcdic", "euc_sim", "dbcsonly", "sbcs"

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The comment is a good idea. I was looking at that line in the mapping file as I was writing the code, and it would be good to have it in there directly.

I think the sanity tests are something we should add in mainline first and backport, as both would apply there too (though mainline has a few more types).

if (("JIS_X_0208_Solaris".equals(tokens[0])) ||
("JIS_X_0208_MS5022X".equals(tokens[0])) ||
("JIS_X_0208_MS932".equals(tokens[0])) ||
("JIS_X_0212_Solaris".equals(tokens[0])) ||
("JIS_X_0212_MS5022X".equals(tokens[0]))) {
cs.isInternal = true;
}
charsets.add(cs);
}
return charsets;
Expand All @@ -570,10 +577,10 @@ public static void main(String args[]) throws Exception {
int skipped = 0;
int known = 0;

Set<CharsetInfo> charsets = charsets(dir.resolve("dbcs"), false);
// sbcs files have fewer fields and a set type of sbcs
Set<CharsetInfo> charsets = charsets(dir.resolve("sbcs"), true);
charsets.addAll(charsets(dir.resolve("extsbcs"), true));
charsets.addAll(charsets(dir.resolve("dbcs"), false));
charsets.addAll(charsets(dir.resolve("sbcs"), true));

for (CharsetInfo csinfo : charsets) {
String csname = csinfo.csName;
Expand Down
1 change: 0 additions & 1 deletion jdk/test/sun/nio/cs/TestEBCDICLineFeed.java
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
/* @test
* @bug 8186803
* @summary Check if the linefeed is de/encoded correctly in ebcdic
* @modules jdk.charsets
*/

public class TestEBCDICLineFeed {
Expand Down