Skip to content

Commit

Permalink
8231717: Improve performance of charset decoding when charset is always compactable
Browse files Browse the repository at this point in the history

Backport-of: 0dbfc97
  • Loading branch information
Dmitry Chuyko authored and RealCLanger committed Jun 24, 2021
1 parent 48c1f60 commit 34d1c92
Show file tree
Hide file tree
Showing 5 changed files with 66 additions and 6 deletions.
4 changes: 2 additions & 2 deletions make/data/charsetmapping/SingleByte-X.java.template
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -48,7 +48,7 @@ public class $NAME_CLZ$ extends Charset implements HistoricallyNamedCharset
}

public CharsetDecoder newDecoder() {
return new SingleByte.Decoder(this, b2c, $ASCIICOMPATIBLE$);
return new SingleByte.Decoder(this, b2c, $ASCIICOMPATIBLE$, $LATIN1DECODABLE$);
}

public CharsetEncoder newEncoder() {
Expand Down
9 changes: 8 additions & 1 deletion make/jdk/src/classes/build/tools/charsetmapping/SBCS.java
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -46,6 +46,7 @@ public static void genClass(Charset cs,
String hisName = cs.hisName;
String pkgName = cs.pkgName;
boolean isASCII = cs.isASCII;
boolean isLatin1Decodable = true;

StringBuilder b2cSB = new StringBuilder();
StringBuilder b2cNRSB = new StringBuilder();
Expand All @@ -69,6 +70,9 @@ public static void genClass(Charset cs,
c2bOff += 0x100;
c2bIndex[e.cp>>8] = 1;
}
if (e.cp > 0xFF) {
isLatin1Decodable = false;
}
}

Formatter fm = new Formatter(b2cSB);
Expand Down Expand Up @@ -178,6 +182,9 @@ public static void genClass(Charset cs,
if (line.indexOf("$ASCIICOMPATIBLE$") != -1) {
line = line.replace("$ASCIICOMPATIBLE$", isASCII ? "true" : "false");
}
if (line.indexOf("$LATIN1DECODABLE$") != -1) {
line = line.replace("$LATIN1DECODABLE$", isLatin1Decodable ? "true" : "false");
}
if (line.indexOf("$B2CTABLE$") != -1) {
line = line.replace("$B2CTABLE$", b2c);
}
Expand Down
15 changes: 14 additions & 1 deletion src/java.base/share/classes/java/lang/StringCoding.java
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -191,6 +191,12 @@ Result decode(byte[] ba, int off, int len) {
return result.with(StringLatin1.inflate(ba, off, len), UTF16);
}
}
// fastpath for always Latin1 decodable single byte
if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
byte[] dst = new byte[len];
((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst);
return result.with(dst, LATIN1);
}
int en = scale(len, cd.maxCharsPerByte());
char[] ca = new char[en];
if (cd instanceof ArrayDecoder) {
Expand Down Expand Up @@ -278,6 +284,13 @@ static Result decode(Charset cs, byte[] ba, int off, int len) {
((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
return decodeLatin1(ba, off, len);
}
// fastpath for always Latin1 decodable single byte
if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
byte[] dst = new byte[len];
((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst);
return new Result().with(dst, LATIN1);
}

int en = scale(len, cd.maxCharsPerByte());
if (len == 0) {
return new Result().with();
Expand Down
15 changes: 14 additions & 1 deletion src/java.base/share/classes/sun/nio/cs/ArrayDecoder.java
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -28,6 +28,9 @@
/*
* FastPath byte[]->char[] decoder, REPLACE on malformed or
* unmappable input.
*
* FastPath encoded byte[] -> "String Latin1 coding" byte[] decoder, for use when the
* charset is always decodable to the internal String Latin1 coding byte[], i.e. all mappings <= 0xff.
*/

public interface ArrayDecoder {
Expand All @@ -36,4 +39,14 @@ public interface ArrayDecoder {
// Reports whether the charset handled by this decoder is ASCII compatible.
// This default implementation always returns false; a decoder overrides it to opt in.
// NOTE(review): presumably "ASCII compatible" means bytes 0x00-0x7f decode to the
// same code points — confirm against the callers that test this flag.
default boolean isASCIICompatible() {
return false;
}

// Reports whether this decoder's output is always decodable to the internal
// String Latin1 coding, i.e. all mappings are <= 0xff.
// This default implementation always returns false; a decoder overrides it to opt in.
default boolean isLatin1Decodable() {
return false;
}

// Fast path that decodes src directly into the internal String Latin1 coding byte[],
// for use when isLatin1Decodable() returns true.
//   src - encoded input bytes
//   sp  - index in src at which decoding starts
//   len - number of input bytes to decode
//   dst - destination for the Latin1-coded result
// This default implementation writes nothing to dst and returns 0.
// NOTE(review): a decoder that reports isLatin1Decodable() == true presumably must
// override this method, otherwise dst is left untouched — confirm.
default int decodeToLatin1(byte[] src, int sp, int len, byte[] dst) {
return 0;
}
}
29 changes: 28 additions & 1 deletion src/java.base/share/classes/sun/nio/cs/SingleByte.java
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
Expand Down Expand Up @@ -50,17 +50,27 @@ public static final class Decoder extends CharsetDecoder
implements ArrayDecoder {
private final char[] b2c;
private final boolean isASCIICompatible;
private final boolean isLatin1Decodable;

/**
 * Creates a decoder for {@code cs} that uses the {@code b2c} char table, with both
 * the ASCII-compatible and the Latin1-decodable flags set to {@code false}.
 *
 * @param cs  the charset this decoder belongs to
 * @param b2c the char table stored in this decoder
 */
public Decoder(Charset cs, char[] b2c) {
    // Delegate to the full constructor so the field initialisation lives in one place.
    this(cs, b2c, false, false);
}

/**
 * Creates a decoder for {@code cs} that uses the {@code b2c} char table and the given
 * ASCII-compatible flag; the Latin1-decodable flag is set to {@code false}.
 *
 * @param cs                the charset this decoder belongs to
 * @param b2c               the char table stored in this decoder
 * @param isASCIICompatible value later reported by {@code isASCIICompatible()}
 */
public Decoder(Charset cs, char[] b2c, boolean isASCIICompatible) {
    // Delegate to the full constructor so the field initialisation lives in one place.
    this(cs, b2c, isASCIICompatible, false);
}

/**
 * Creates a decoder for {@code cs} with every field supplied explicitly.
 * The superclass is initialised with 1.0f for both of its float arguments.
 *
 * @param cs                the charset this decoder belongs to
 * @param b2c               the char table stored in this decoder
 * @param isASCIICompatible value later reported by {@code isASCIICompatible()}
 * @param isLatin1Decodable value later reported by {@code isLatin1Decodable()}
 */
public Decoder(Charset cs, char[] b2c, boolean isASCIICompatible, boolean isLatin1Decodable) {
    super(cs, 1.0f, 1.0f);
    this.isLatin1Decodable = isLatin1Decodable;
    this.isASCIICompatible = isASCIICompatible;
    this.b2c = b2c;
}

private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
Expand Down Expand Up @@ -124,6 +134,18 @@ protected void implReplaceWith(String newReplacement) {
repl = newReplacement.charAt(0);
}

/**
 * Decodes bytes from {@code src}, beginning at index {@code sp}, and stores each
 * decoded char narrowed to a {@code byte} into {@code dst}, beginning at index 0.
 * At most {@code dst.length} bytes are produced, even when {@code len} is larger.
 *
 * @param src encoded input bytes
 * @param sp  index in {@code src} of the first byte to decode
 * @param len requested number of bytes to decode
 * @param dst destination array for the narrowed results
 * @return the number of bytes written to {@code dst}
 */
@Override
public int decodeToLatin1(byte[] src, int sp, int len, byte[] dst) {
    // Clamp the request to the capacity of the destination.
    final int limit = (len > dst.length) ? dst.length : len;

    int written = 0;
    for (; written < limit; written++) {
        dst[written] = (byte) decode(src[sp + written]);
    }
    return written;
}

@Override
public int decode(byte[] src, int sp, int len, char[] dst) {
if (len > dst.length)
Expand All @@ -143,6 +165,11 @@ public int decode(byte[] src, int sp, int len, char[] dst) {
// Returns the ASCII-compatible flag that was fixed when this decoder was constructed.
public boolean isASCIICompatible() {
return isASCIICompatible;
}

// Returns the Latin1-decodable flag that was fixed when this decoder was constructed.
@Override
public boolean isLatin1Decodable() {
return isLatin1Decodable;
}
}

public static final class Encoder extends CharsetEncoder
Expand Down

1 comment on commit 34d1c92

@openjdk-notifier
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.