Skip to content

Commit 34d1c92

Browse files
Dmitry Chuyko authored and RealCLanger committed
8231717: Improve performance of charset decoding when charset is always compactable
Backport-of: 0dbfc97
1 parent 48c1f60 commit 34d1c92

File tree

5 files changed

+66
-6
lines changed

5 files changed

+66
-6
lines changed

make/data/charsetmapping/SingleByte-X.java.template

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -48,7 +48,7 @@ public class $NAME_CLZ$ extends Charset implements HistoricallyNamedCharset
4848
}
4949

5050
public CharsetDecoder newDecoder() {
51-
return new SingleByte.Decoder(this, b2c, $ASCIICOMPATIBLE$);
51+
return new SingleByte.Decoder(this, b2c, $ASCIICOMPATIBLE$, $LATIN1DECODABLE$);
5252
}
5353

5454
public CharsetEncoder newEncoder() {

make/jdk/src/classes/build/tools/charsetmapping/SBCS.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2008, 2015, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -46,6 +46,7 @@ public static void genClass(Charset cs,
4646
String hisName = cs.hisName;
4747
String pkgName = cs.pkgName;
4848
boolean isASCII = cs.isASCII;
49+
boolean isLatin1Decodable = true;
4950

5051
StringBuilder b2cSB = new StringBuilder();
5152
StringBuilder b2cNRSB = new StringBuilder();
@@ -69,6 +70,9 @@ public static void genClass(Charset cs,
6970
c2bOff += 0x100;
7071
c2bIndex[e.cp>>8] = 1;
7172
}
73+
if (e.cp > 0xFF) {
74+
isLatin1Decodable = false;
75+
}
7276
}
7377

7478
Formatter fm = new Formatter(b2cSB);
@@ -178,6 +182,9 @@ public static void genClass(Charset cs,
178182
if (line.indexOf("$ASCIICOMPATIBLE$") != -1) {
179183
line = line.replace("$ASCIICOMPATIBLE$", isASCII ? "true" : "false");
180184
}
185+
if (line.indexOf("$LATIN1DECODABLE$") != -1) {
186+
line = line.replace("$LATIN1DECODABLE$", isLatin1Decodable ? "true" : "false");
187+
}
181188
if (line.indexOf("$B2CTABLE$") != -1) {
182189
line = line.replace("$B2CTABLE$", b2c);
183190
}

src/java.base/share/classes/java/lang/StringCoding.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2000, 2018, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2000, 2019, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -191,6 +191,12 @@ Result decode(byte[] ba, int off, int len) {
191191
return result.with(StringLatin1.inflate(ba, off, len), UTF16);
192192
}
193193
}
194+
// fastpath for always Latin1 decodable single byte
195+
if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
196+
byte[] dst = new byte[len];
197+
((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst);
198+
return result.with(dst, LATIN1);
199+
}
194200
int en = scale(len, cd.maxCharsPerByte());
195201
char[] ca = new char[en];
196202
if (cd instanceof ArrayDecoder) {
@@ -278,6 +284,13 @@ static Result decode(Charset cs, byte[] ba, int off, int len) {
278284
((ArrayDecoder)cd).isASCIICompatible() && !hasNegatives(ba, off, len)) {
279285
return decodeLatin1(ba, off, len);
280286
}
287+
// fastpath for always Latin1 decodable single byte
288+
if (COMPACT_STRINGS && cd instanceof ArrayDecoder && ((ArrayDecoder)cd).isLatin1Decodable()) {
289+
byte[] dst = new byte[len];
290+
((ArrayDecoder)cd).decodeToLatin1(ba, off, len, dst);
291+
return new Result().with(dst, LATIN1);
292+
}
293+
281294
int en = scale(len, cd.maxCharsPerByte());
282295
if (len == 0) {
283296
return new Result().with();

src/java.base/share/classes/sun/nio/cs/ArrayDecoder.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2009, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2009, 2019, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -28,6 +28,9 @@
2828
/*
2929
* FastPath byte[]->char[] decoder, REPLACE on malformed or
3030
* unmappable input.
31+
*
32+
* FastPath encoded byte[]-> "String Latin1 coding" byte[] decoder for use when
33+
* charset is always decodable to the internal String Latin1 coding byte[], ie. all mappings <=0xff
3134
*/
3235

3336
public interface ArrayDecoder {
@@ -36,4 +39,14 @@ public interface ArrayDecoder {
3639
default boolean isASCIICompatible() {
3740
return false;
3841
}
42+
43+
// Is always decodable to internal String Latin1 coding, ie. all mappings <= 0xff
44+
default boolean isLatin1Decodable() {
45+
return false;
46+
}
47+
48+
// Decode to internal String Latin1 coding byte[] fastpath for when isLatin1Decodable == true
49+
default int decodeToLatin1(byte[] src, int sp, int len, byte[] dst) {
50+
return 0;
51+
}
3952
}

src/java.base/share/classes/sun/nio/cs/SingleByte.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (c) 2008, 2012, Oracle and/or its affiliates. All rights reserved.
2+
* Copyright (c) 2008, 2019, Oracle and/or its affiliates. All rights reserved.
33
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
44
*
55
* This code is free software; you can redistribute it and/or modify it
@@ -50,17 +50,27 @@ public static final class Decoder extends CharsetDecoder
5050
implements ArrayDecoder {
5151
private final char[] b2c;
5252
private final boolean isASCIICompatible;
53+
private final boolean isLatin1Decodable;
5354

5455
public Decoder(Charset cs, char[] b2c) {
5556
super(cs, 1.0f, 1.0f);
5657
this.b2c = b2c;
5758
this.isASCIICompatible = false;
59+
this.isLatin1Decodable = false;
5860
}
5961

6062
public Decoder(Charset cs, char[] b2c, boolean isASCIICompatible) {
6163
super(cs, 1.0f, 1.0f);
6264
this.b2c = b2c;
6365
this.isASCIICompatible = isASCIICompatible;
66+
this.isLatin1Decodable = false;
67+
}
68+
69+
public Decoder(Charset cs, char[] b2c, boolean isASCIICompatible, boolean isLatin1Decodable) {
70+
super(cs, 1.0f, 1.0f);
71+
this.b2c = b2c;
72+
this.isASCIICompatible = isASCIICompatible;
73+
this.isLatin1Decodable = isLatin1Decodable;
6474
}
6575

6676
private CoderResult decodeArrayLoop(ByteBuffer src, CharBuffer dst) {
@@ -124,6 +134,18 @@ protected void implReplaceWith(String newReplacement) {
124134
repl = newReplacement.charAt(0);
125135
}
126136

137+
@Override
138+
public int decodeToLatin1(byte[] src, int sp, int len, byte[] dst) {
139+
if (len > dst.length)
140+
len = dst.length;
141+
142+
int dp = 0;
143+
while (dp < len) {
144+
dst[dp++] = (byte)decode(src[sp++]);
145+
}
146+
return dp;
147+
}
148+
127149
@Override
128150
public int decode(byte[] src, int sp, int len, char[] dst) {
129151
if (len > dst.length)
@@ -143,6 +165,11 @@ public int decode(byte[] src, int sp, int len, char[] dst) {
143165
public boolean isASCIICompatible() {
144166
return isASCIICompatible;
145167
}
168+
169+
@Override
170+
public boolean isLatin1Decodable() {
171+
return isLatin1Decodable;
172+
}
146173
}
147174

148175
public static final class Encoder extends CharsetEncoder

0 commit comments

Comments
 (0)