Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Browse files
Utf 8 encoding optimizations (#1444)
* fix: Correct typo weather to whether * misc: Change internal Encoding.testAsciiNumbers(...) to be static * perf: Enhance Encoding constructor to allow skipping of ASCII number compatibility test Adds a two-parameter constructor to Encoding to allow subclasses to specify their known ASCII compatibility so as to skip testing. The only usage of it is the UTF8Encoding which is changed to use the new constructor. * fix: limit size of char[] for utf-8 decoding Also optimize for java 9+ byte[] backed strings * fix: limit size of char[] for utf-8 decoding address style issues * fix: limit size of char[] for utf-8 decoding address style issues * fix: limit size of char[] for utf-8 decoding use existing JavaVersion enum to pick implementation add unit test string values more consistency between byte and char based implementations * fix: limit size of char[] for utf-8 decoding fix backwards comparison add more strings to unit test * fix: limit size of char[] for utf-8 decoding Move to using new String(byte[], int, int, Charset) rather than custom decoding for jre newer than 1.8. * fix: limit size of char[] for utf-8 decoding Back to custom utf-8 decoding for performance gains while validating * javadoc * put test back into test suite * avoid creating an unnecessary `char[]` when growing cached array Co-authored-by: Sehrope Sarkuni <sehrope@jackdb.com> Co-authored-by: Dave Cramer <davecramer@gmail.com>
- Loading branch information
Showing
with
320 additions
and 91 deletions.
- +51 −0 pgjdbc/src/main/java/org/postgresql/core/ByteOptimizedUTF8Encoder.java
- +24 −0 pgjdbc/src/main/java/org/postgresql/core/CharOptimizedUTF8Encoder.java
- +43 −17 pgjdbc/src/main/java/org/postgresql/core/Encoding.java
- +108 −74 pgjdbc/src/main/java/org/postgresql/core/{UTF8Encoding.java → OptimizedUTF8Encoder.java}
- +92 −0 pgjdbc/src/test/java/org/postgresql/core/UTF8EncodingTest.java
- +2 −0 pgjdbc/src/test/java/org/postgresql/test/jdbc2/Jdbc2TestSuite.java
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@@ -0,0 +1,51 @@ | ||
/* | ||
* Copyright (c) 2019, PostgreSQL Global Development Group | ||
* See the LICENSE file in the project root for more information. | ||
*/ | ||
|
||
package org.postgresql.core; | ||
|
||
import java.io.IOException; | ||
import java.nio.charset.Charset; | ||
|
||
/** | ||
* UTF-8 encoder which validates input and is optimized for jdk 9+ where {@code String} objects are backed by | ||
* {@code byte[]}. | ||
* @author Brett Okken | ||
*/ | ||
final class ByteOptimizedUTF8Encoder extends OptimizedUTF8Encoder { | ||
|
||
private static final Charset ASCII_CHARSET = Charset.forName("ascii"); | ||
|
||
/** | ||
* {@inheritDoc} | ||
*/ | ||
@Override | ||
public String decode(byte[] encodedString, int offset, int length) throws IOException { | ||
//for very short strings going straight to chars is up to 30% faster | ||
if (length <= 32) { | ||
return charDecode(encodedString, offset, length); | ||
} | ||
for (int i = offset, j = offset + length; i < j; ++i) { | ||
// bytes are signed values. all ascii values are positive | ||
if (encodedString[i] < 0) { | ||
return slowDecode(encodedString, offset, length, i); | ||
} | ||
} | ||
// we have confirmed all chars are ascii, give java that hint | ||
return new String(encodedString, offset, length, ASCII_CHARSET); | ||
} | ||
|
||
/** | ||
* Decodes to {@code char[]} in presence of non-ascii values after first copying all known ascii chars directly | ||
* from {@code byte[]} to {@code char[]}. | ||
*/ | ||
private synchronized String slowDecode(byte[] encodedString, int offset, int length, int curIdx) throws IOException { | ||
final char[] chars = getCharArray(length); | ||
int out = 0; | ||
for (int i = offset; i < curIdx; ++i) { | ||
chars[out++] = (char) encodedString[i]; | ||
} | ||
return decodeToChars(encodedString, curIdx, length - (curIdx - offset), chars, out); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
@@ -0,0 +1,24 @@ | ||
/* | ||
* Copyright (c) 2019, PostgreSQL Global Development Group | ||
* See the LICENSE file in the project root for more information. | ||
*/ | ||
|
||
package org.postgresql.core; | ||
|
||
import java.io.IOException; | ||
|
||
/** | ||
* UTF-8 encoder which validates input and is optimized for jdk 8 and lower where {@code String} objects are backed by | ||
* {@code char[]}. | ||
* @author Brett Okken | ||
*/ | ||
final class CharOptimizedUTF8Encoder extends OptimizedUTF8Encoder { | ||
|
||
/** | ||
* {@inheritDoc} | ||
*/ | ||
@Override | ||
public String decode(byte[] encodedString, int offset, int length) throws IOException { | ||
return charDecode(encodedString, offset, length); | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.