Skip to content

Commit

Permalink
Faster decoding of short strings
Browse files Browse the repository at this point in the history
by not using Bits, which means a bit more tailored decoding code
as well as skipping one data copy and the Bits instance itself.
  • Loading branch information
tinwelint committed May 10, 2016
1 parent f1aaf17 commit 9066806
Show file tree
Hide file tree
Showing 2 changed files with 55 additions and 36 deletions.
Expand Up @@ -194,8 +194,7 @@ long longValue()
String shortStringValue() String shortStringValue()
{ {
assertOfType( SHORT_STRING ); assertOfType( SHORT_STRING );
Bits bits = valueAsBits(); return LongerShortString.decode( data, position, currentBlocksUsed() );
return LongerShortString.decode( bits );
} }


String stringValue() String stringValue()
Expand Down Expand Up @@ -356,9 +355,6 @@ private static Object readArrayFromBuffer( ByteBuffer buffer )


private void assertOfType( PropertyType expected ) private void assertOfType( PropertyType expected )
{ {
if ( type() != expected ) assert type() == expected : "Expected type " + expected + " but was " + type();
{
throw new IllegalStateException( "Expected type " + expected + " but was " + type() );
}
} }
} }
Expand Up @@ -497,6 +497,7 @@ char decTranslate( byte codePoint )
public static final int ALL_BIT_MASK = bitMask( LongerShortString.values() ); public static final int ALL_BIT_MASK = bitMask( LongerShortString.values() );
public static final int ENCODING_UTF8 = 0; public static final int ENCODING_UTF8 = 0;
public static final int ENCODING_LATIN1 = 10; public static final int ENCODING_LATIN1 = 10;
private static final int HEADER_SIZE = 39; // bits


final int encodingHeader; final int encodingHeader;
final long mask; final long mask;
Expand Down Expand Up @@ -763,37 +764,48 @@ private static void writeHeader( Bits bits, int keyId, int encoding, int stringL
*/ */
public static String decode( PropertyBlock block ) public static String decode( PropertyBlock block )
{ {
Bits bits = Bits.bitsFromLongs( block.getValueBlocks() ); return decode( block.getValueBlocks(), 0, block.getValueBlocks().length );
return decode( bits );
} }


public static String decode(Bits bits) public static String decode( long[] blocks, int offset, int length )
{ {
long firstLong = bits.getLongs()[0]; long firstLong = blocks[offset];
if ( ( firstLong & 0xFFFFFF0FFFFFFFFFL ) == 0 ) return ""; if ( ( firstLong & 0xFFFFFF0FFFFFFFFFL ) == 0 ) return "";
bits.getInt( 24 ); // Get rid of the key // key(24b) + type(4) = 28
bits.getByte( 4 ); // Get rid of the type int encoding = (int) ((firstLong & 0x1F0000000L) >>> 28); // 5 bits of encoding
int encoding = bits.getByte( 5 ); //(int) ( ( firstLong & 0xF00000000L ) >>> 32 ); int stringLength = (int) ((firstLong & 0x7E00000000L) >>> 33); // 6 bits of stringLength
int stringLength = bits.getByte( 6 ); //(int) ( ( firstLong & 0xFC000000L ) >>> 26 ); if ( encoding == LongerShortString.ENCODING_UTF8 ) return decodeUTF8( blocks, offset, stringLength );
if ( encoding == LongerShortString.ENCODING_UTF8 ) return decodeUTF8( bits, stringLength ); if ( encoding == ENCODING_LATIN1 ) return decodeLatin1( blocks, offset, stringLength );
if ( encoding == ENCODING_LATIN1 ) return decodeLatin1( bits, stringLength );


LongerShortString table = getEncodingTable( encoding ); LongerShortString table = getEncodingTable( encoding );
assert table != null: "We only decode LongerShortStrings after we have consistently read the PropertyBlock " + assert table != null: "We only decode LongerShortStrings after we have consistently read the PropertyBlock " +
"data from the page cache. Thus, we should never have an invalid encoding header here."; "data from the page cache. Thus, we should never have an invalid encoding header here.";
char[] result = new char[stringLength]; char[] result = new char[stringLength];
// encode shifts in the bytes with the first char at the MSB, therefore // encode shifts in the bytes with the first char at the MSB, therefore
// we must "unshift" in the reverse order // we must "unshift" in the reverse order
for ( int i = 0; i < stringLength; i++ ) decode( result, blocks, offset, table );
{
byte codePoint = bits.getByte( table.step );
result[i] = table.decTranslate( codePoint );
}


// We know the char array is unshared, so use sharing constructor explicitly // We know the char array is unshared, so use sharing constructor explicitly
return UnsafeUtil.newSharedArrayString( result ); return UnsafeUtil.newSharedArrayString( result );
} }


/**
 * Fills {@code result} with characters decoded from the packed {@code blocks}, using the
 * supplied encoding {@code table}.
 *
 * Reading starts {@code HEADER_SIZE} bits into {@code blocks[offset]} and advances
 * {@code table.step} bits per character. A code point that runs past the end of the current
 * long is completed with the low bits of the following long, provided a following long
 * exists in the array.
 *
 * @param result destination array; its length decides how many characters are decoded.
 * @param blocks longs holding the header followed by the packed code points.
 * @param offset index into {@code blocks} of the long that contains the header.
 * @param table encoding table supplying the code point mask, the bit width per code point
 * and the translation from code point to {@code char}.
 */
private static void decode( char[] result, long[] blocks, int offset, LongerShortString table )
{
    int blockIndex = offset;
    int bitOffset = HEADER_SIZE;
    final long codePointMask = table.mask;
    int charIndex = 0;
    while ( charIndex < result.length )
    {
        // Take whatever part of the code point lives in the current long
        long bitsFromCurrentBlock = (blocks[blockIndex] >>> bitOffset) & codePointMask;
        byte codePoint = (byte) bitsFromCurrentBlock;
        bitOffset += table.step;

        boolean hasFollowingBlock = blockIndex + 1 < blocks.length;
        if ( bitOffset >= 64 && hasFollowingBlock )
        {
            // After wrapping, bitOffset is the number of bits of this code point
            // that spilled over into the next long (0 if it ended exactly on the boundary)
            bitOffset %= 64;
            blockIndex++;
            long spilledBits = blocks[blockIndex] & (codePointMask >>> (table.step - bitOffset));
            // Place the spilled bits above the ones already read from the previous long
            codePoint |= spilledBits << (table.step - bitOffset);
        }

        result[charIndex] = table.decTranslate( codePoint );
        charIndex++;
    }
}


// lookup table by encoding header // lookup table by encoding header
// +2 because of ENCODING_LATIN1 gap and one based index // +2 because of ENCODING_LATIN1 gap and one based index
Expand Down Expand Up @@ -896,22 +908,40 @@ private void translateData(Bits bits, byte[] data, int length, final int step)
} }
} }


private static String decodeLatin1( Bits bits, int stringLength ) private static String decodeLatin1( long[] blocks, int offset, int stringLength )
{ // see decode {
char[] result = new char[stringLength]; char[] result = new char[stringLength];
for ( int i = 0; i < stringLength; i++ ) int block = offset;
int maskShift = HEADER_SIZE;
for ( int i = 0; i < result.length; i++ )
{ {
result[i] = (char) bits.getShort( 8 ); char codePoint = (char) ((blocks[block] >>> maskShift) & 0xFF);
maskShift += 8;
if ( maskShift >= 64 )
{
maskShift %= 64;
codePoint |= (blocks[++block] & (0xFF >>> (8-maskShift))) << (8-maskShift);
}
result[i] = codePoint;
} }
return new String( result ); return UnsafeUtil.newSharedArrayString( result );
} }


private static String decodeUTF8( Bits bits, int stringLength ) private static String decodeUTF8( long[] blocks, int offset, int stringLength )
{ {
byte[] result = new byte[stringLength]; byte[] result = new byte[stringLength];
for ( int i = 0; i < stringLength; i++ ) int block = offset;
int maskShift = HEADER_SIZE;
for ( int i = 0; i < result.length; i++ )
{ {
result[i] = bits.getByte(); byte codePoint = (byte) (blocks[block] >>> maskShift);
maskShift += 8;
if ( maskShift >= 64 )
{
maskShift %= 64;
codePoint |= (blocks[++block] & (0xFF >>> (8-maskShift))) << (8-maskShift);
}
result[i] = codePoint;
} }
try try
{ {
Expand All @@ -930,13 +960,6 @@ public static int calculateNumberOfBlocksUsed( long firstBlock )
*/ */
int encoding = (int) ( ( firstBlock & 0x1F0000000L ) >> 28 ); int encoding = (int) ( ( firstBlock & 0x1F0000000L ) >> 28 );
int length = (int) ( ( firstBlock & 0x7E00000000L ) >> 33 ); int length = (int) ( ( firstBlock & 0x7E00000000L ) >> 33 );
/*
Bits bits = Bits.bitsFromLongs( new long[] {firstBlock} );
bits.getInt( 24 ); // key
bits.getByte( 4 ); // type
int encoding = bits.getByte( 5 );
int length = bits.getByte( 6 );
*/
if ( encoding == ENCODING_UTF8 || encoding == ENCODING_LATIN1 ) if ( encoding == ENCODING_UTF8 || encoding == ENCODING_LATIN1 )
{ {
return calculateNumberOfBlocksUsedForStep8(length); return calculateNumberOfBlocksUsedForStep8(length);
Expand Down

0 comments on commit 9066806

Please sign in to comment.