Skip to content

Commit

Permalink
Use unsafe to access char[]
Browse files Browse the repository at this point in the history
  • Loading branch information
pontusmelke committed Sep 7, 2018
1 parent c2086a9 commit f601c7e
Show file tree
Hide file tree
Showing 3 changed files with 165 additions and 35 deletions.
Expand Up @@ -27,6 +27,7 @@
import org.neo4j.hashing.HashFunction; import org.neo4j.hashing.HashFunction;
import org.neo4j.values.AnyValue; import org.neo4j.values.AnyValue;
import org.neo4j.values.ValueMapper; import org.neo4j.values.ValueMapper;
import org.neo4j.values.storable.helpers.UnsafeStringUtils;
import org.neo4j.values.virtual.ListValue; import org.neo4j.values.virtual.ListValue;
import org.neo4j.values.virtual.VirtualValues; import org.neo4j.values.virtual.VirtualValues;


Expand Down Expand Up @@ -186,35 +187,30 @@ public int compareTo( TextValue other )
return 0; return 0;
} }


String thisString = value(); final String thisString = value();
String thatString = other.stringValue(); final String thatString = other.stringValue();

final char[] chars1 = UnsafeStringUtils.toCharArray( thisString );
final char[] chars2 = UnsafeStringUtils.toCharArray( thatString );
final int offset1 = UnsafeStringUtils.offsetOf( thisString );
final int offset2 = UnsafeStringUtils.offsetOf( thatString );
final int l1 = chars1.length;
final int l2 = chars2.length;
char c1, c2; char c1, c2;
int pos1 = 0, pos2 = 0; int pos = 0;
final int l1 = thisString.length();
final int l2 = thatString.length();
//handle empty strings first so we can have less branching in main loop
if ( l1 == 0 || l2 == 0 )
{
return l1 - l2;
}


//First compare identical substrings, here we need no fix-up //First compare identical substrings, here we need no fix-up
while ( true ) while ( true )
{ {
//NOTE: If this is a bottle neck we could use unsafe to access the underlying char[]. //if we are at the end any of the strings they are the same
//This will remove a function call and a range-check. if ( pos >= l1 || pos >= l2 )
c1 = thisString.charAt( pos1 ); {
c2 = thatString.charAt( pos2 ); return l1 - l2;
}
c1 = chars1[ pos + offset1 ];
c2 = chars2[ pos + offset2 ];
if ( c1 == c2 ) if ( c1 == c2 )
{ {
//if we are at the end of both strings they are the same pos++;
if ( pos1 == l1 - 1 || pos2 == l2 - 1 )
{
return l1 - l2;
}
pos1++;
pos2++;
} }
else else
{ {
Expand All @@ -224,23 +220,28 @@ public int compareTo( TextValue other )


//We found c1, and c2 where c1 != c2, before comparing we need //We found c1, and c2 where c1 != c2, before comparing we need
//to perform fix-up if they are in surrogate range before comparing. //to perform fix-up if they are in surrogate range before comparing.
if ( c1 >= 0xd800 && c2 >= 0xd800 ) return normalizeChars( c1, c2 );
}

private int normalizeChars( char c1, char c2 )
{
if ( c1 >= Character.MIN_HIGH_SURROGATE && c2 >= Character.MIN_HIGH_SURROGATE )
{ {
if ( c1 >= 0xe000 ) if ( c1 >= '\ue000' )
{ {
c1 -= 0x800; c1 -= '\u0800';
} }
else else
{ {
c1 += 0x2000; c1 += '\u2000';
} }
if ( c2 >= 0xe000 ) if ( c2 >= '\ue000' )
{ {
c2 -= 0x800; c2 -= '\u0800';
} }
else else
{ {
c2 += 0x2000; c2 += '\u2000';
} }
} }


Expand Down
Expand Up @@ -398,16 +398,14 @@ public static int byteArrayCompare( byte[] value1, int value1Offset, int value1L
byte[] value2, int value2Offset, int value2Length ) byte[] value2, int value2Offset, int value2Length )
{ {
int lim = Math.min( value1Length, value2Length ); int lim = Math.min( value1Length, value2Length );
int i = 0; for ( int i = 0; i < lim; i++ )
while ( i < lim )
{ {
int b1 = ((int) value1[i + value1Offset]) & 0xFF; byte b1 = value1[i + value1Offset];
int b2 = ((int) value2[i + value2Offset]) & 0xFF; byte b2 = value2[i + value2Offset];
if ( b1 != b2 ) if ( b1 != b2 )
{ {
return b1 - b2; return (((int) b1) & 0xFF) - (((int) b2) & 0xFF);
} }
i++;
} }
return value1Length - value2Length; return value1Length - value2Length;
} }
Expand Down
@@ -0,0 +1,131 @@
/*
* Copyright (c) 2002-2018 "Neo4j,"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.values.storable.helpers;

import sun.misc.Unsafe;

import java.lang.reflect.Field;

import org.neo4j.function.ThrowingSupplier;

public class UnsafeStringUtils
{
private static final Unsafe unsafe;
/**
* Offset to the underlying char[] value in java.lang.String
*/
private static final long valueOffset;

/**
* Some jvms also use an offset marking the start of the string in the char[]
*/
private static final long offsetOffset;
private static final long NOT_FOUND = -1L;

static
{
unsafe = theUnsafe();
valueOffset = offsetOfDeclaredField( () -> String.class.getDeclaredField( "value" ) );
offsetOffset = offsetOfDeclaredField( () -> String.class.getDeclaredField( "offset" ) );
}

/**
* Provide backdoor to unsafe
*/
private static Unsafe theUnsafe()
{
Field declaredField;
try
{
declaredField = Unsafe.class.getDeclaredField( "theUnsafe" );
declaredField.setAccessible( true );
}
catch ( Exception e )
{
declaredField = null;
}

if ( declaredField != null )
{
try
{
return (Unsafe) declaredField.get( null );
}
catch ( IllegalAccessException e )
{
return null;
}
}
else
{
return null;
}
}

/**
* Returns the offset of a declared field or {@value NOT_FOUND} if it couldn't be found.
*/
private static long offsetOfDeclaredField( ThrowingSupplier<Field,Exception> supplier )
{
Field declaredField;
try
{
declaredField = supplier.get();
}
catch ( Exception e )
{
declaredField = null;
}

if ( declaredField != null && unsafe != null )
{
return unsafe.objectFieldOffset( declaredField );
}
else
{
return NOT_FOUND;
}
}

public static char[] toCharArray( String s )
{
if ( valueOffset == NOT_FOUND )
{
//we didn't get to access the internals, we'll stick with a defensive copy
return s.toCharArray();
}
else
{
return (char[]) unsafe.getObject( s, valueOffset );
}
}

public static int offsetOf( String s )
{
if ( offsetOffset == NOT_FOUND )
{
return 0;
}
else
{
return unsafe.getInt( s, offsetOffset );
}
}
}

0 comments on commit f601c7e

Please sign in to comment.