Skip to content

Commit

Permalink
Added UTF8StringValue
Browse files Browse the repository at this point in the history
In order to avoid unnecessarily serializing strings, we create a TextValue
that is backed by a raw UTF-8 byte array.
  • Loading branch information
pontusmelke committed Aug 23, 2017
1 parent e2d5cf1 commit c298471
Show file tree
Hide file tree
Showing 12 changed files with 205 additions and 141 deletions.
Expand Up @@ -358,6 +358,12 @@ public void writeFloatingPoint( double value ) throws IOException
pack( value ); pack( value );
} }


/**
 * Writes a string that is supplied as UTF-8 encoded bytes by forwarding the
 * arguments unchanged to {@code packUTF8}.
 *
 * @param bytes array holding the UTF-8 encoded string
 * @param offset index in {@code bytes} of the first byte to write
 * @param length number of bytes to write
 * @throws IOException if {@code packUTF8} throws it
 */
@Override
public void writeUTF8( byte[] bytes, int offset, int length ) throws IOException
{
packUTF8(bytes, offset, length);
}

@Override @Override
public void writeString( String value ) throws IOException public void writeString( String value ) throws IOException
{ {
Expand All @@ -376,24 +382,6 @@ public void writeString( char[] value, int offset, int length ) throws IOExcepti
pack( String.valueOf( value, offset, length ) ); pack( String.valueOf( value, offset, length ) );
} }


/**
 * Not supported by this writer.
 *
 * @param size ignored
 * @throws UnsupportedOperationException always
 */
@Override
public void beginUTF8( int size ) throws IOException
{
throw new UnsupportedOperationException( "pack stream cannot handle UTF8 values" );
}

/**
 * Not supported by this writer.
 *
 * @param fromAddress ignored
 * @param length ignored
 * @throws UnsupportedOperationException always
 */
@Override
public void copyUTF8( long fromAddress, int length ) throws IOException
{
throw new UnsupportedOperationException( "pack stream cannot handle UTF8 values" );
}

/**
 * Not supported by this writer.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void endUTF8() throws IOException
{
throw new UnsupportedOperationException( "pack stream cannot handle UTF8 values" );
}

@Override @Override
public void beginArray( int size, ArrayType arrayType ) throws IOException public void beginArray( int size, ArrayType arrayType ) throws IOException
{ {
Expand Down
Expand Up @@ -334,6 +334,20 @@ public void pack( String value ) throws IOException
} }
} }


/**
 * Packs a string that is already available as UTF-8 encoded bytes.
 * <p>
 * A {@code null} array is handed to {@link #packNull()}. Otherwise a string
 * header for {@code length} is written, followed by the requested slice of
 * {@code bytes} passed as-is to {@code out.writeBytes}.
 *
 * @param bytes array holding the UTF-8 encoded string, or {@code null}
 * @param offset index in {@code bytes} of the first byte to write
 * @param length number of bytes to write; also the size put in the string header
 * @throws IOException if writing to the underlying output fails
 */
public void packUTF8( byte[] bytes, int offset, int length ) throws IOException
{
    // Guard clause: no payload to write, delegate to the null packing path.
    if ( bytes == null )
    {
        packNull();
        return;
    }

    packStringHeader( length );
    out.writeBytes( bytes, offset, length );
}


protected void packBytesHeader( int size ) throws IOException protected void packBytesHeader( int size ) throws IOException
{ {
if ( size <= Byte.MAX_VALUE ) if ( size <= Byte.MAX_VALUE )
Expand Down Expand Up @@ -786,7 +800,7 @@ public static class Unexpected extends PackStreamException
public Unexpected( PackType expectedType, byte unexpectedMarkerByte ) public Unexpected( PackType expectedType, byte unexpectedMarkerByte )
{ {
super( "Wrong type received. Expected " + expectedType + ", received: " + type( unexpectedMarkerByte ) + super( "Wrong type received. Expected " + expectedType + ", received: " + type( unexpectedMarkerByte ) +
" " + "(" + toHexString( unexpectedMarkerByte ) + ")." ); " " + "(" + toHexString( unexpectedMarkerByte ) + ")." );
} }


private static String toHexString( byte unexpectedMarkerByte ) private static String toHexString( byte unexpectedMarkerByte )
Expand Down
Expand Up @@ -24,6 +24,7 @@
import org.junit.rules.ExpectedException; import org.junit.rules.ExpectedException;


import java.io.IOException; import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.HashMap; import java.util.HashMap;
import java.util.List; import java.util.List;
Expand Down Expand Up @@ -238,6 +239,24 @@ public void shouldTreatCharArrayAsListOfStrings() throws IOException
equalTo( VirtualValues.list( stringValue( "W" ), stringValue( "H" ), stringValue( "Y" ) ) ) ); equalTo( VirtualValues.list( stringValue( "W" ), stringValue( "H" ), stringValue( "Y" ) ) ) );
} }


/**
 * Packs a UTF-8 backed text value holding a code point that needs a surrogate
 * pair in Java, unpacks the produced bytes again and expects the result to
 * equal the value that was packed.
 */
@Test
public void shouldPackUtf8() throws IOException
{
    // Given a text value backed directly by UTF-8 bytes
    byte[] utf8 = "\uD83D\uDE31".getBytes( StandardCharsets.UTF_8 );
    TextValue expected = Values.utf8Value( utf8, 0, utf8.length );
    PackedOutputArray buffer = new PackedOutputArray();
    new Neo4jPack.Packer( buffer ).pack( expected );

    // When the packed bytes are read back
    AnyValue roundTripped = unpacked( buffer.bytes() );

    // Then the value survives the round trip
    assertThat( roundTripped, equalTo( expected ) );
}

private static class Unpackable private static class Unpackable
{ {


Expand Down
Expand Up @@ -20,6 +20,7 @@
package org.neo4j.cypher.internal.compatibility.v3_3.runtime.helpers package org.neo4j.cypher.internal.compatibility.v3_3.runtime.helpers


import org.neo4j.cypher.internal.frontend.v3_3.CypherTypeException import org.neo4j.cypher.internal.frontend.v3_3.CypherTypeException
import org.neo4j.string.UTF8
import org.neo4j.values.storable.{ArrayValue, _} import org.neo4j.values.storable.{ArrayValue, _}
import org.neo4j.values.virtual._ import org.neo4j.values.virtual._
import org.neo4j.values.{AnyValue, AnyValueWriter} import org.neo4j.values.{AnyValue, AnyValueWriter}
Expand Down Expand Up @@ -105,23 +106,34 @@ object CastSupport {


/*Returns a converter given a type value*/ /*Returns a converter given a type value*/
def getConverter(x: AnyValue): Converter = x match { def getConverter(x: AnyValue): Converter = x match {
case _: CharValue => Converter(transform(new ArrayConverterWriter(classOf[Char], a => Values.charArray(a.asInstanceOf[Array[Char]])))) case _: CharValue => Converter(
case _: TextValue => Converter(transform(new ArrayConverterWriter(classOf[String], a => Values.stringArray(a.asInstanceOf[Array[String]]:_*)))) transform(new ArrayConverterWriter(classOf[Char], a => Values.charArray(a.asInstanceOf[Array[Char]]))))
case _: BooleanValue => Converter(transform(new ArrayConverterWriter(classOf[Boolean], a => Values.booleanArray(a.asInstanceOf[Array[Boolean]])))) case _: TextValue => Converter(
case _: ByteValue => Converter(transform(new ArrayConverterWriter(classOf[Byte], a => Values.byteArray(a.asInstanceOf[Array[Byte]])))) transform(new ArrayConverterWriter(classOf[String], a => Values.stringArray(a.asInstanceOf[Array[String]]: _*))))
case _: ShortValue => Converter(transform(new ArrayConverterWriter(classOf[Short], a => Values.shortArray(a.asInstanceOf[Array[Short]])))) case _: BooleanValue => Converter(
case _: IntValue => Converter(transform(new ArrayConverterWriter(classOf[Int], a => Values.intArray(a.asInstanceOf[Array[Int]])))) transform(new ArrayConverterWriter(classOf[Boolean], a => Values.booleanArray(a.asInstanceOf[Array[Boolean]]))))
case _: LongValue => Converter(transform(new ArrayConverterWriter(classOf[Long], a => Values.longArray(a.asInstanceOf[Array[Long]])))) case _: ByteValue => Converter(
case _: FloatValue => Converter(transform(new ArrayConverterWriter(classOf[Float], a => Values.floatArray(a.asInstanceOf[Array[Float]])))) transform(new ArrayConverterWriter(classOf[Byte], a => Values.byteArray(a.asInstanceOf[Array[Byte]]))))
case _: DoubleValue => Converter(transform(new ArrayConverterWriter(classOf[Double], a => Values.doubleArray(a.asInstanceOf[Array[Double]])))) case _: ShortValue => Converter(
transform(new ArrayConverterWriter(classOf[Short], a => Values.shortArray(a.asInstanceOf[Array[Short]]))))
case _: IntValue => Converter(
transform(new ArrayConverterWriter(classOf[Int], a => Values.intArray(a.asInstanceOf[Array[Int]]))))
case _: LongValue => Converter(
transform(new ArrayConverterWriter(classOf[Long], a => Values.longArray(a.asInstanceOf[Array[Long]]))))
case _: FloatValue => Converter(
transform(new ArrayConverterWriter(classOf[Float], a => Values.floatArray(a.asInstanceOf[Array[Float]]))))
case _: DoubleValue => Converter(
transform(new ArrayConverterWriter(classOf[Double], a => Values.doubleArray(a.asInstanceOf[Array[Double]]))))
case _ => throw new CypherTypeException("Property values can only be of primitive types or arrays thereof") case _ => throw new CypherTypeException("Property values can only be of primitive types or arrays thereof")
} }


private def transform(writer: ArrayConverterWriter)(value: ListValue): ArrayValue = { private def transform(writer: ArrayConverterWriter)(value: ListValue): ArrayValue = {
value.writeTo(writer) value.writeTo(writer)
writer.array writer.array
} }
private class ArrayConverterWriter(typ: Class[_], transformer: (AnyRef) => ArrayValue) extends AnyValueWriter[RuntimeException] {
private class ArrayConverterWriter(typ: Class[_], transformer: (AnyRef) => ArrayValue)
extends AnyValueWriter[RuntimeException] {


private var _array: AnyRef = null private var _array: AnyRef = null
private var index = 0 private var index = 0
Expand Down Expand Up @@ -184,13 +196,8 @@ object CastSupport {


override def writeString(value: Char): Unit = write(value) override def writeString(value: Char): Unit = write(value)


override def writeString(value: Array[Char], offset: Int, length: Int): Unit = write(new String(value, offset, length)) override def writeString(value: Array[Char], offset: Int, length: Int): Unit = write(

new String(value, offset, length))
override def beginUTF8(size: Int): Unit = fail()

override def copyUTF8(fromAddress: Long, length: Int): Unit = fail()

override def endUTF8(): Unit = fail()


override def beginArray(size: Int, arrayType: ValueWriter.ArrayType): Unit = fail() override def beginArray(size: Int, arrayType: ValueWriter.ArrayType): Unit = fail()


Expand All @@ -199,6 +206,9 @@ object CastSupport {
override def writeByteArray(value: Array[Byte]): Unit = { override def writeByteArray(value: Array[Byte]): Unit = {
_array = value _array = value
} }

// Decodes the UTF-8 slice with UTF8.decode and hands the decoded value to write.
override def writeUTF8(bytes: Array[Byte], offset: Int, length: Int): Unit = {
  val decoded = UTF8.decode(bytes, offset, length)
  write(decoded)
}
} }


} }
Expand Up @@ -122,6 +122,12 @@ public void writeString( String value )
builder.append( '|' ); builder.append( '|' );
} }


/**
 * Decodes the given slice of UTF-8 bytes with {@code UTF8.decode} and passes
 * the result on to {@code writeString}.
 *
 * @param bytes array holding the UTF-8 encoded string
 * @param offset index in {@code bytes} of the first byte to decode
 * @param length number of bytes to decode
 */
@Override
public void writeUTF8( byte[] bytes, int offset, int length ) throws RuntimeException
{
writeString( UTF8.decode( bytes, offset, length ) );
}

@Override @Override
public void writeString( char value ) public void writeString( char value )
{ {
Expand All @@ -136,24 +142,6 @@ public void writeString( char[] value, int offset, int length )
builder.append( '|' ); builder.append( '|' );
} }


/**
 * Not supported by this writer.
 *
 * @param size ignored
 * @throws UnsupportedOperationException always
 */
@Override
public void beginUTF8( int size )
{
throw new UnsupportedOperationException( "direct UTF8 encoding is not supported yet!" );
}

/**
 * Not supported by this writer.
 *
 * @param fromAddress ignored
 * @param length ignored
 * @throws UnsupportedOperationException always
 */
@Override
public void copyUTF8( long fromAddress, int length )
{
throw new UnsupportedOperationException( "direct UTF8 encoding is not supported yet!" );
}

/**
 * Not supported by this writer.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public void endUTF8()
{
throw new UnsupportedOperationException( "direct UTF8 encoding is not supported yet!" );
}

@Override @Override
public void beginArray( int size, ArrayType arrayType ) public void beginArray( int size, ArrayType arrayType )
{ {
Expand Down
Expand Up @@ -34,6 +34,7 @@
import org.neo4j.graphdb.Relationship; import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.spatial.Point; import org.neo4j.graphdb.spatial.Point;
import org.neo4j.helpers.collection.ReverseArrayIterator; import org.neo4j.helpers.collection.ReverseArrayIterator;
import org.neo4j.string.UTF8;
import org.neo4j.values.storable.TextArray; import org.neo4j.values.storable.TextArray;
import org.neo4j.values.storable.TextValue; import org.neo4j.values.storable.TextValue;
import org.neo4j.values.virtual.CoordinateReferenceSystem; import org.neo4j.values.virtual.CoordinateReferenceSystem;
Expand All @@ -45,10 +46,11 @@


/** /**
* Base class for converting AnyValue to normal java objects. * Base class for converting AnyValue to normal java objects.
* * <p>
* This base class takes care of converting all "normal" java types such as * This base class takes care of converting all "normal" java types such as
* number types, booleans, strings, arrays and lists. It leaves to the extending * number types, booleans, strings, arrays and lists. It leaves to the extending
* class to handle neo4j specific types such as nodes, edges and points. * class to handle neo4j specific types such as nodes, edges and points.
*
* @param <E> * @param <E>
*/ */
public abstract class BaseToObjectValueWriter<E extends Exception> implements AnyValueWriter<E> public abstract class BaseToObjectValueWriter<E extends Exception> implements AnyValueWriter<E>
Expand All @@ -64,7 +66,8 @@ public BaseToObjectValueWriter()


protected abstract Relationship newRelationshipProxyById( long id ); protected abstract Relationship newRelationshipProxyById( long id );


protected abstract Point newGeographicPoint( double longitude, double latitude, String name, int code, String href ); protected abstract Point newGeographicPoint( double longitude, double latitude, String name, int code,
String href );


protected abstract Point newCartesianPoint( double x, double y, String name, int code, String href ); protected abstract Point newCartesianPoint( double x, double y, String name, int code, String href );


Expand Down Expand Up @@ -349,21 +352,9 @@ public void writeString( char[] value, int offset, int length ) throws RuntimeEx
} }


@Override @Override
public void beginUTF8( int size ) throws RuntimeException public void writeUTF8( byte[] bytes, int offset, int length ) throws E
{
throw new UnsupportedOperationException();
}

@Override
public void copyUTF8( long fromAddress, int length ) throws RuntimeException
{
throw new UnsupportedOperationException();
}

@Override
public void endUTF8() throws RuntimeException
{ {
throw new UnsupportedOperationException(); writeValue( UTF8.decode( bytes, offset, length ) );
} }


@Override @Override
Expand Down
Expand Up @@ -19,6 +19,8 @@
*/ */
package org.neo4j.values.storable; package org.neo4j.values.storable;


import java.nio.charset.StandardCharsets;

import static java.lang.String.format; import static java.lang.String.format;


public abstract class StringValue extends TextValue public abstract class StringValue extends TextValue
Expand Down Expand Up @@ -113,4 +115,59 @@ public int length()
return value.length(); return value.length();
} }
} }

/**
 * Behaves like a normal {@link StringValue} but is backed by a slice of a
 * UTF-8 encoded byte array; the {@link String} form is only created when
 * {@link #value()} is first called and is cached afterwards.
 * <p>
 * TODO in this implementation most operations, such as hashCode, length and
 * equals, will actually load the string. These could be implemented using
 * the byte array directly.
 */
static final class UTF8StringValue extends StringValue
{
    private final byte[] bytes;
    private final int offset;
    private final int length;
    // Cached decoded form; null until value() has decoded the bytes.
    private volatile String value;

    /**
     * @param bytes array holding the UTF-8 encoded string, asserted to be non-null
     * @param offset index in {@code bytes} of the first byte of the string
     * @param length number of bytes that make up the string
     */
    UTF8StringValue( byte[] bytes, int offset, int length )
    {
        assert bytes != null;
        this.bytes = bytes;
        this.offset = offset;
        this.length = length;
    }

    /**
     * Hands the raw byte slice to the writer without decoding it.
     */
    @Override
    public <E extends Exception> void writeTo( ValueWriter<E> writer ) throws E
    {
        writer.writeUTF8( bytes, offset, length );
    }

    /**
     * Returns the decoded string, decoding the byte slice as UTF-8 on first use.
     */
    @Override
    String value()
    {
        String decoded = value;
        if ( decoded != null )
        {
            return decoded;
        }

        synchronized ( this )
        {
            // Re-read under the lock: another thread may have decoded meanwhile.
            decoded = value;
            if ( decoded == null )
            {
                decoded = new String( bytes, offset, length, StandardCharsets.UTF_8 );
                value = decoded;
            }
            return decoded;
        }
    }

    /**
     * Returns the length of the decoded string, which forces decoding.
     */
    @Override
    public int length()
    {
        return value().length();
    }
}
} }
Expand Up @@ -28,6 +28,7 @@
*/ */
public interface ValueWriter<E extends Exception> public interface ValueWriter<E extends Exception>
{ {

enum ArrayType enum ArrayType
{ {
BYTE, BYTE,
Expand Down Expand Up @@ -61,19 +62,15 @@ enum ArrayType


void writeString( char value ) throws E; void writeString( char value ) throws E;


/**
 * Writes a string value that is supplied as a slice of UTF-8 encoded bytes.
 *
 * @param bytes array holding the UTF-8 encoded string
 * @param offset index in {@code bytes} of the first byte of the string
 * @param length number of bytes that make up the string
 * @throws E if the writer fails
 */
void writeUTF8( byte[] bytes, int offset, int length ) throws E;

default void writeString( char[] value ) throws E default void writeString( char[] value ) throws E
{ {
writeString( value, 0, value.length ); writeString( value, 0, value.length );
} }


void writeString( char[] value, int offset, int length ) throws E; void writeString( char[] value, int offset, int length ) throws E;


// NOTE(review): intended semantics are not visible here; the implementations
// shown in this change either throw UnsupportedOperationException or are no-ops.
void beginUTF8( int size ) throws E;

// NOTE(review): intended semantics are not visible here; the implementations
// shown in this change either throw UnsupportedOperationException or are no-ops.
void copyUTF8( long fromAddress, int length ) throws E;

// NOTE(review): intended semantics are not visible here; the implementations
// shown in this change either throw UnsupportedOperationException or are no-ops.
void endUTF8() throws E;

void beginArray( int size, ArrayType arrayType ) throws E; void beginArray( int size, ArrayType arrayType ) throws E;


void endArray() throws E; void endArray() throws E;
Expand Down Expand Up @@ -133,22 +130,12 @@ public void writeString( char value ) throws E
} }


@Override @Override
public void writeString( char[] value, int offset, int length ) throws E public void writeUTF8( byte[] bytes, int offset, int length ) throws E
{ // no-op { //no-op
}

@Override
public void beginUTF8( int size ) throws E
{ // no-op
}

@Override
public void copyUTF8( long fromAddress, int length ) throws E
{ // no-op
} }


@Override @Override
public void endUTF8() throws E public void writeString( char[] value, int offset, int length ) throws E
{ // no-op { // no-op
} }


Expand Down

0 comments on commit c298471

Please sign in to comment.