Skip to content

Commit

Permalink
Added UTF8StringValue
Browse files Browse the repository at this point in the history
In order to avoid unnecessarily serializing strings, we create a TextValue
that is backed by a raw UTF-8 byte array.
  • Loading branch information
pontusmelke committed Aug 23, 2017
1 parent e2d5cf1 commit c298471
Show file tree
Hide file tree
Showing 12 changed files with 205 additions and 141 deletions.
Expand Up @@ -358,6 +358,12 @@ public void writeFloatingPoint( double value ) throws IOException
pack( value );
}

/**
 * Writes a string that is supplied as UTF-8 encoded bytes by handing the
 * given range straight to {@code packUTF8}.
 */
@Override
public void writeUTF8( byte[] bytes, int offset, int length ) throws IOException
{
    packUTF8( bytes, offset, length );
}

@Override
public void writeString( String value ) throws IOException
{
Expand All @@ -376,24 +382,6 @@ public void writeString( char[] value, int offset, int length ) throws IOExcepti
pack( String.valueOf( value, offset, length ) );
}

@Override
public void beginUTF8( int size ) throws IOException
{
throw new UnsupportedOperationException( "pack stream cannot handle UTF8 values" );
}

@Override
public void copyUTF8( long fromAddress, int length ) throws IOException
{
throw new UnsupportedOperationException( "pack stream cannot handle UTF8 values" );
}

@Override
public void endUTF8() throws IOException
{
throw new UnsupportedOperationException( "pack stream cannot handle UTF8 values" );
}

@Override
public void beginArray( int size, ArrayType arrayType ) throws IOException
{
Expand Down
Expand Up @@ -334,6 +334,20 @@ public void pack( String value ) throws IOException
}
}

/**
 * Packs a string that is supplied as already UTF-8 encoded bytes.
 * <p>
 * A {@code null} array is packed as null. Otherwise a string header
 * announcing {@code length} is written, followed by the
 * {@code length} bytes of {@code bytes} starting at {@code offset}.
 *
 * @param bytes array holding the encoded string, or {@code null}
 * @param offset index of the first byte to write
 * @param length number of bytes to write; also the size put in the string header
 * @throws IOException if writing the header or the bytes fails
 */
public void packUTF8( byte[] bytes, int offset, int length ) throws IOException
{
    if ( bytes == null )
    {
        packNull();
        return;
    }

    packStringHeader( length );
    out.writeBytes( bytes, offset, length );
}


protected void packBytesHeader( int size ) throws IOException
{
if ( size <= Byte.MAX_VALUE )
Expand Down Expand Up @@ -786,7 +800,7 @@ public static class Unexpected extends PackStreamException
public Unexpected( PackType expectedType, byte unexpectedMarkerByte )
{
super( "Wrong type received. Expected " + expectedType + ", received: " + type( unexpectedMarkerByte ) +
" " + "(" + toHexString( unexpectedMarkerByte ) + ")." );
" " + "(" + toHexString( unexpectedMarkerByte ) + ")." );
}

private static String toHexString( byte unexpectedMarkerByte )
Expand Down
Expand Up @@ -24,6 +24,7 @@
import org.junit.rules.ExpectedException;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
Expand Down Expand Up @@ -238,6 +239,24 @@ public void shouldTreatCharArrayAsListOfStrings() throws IOException
equalTo( VirtualValues.list( stringValue( "W" ), stringValue( "H" ), stringValue( "Y" ) ) ) );
}

@Test
public void shouldPackUtf8() throws IOException
{
    // Given a text value created from the UTF-8 bytes of U+1F631, a code point
    // outside the BMP (a surrogate pair in Java, four bytes in UTF-8), that has been packed
    byte[] utf8 = "\uD83D\uDE31".getBytes( StandardCharsets.UTF_8 );
    TextValue expected = Values.utf8Value( utf8, 0, utf8.length );
    PackedOutputArray packed = new PackedOutputArray();
    new Neo4jPack.Packer( packed ).pack( expected );

    // When the packed bytes are unpacked again
    AnyValue roundTripped = unpacked( packed.bytes() );

    // Then the round trip yields a value equal to the one that was packed
    assertThat( roundTripped, equalTo( expected ) );
}

private static class Unpackable
{

Expand Down
Expand Up @@ -20,6 +20,7 @@
package org.neo4j.cypher.internal.compatibility.v3_3.runtime.helpers

import org.neo4j.cypher.internal.frontend.v3_3.CypherTypeException
import org.neo4j.string.UTF8
import org.neo4j.values.storable.{ArrayValue, _}
import org.neo4j.values.virtual._
import org.neo4j.values.{AnyValue, AnyValueWriter}
Expand Down Expand Up @@ -105,23 +106,34 @@ object CastSupport {

/*Returns a converter given a type value*/
def getConverter(x: AnyValue): Converter = x match {
case _: CharValue => Converter(transform(new ArrayConverterWriter(classOf[Char], a => Values.charArray(a.asInstanceOf[Array[Char]]))))
case _: TextValue => Converter(transform(new ArrayConverterWriter(classOf[String], a => Values.stringArray(a.asInstanceOf[Array[String]]:_*))))
case _: BooleanValue => Converter(transform(new ArrayConverterWriter(classOf[Boolean], a => Values.booleanArray(a.asInstanceOf[Array[Boolean]]))))
case _: ByteValue => Converter(transform(new ArrayConverterWriter(classOf[Byte], a => Values.byteArray(a.asInstanceOf[Array[Byte]]))))
case _: ShortValue => Converter(transform(new ArrayConverterWriter(classOf[Short], a => Values.shortArray(a.asInstanceOf[Array[Short]]))))
case _: IntValue => Converter(transform(new ArrayConverterWriter(classOf[Int], a => Values.intArray(a.asInstanceOf[Array[Int]]))))
case _: LongValue => Converter(transform(new ArrayConverterWriter(classOf[Long], a => Values.longArray(a.asInstanceOf[Array[Long]]))))
case _: FloatValue => Converter(transform(new ArrayConverterWriter(classOf[Float], a => Values.floatArray(a.asInstanceOf[Array[Float]]))))
case _: DoubleValue => Converter(transform(new ArrayConverterWriter(classOf[Double], a => Values.doubleArray(a.asInstanceOf[Array[Double]]))))
case _: CharValue => Converter(
transform(new ArrayConverterWriter(classOf[Char], a => Values.charArray(a.asInstanceOf[Array[Char]]))))
case _: TextValue => Converter(
transform(new ArrayConverterWriter(classOf[String], a => Values.stringArray(a.asInstanceOf[Array[String]]: _*))))
case _: BooleanValue => Converter(
transform(new ArrayConverterWriter(classOf[Boolean], a => Values.booleanArray(a.asInstanceOf[Array[Boolean]]))))
case _: ByteValue => Converter(
transform(new ArrayConverterWriter(classOf[Byte], a => Values.byteArray(a.asInstanceOf[Array[Byte]]))))
case _: ShortValue => Converter(
transform(new ArrayConverterWriter(classOf[Short], a => Values.shortArray(a.asInstanceOf[Array[Short]]))))
case _: IntValue => Converter(
transform(new ArrayConverterWriter(classOf[Int], a => Values.intArray(a.asInstanceOf[Array[Int]]))))
case _: LongValue => Converter(
transform(new ArrayConverterWriter(classOf[Long], a => Values.longArray(a.asInstanceOf[Array[Long]]))))
case _: FloatValue => Converter(
transform(new ArrayConverterWriter(classOf[Float], a => Values.floatArray(a.asInstanceOf[Array[Float]]))))
case _: DoubleValue => Converter(
transform(new ArrayConverterWriter(classOf[Double], a => Values.doubleArray(a.asInstanceOf[Array[Double]]))))
case _ => throw new CypherTypeException("Property values can only be of primitive types or arrays thereof")
}

/**
  * Writes `value` to `writer` and returns whatever the writer then exposes
  * through its `array` member.
  */
private def transform(writer: ArrayConverterWriter)(value: ListValue): ArrayValue = {
value.writeTo(writer)
writer.array
}
private class ArrayConverterWriter(typ: Class[_], transformer: (AnyRef) => ArrayValue) extends AnyValueWriter[RuntimeException] {

private class ArrayConverterWriter(typ: Class[_], transformer: (AnyRef) => ArrayValue)
extends AnyValueWriter[RuntimeException] {

private var _array: AnyRef = null
private var index = 0
Expand Down Expand Up @@ -184,13 +196,8 @@ object CastSupport {

override def writeString(value: Char): Unit = write(value)

override def writeString(value: Array[Char], offset: Int, length: Int): Unit = write(new String(value, offset, length))

override def beginUTF8(size: Int): Unit = fail()

override def copyUTF8(fromAddress: Long, length: Int): Unit = fail()

override def endUTF8(): Unit = fail()
override def writeString(value: Array[Char], offset: Int, length: Int): Unit = write(
new String(value, offset, length))

override def beginArray(size: Int, arrayType: ValueWriter.ArrayType): Unit = fail()

Expand All @@ -199,6 +206,9 @@ object CastSupport {
override def writeByteArray(value: Array[Byte]): Unit = {
_array = value
}

// Decodes the given UTF-8 byte range and passes the decoded value on to `write`.
override def writeUTF8(bytes: Array[Byte], offset: Int, length: Int): Unit = {
  val decoded = UTF8.decode(bytes, offset, length)
  write(decoded)
}
}

}
Expand Up @@ -122,6 +122,12 @@ public void writeString( String value )
builder.append( '|' );
}

/**
 * Decodes the given UTF-8 byte range and writes the result through
 * {@code writeString}, the same path used for any other string.
 */
@Override
public void writeUTF8( byte[] bytes, int offset, int length ) throws RuntimeException
{
    String decoded = UTF8.decode( bytes, offset, length );
    writeString( decoded );
}

@Override
public void writeString( char value )
{
Expand All @@ -136,24 +142,6 @@ public void writeString( char[] value, int offset, int length )
builder.append( '|' );
}

@Override
public void beginUTF8( int size )
{
throw new UnsupportedOperationException( "direct UTF8 encoding is not supported yet!" );
}

@Override
public void copyUTF8( long fromAddress, int length )
{
throw new UnsupportedOperationException( "direct UTF8 encoding is not supported yet!" );
}

@Override
public void endUTF8()
{
throw new UnsupportedOperationException( "direct UTF8 encoding is not supported yet!" );
}

@Override
public void beginArray( int size, ArrayType arrayType )
{
Expand Down
Expand Up @@ -34,6 +34,7 @@
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.spatial.Point;
import org.neo4j.helpers.collection.ReverseArrayIterator;
import org.neo4j.string.UTF8;
import org.neo4j.values.storable.TextArray;
import org.neo4j.values.storable.TextValue;
import org.neo4j.values.virtual.CoordinateReferenceSystem;
Expand All @@ -45,10 +46,11 @@

/**
* Base class for converting AnyValue to normal java objects.
*
* <p>
* This base class takes care of converting all "normal" java types such as
* number types, booleans, strings, arrays and lists. It leaves to the extending
* class to handle neo4j specific types such as nodes, edges and points.
*
* @param <E>
*/
public abstract class BaseToObjectValueWriter<E extends Exception> implements AnyValueWriter<E>
Expand All @@ -64,7 +66,8 @@ public BaseToObjectValueWriter()

protected abstract Relationship newRelationshipProxyById( long id );

protected abstract Point newGeographicPoint( double longitude, double latitude, String name, int code, String href );
protected abstract Point newGeographicPoint( double longitude, double latitude, String name, int code,
String href );

protected abstract Point newCartesianPoint( double x, double y, String name, int code, String href );

Expand Down Expand Up @@ -349,21 +352,9 @@ public void writeString( char[] value, int offset, int length ) throws RuntimeEx
}

@Override
public void beginUTF8( int size ) throws RuntimeException
{
throw new UnsupportedOperationException();
}

@Override
public void copyUTF8( long fromAddress, int length ) throws RuntimeException
{
throw new UnsupportedOperationException();
}

@Override
public void endUTF8() throws RuntimeException
public void writeUTF8( byte[] bytes, int offset, int length ) throws E
{
throw new UnsupportedOperationException();
writeValue( UTF8.decode( bytes, offset, length ) );
}

@Override
Expand Down
Expand Up @@ -19,6 +19,8 @@
*/
package org.neo4j.values.storable;

import java.nio.charset.StandardCharsets;

import static java.lang.String.format;

public abstract class StringValue extends TextValue
Expand Down Expand Up @@ -113,4 +115,59 @@ public int length()
return value.length();
}
}

/**
 * Just as a normal StringValue, but backed by a range of a UTF-8 encoded
 * byte array that is only decoded into a {@link String} the first time
 * {@code value()} is called.
 * <p>
 * The byte array is stored as given, without copying. {@code writeTo} hands
 * the raw byte range to the writer and does not decode it.
 * <p>
 * TODO in this implementation most operations will actually load the string,
 * such as hashCode, length, equals etc. These could be implemented using
 * the byte array directly.
 */
static final class UTF8StringValue extends StringValue
{
    private final byte[] bytes;
    private final int offset;
    private final int length;
    // Decoded lazily by value(); volatile because value() first reads it without holding the lock.
    private volatile String value;

    UTF8StringValue( byte[] bytes, int offset, int length )
    {
        assert bytes != null;
        this.bytes = bytes;
        this.offset = offset;
        this.length = length;
    }

    /**
     * Passes the backing byte range to the writer as-is.
     */
    @Override
    public <E extends Exception> void writeTo( ValueWriter<E> writer ) throws E
    {
        writer.writeUTF8( bytes, offset, length );
    }

    /**
     * Returns the decoded string, decoding the byte range as UTF-8 on first
     * use and caching the result for later calls.
     */
    @Override
    String value()
    {
        String decoded = value;
        if ( decoded != null )
        {
            // Fast path: already decoded, no locking needed.
            return decoded;
        }

        synchronized ( this )
        {
            // Re-check under the lock; another thread may have decoded in the meantime.
            decoded = value;
            if ( decoded == null )
            {
                decoded = new String( bytes, offset, length, StandardCharsets.UTF_8 );
                value = decoded;
            }
            return decoded;
        }
    }

    /**
     * Returns the length of the decoded string, which forces decoding.
     */
    @Override
    public int length()
    {
        return value().length();
    }
}
}
Expand Up @@ -28,6 +28,7 @@
*/
public interface ValueWriter<E extends Exception>
{

enum ArrayType
{
BYTE,
Expand Down Expand Up @@ -61,19 +62,15 @@ enum ArrayType

void writeString( char value ) throws E;

/**
 * Writes a string value that is supplied as UTF-8 encoded bytes.
 *
 * @param bytes array holding the encoded string
 * @param offset index in {@code bytes} where the encoded string starts
 * @param length number of bytes that make up the encoded string
 * @throws E if the writer fails to write the value
 */
void writeUTF8( byte[] bytes, int offset, int length ) throws E;

/**
 * Writes the whole char array as a string by delegating to
 * {@link #writeString(char[], int, int)} with offset 0 and the array's length.
 *
 * @param value the chars to write
 * @throws E if the delegated write fails
 */
default void writeString( char[] value ) throws E
{
writeString( value, 0, value.length );
}

void writeString( char[] value, int offset, int length ) throws E;

void beginUTF8( int size ) throws E;

void copyUTF8( long fromAddress, int length ) throws E;

void endUTF8() throws E;

void beginArray( int size, ArrayType arrayType ) throws E;

void endArray() throws E;
Expand Down Expand Up @@ -133,22 +130,12 @@ public void writeString( char value ) throws E
}

@Override
public void writeString( char[] value, int offset, int length ) throws E
{ // no-op
}

@Override
public void beginUTF8( int size ) throws E
{ // no-op
}

@Override
public void copyUTF8( long fromAddress, int length ) throws E
{ // no-op
public void writeUTF8( byte[] bytes, int offset, int length ) throws E
{ //no-op
}

@Override
public void endUTF8() throws E
public void writeString( char[] value, int offset, int length ) throws E
{ // no-op
}

Expand Down

0 comments on commit c298471

Please sign in to comment.