diff --git a/community/bolt/src/main/java/org/neo4j/bolt/v1/messaging/Neo4jPack.java b/community/bolt/src/main/java/org/neo4j/bolt/v1/messaging/Neo4jPack.java index 67f5b23bdfc27..a745a86e9a408 100644 --- a/community/bolt/src/main/java/org/neo4j/bolt/v1/messaging/Neo4jPack.java +++ b/community/bolt/src/main/java/org/neo4j/bolt/v1/messaging/Neo4jPack.java @@ -358,6 +358,12 @@ public void writeFloatingPoint( double value ) throws IOException pack( value ); } + @Override + public void writeUTF8( byte[] bytes, int offset, int length ) throws IOException + { + packUTF8(bytes, offset, length); + } + @Override public void writeString( String value ) throws IOException { @@ -376,24 +382,6 @@ public void writeString( char[] value, int offset, int length ) throws IOExcepti pack( String.valueOf( value, offset, length ) ); } - @Override - public void beginUTF8( int size ) throws IOException - { - throw new UnsupportedOperationException( "pack stream cannot handle UTF8 values" ); - } - - @Override - public void copyUTF8( long fromAddress, int length ) throws IOException - { - throw new UnsupportedOperationException( "pack stream cannot handle UTF8 values" ); - } - - @Override - public void endUTF8() throws IOException - { - throw new UnsupportedOperationException( "pack stream cannot handle UTF8 values" ); - } - @Override public void beginArray( int size, ArrayType arrayType ) throws IOException { diff --git a/community/bolt/src/main/java/org/neo4j/bolt/v1/packstream/PackStream.java b/community/bolt/src/main/java/org/neo4j/bolt/v1/packstream/PackStream.java index c88cdd26e434b..c12457dde3163 100644 --- a/community/bolt/src/main/java/org/neo4j/bolt/v1/packstream/PackStream.java +++ b/community/bolt/src/main/java/org/neo4j/bolt/v1/packstream/PackStream.java @@ -334,6 +334,20 @@ public void pack( String value ) throws IOException } } + public void packUTF8( byte[] bytes, int offset, int length ) throws IOException + { + if ( bytes == null ) + { + packNull(); + } + else + { + packStringHeader( length ); + out.writeBytes( bytes, offset, length ); + } + } + + protected void packBytesHeader( int size ) throws IOException { if ( size <= Byte.MAX_VALUE ) @@ -786,7 +800,7 @@ public static class Unexpected extends PackStreamException public Unexpected( PackType expectedType, byte unexpectedMarkerByte ) { super( "Wrong type received. Expected " + expectedType + ", received: " + type( unexpectedMarkerByte ) + - " " + "(" + toHexString( unexpectedMarkerByte ) + ")." ); + " " + "(" + toHexString( unexpectedMarkerByte ) + ")." ); } private static String toHexString( byte unexpectedMarkerByte ) diff --git a/community/bolt/src/test/java/org/neo4j/bolt/v1/messaging/Neo4jPackTest.java b/community/bolt/src/test/java/org/neo4j/bolt/v1/messaging/Neo4jPackTest.java index 8996681b84fbb..25c170a9d7481 100644 --- a/community/bolt/src/test/java/org/neo4j/bolt/v1/messaging/Neo4jPackTest.java +++ b/community/bolt/src/test/java/org/neo4j/bolt/v1/messaging/Neo4jPackTest.java @@ -24,6 +24,7 @@ import org.junit.rules.ExpectedException; import java.io.IOException; +import java.nio.charset.StandardCharsets; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -238,6 +239,24 @@ public void shouldTreatCharArrayAsListOfStrings() throws IOException equalTo( VirtualValues.list( stringValue( "W" ), stringValue( "H" ), stringValue( "Y" ) ) ) ); } + @Test + public void shouldPackUtf8() throws IOException + { + // Given + String value = "\uD83D\uDE31"; + byte[] bytes = value.getBytes( StandardCharsets.UTF_8 ); + TextValue textValue = Values.utf8Value( bytes, 0, bytes.length ); + PackedOutputArray output = new PackedOutputArray(); + Neo4jPack.Packer packer = new Neo4jPack.Packer( output ); + packer.pack( textValue ); + + // When + AnyValue unpacked = unpacked( output.bytes() ); + + // Then + assertThat( unpacked, equalTo( textValue ) ); + } + private static class Unpackable { diff --git a/community/cypher/cypher/src/main/scala/org/neo4j/cypher/internal/compatibility/v3_3/runtime/helpers/CastSupport.scala b/community/cypher/cypher/src/main/scala/org/neo4j/cypher/internal/compatibility/v3_3/runtime/helpers/CastSupport.scala index 247b60ed599a2..3c886ca609595 100644 --- a/community/cypher/cypher/src/main/scala/org/neo4j/cypher/internal/compatibility/v3_3/runtime/helpers/CastSupport.scala +++ b/community/cypher/cypher/src/main/scala/org/neo4j/cypher/internal/compatibility/v3_3/runtime/helpers/CastSupport.scala @@ -20,6 +20,7 @@ package org.neo4j.cypher.internal.compatibility.v3_3.runtime.helpers import org.neo4j.cypher.internal.frontend.v3_3.CypherTypeException +import org.neo4j.string.UTF8 import org.neo4j.values.storable.{ArrayValue, _} import org.neo4j.values.virtual._ import org.neo4j.values.{AnyValue, AnyValueWriter} @@ -105,15 +106,24 @@ object CastSupport { /*Returns a converter given a type value*/ def getConverter(x: AnyValue): Converter = x match { - case _: CharValue => Converter(transform(new ArrayConverterWriter(classOf[Char], a => Values.charArray(a.asInstanceOf[Array[Char]])))) - case _: TextValue => Converter(transform(new ArrayConverterWriter(classOf[String], a => Values.stringArray(a.asInstanceOf[Array[String]]:_*)))) - case _: BooleanValue => Converter(transform(new ArrayConverterWriter(classOf[Boolean], a => Values.booleanArray(a.asInstanceOf[Array[Boolean]])))) - case _: ByteValue => Converter(transform(new ArrayConverterWriter(classOf[Byte], a => Values.byteArray(a.asInstanceOf[Array[Byte]])))) - case _: ShortValue => Converter(transform(new ArrayConverterWriter(classOf[Short], a => Values.shortArray(a.asInstanceOf[Array[Short]])))) - case _: IntValue => Converter(transform(new ArrayConverterWriter(classOf[Int], a => Values.intArray(a.asInstanceOf[Array[Int]])))) - case _: LongValue => Converter(transform(new ArrayConverterWriter(classOf[Long], a => Values.longArray(a.asInstanceOf[Array[Long]])))) - case _: FloatValue => Converter(transform(new ArrayConverterWriter(classOf[Float], a => Values.floatArray(a.asInstanceOf[Array[Float]])))) - case _: DoubleValue => Converter(transform(new ArrayConverterWriter(classOf[Double], a => Values.doubleArray(a.asInstanceOf[Array[Double]])))) + case _: CharValue => Converter( + transform(new ArrayConverterWriter(classOf[Char], a => Values.charArray(a.asInstanceOf[Array[Char]])))) + case _: TextValue => Converter( + transform(new ArrayConverterWriter(classOf[String], a => Values.stringArray(a.asInstanceOf[Array[String]]: _*)))) + case _: BooleanValue => Converter( + transform(new ArrayConverterWriter(classOf[Boolean], a => Values.booleanArray(a.asInstanceOf[Array[Boolean]])))) + case _: ByteValue => Converter( + transform(new ArrayConverterWriter(classOf[Byte], a => Values.byteArray(a.asInstanceOf[Array[Byte]])))) + case _: ShortValue => Converter( + transform(new ArrayConverterWriter(classOf[Short], a => Values.shortArray(a.asInstanceOf[Array[Short]])))) + case _: IntValue => Converter( + transform(new ArrayConverterWriter(classOf[Int], a => Values.intArray(a.asInstanceOf[Array[Int]])))) + case _: LongValue => Converter( + transform(new ArrayConverterWriter(classOf[Long], a => Values.longArray(a.asInstanceOf[Array[Long]])))) + case _: FloatValue => Converter( + transform(new ArrayConverterWriter(classOf[Float], a => Values.floatArray(a.asInstanceOf[Array[Float]])))) + case _: DoubleValue => Converter( + transform(new ArrayConverterWriter(classOf[Double], a => Values.doubleArray(a.asInstanceOf[Array[Double]])))) case _ => throw new CypherTypeException("Property values can only be of primitive types or arrays thereof") } @@ -121,7 +131,9 @@ object CastSupport { value.writeTo(writer) writer.array } - private class ArrayConverterWriter(typ: Class[_], transformer: (AnyRef) => ArrayValue) extends AnyValueWriter[RuntimeException] { + + private class ArrayConverterWriter(typ: Class[_], transformer: (AnyRef) => ArrayValue) + extends AnyValueWriter[RuntimeException] { private var _array: AnyRef = null private var index = 0 @@ -184,13 +196,8 @@ object CastSupport { override def writeString(value: Char): Unit = write(value) - override def writeString(value: Array[Char], offset: Int, length: Int): Unit = write(new String(value, offset, length)) - - override def beginUTF8(size: Int): Unit = fail() - - override def copyUTF8(fromAddress: Long, length: Int): Unit = fail() - - override def endUTF8(): Unit = fail() + override def writeString(value: Array[Char], offset: Int, length: Int): Unit = write( + new String(value, offset, length)) override def beginArray(size: Int, arrayType: ValueWriter.ArrayType): Unit = fail() @@ -199,6 +206,9 @@ object CastSupport { override def writeByteArray(value: Array[Byte]): Unit = { _array = value } + + override def writeUTF8(bytes: Array[Byte], offset: Int, length: Int): Unit = + write(UTF8.decode(bytes, offset, length)); } } diff --git a/community/kernel/src/main/java/org/neo4j/kernel/api/index/ArrayEncoder.java b/community/kernel/src/main/java/org/neo4j/kernel/api/index/ArrayEncoder.java index 11d5cc5f84532..58a4789c9030e 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/api/index/ArrayEncoder.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/api/index/ArrayEncoder.java @@ -122,6 +122,12 @@ public void writeString( String value ) builder.append( '|' ); } + @Override + public void writeUTF8( byte[] bytes, int offset, int length ) throws RuntimeException + { + writeString( UTF8.decode( bytes, offset, length ) ); + } + @Override public void writeString( char value ) { @@ -136,24 +142,6 @@ public void writeString( char[] value, int offset, int length ) builder.append( '|' ); } - @Override - public void beginUTF8( int size ) - { - throw new UnsupportedOperationException( "direct UTF8 encoding is not supported yet!" ); - } - - @Override - public void copyUTF8( long fromAddress, int length ) - { - throw new UnsupportedOperationException( "direct UTF8 encoding is not supported yet!" ); - } - - @Override - public void endUTF8() - { - throw new UnsupportedOperationException( "direct UTF8 encoding is not supported yet!" ); - } - @Override public void beginArray( int size, ArrayType arrayType ) { diff --git a/community/values/src/main/java/org/neo4j/values/BaseToObjectValueWriter.java b/community/values/src/main/java/org/neo4j/values/BaseToObjectValueWriter.java index c15cf564ec17d..f0b8632a905eb 100644 --- a/community/values/src/main/java/org/neo4j/values/BaseToObjectValueWriter.java +++ b/community/values/src/main/java/org/neo4j/values/BaseToObjectValueWriter.java @@ -34,6 +34,7 @@ import org.neo4j.graphdb.Relationship; import org.neo4j.graphdb.spatial.Point; import org.neo4j.helpers.collection.ReverseArrayIterator; +import org.neo4j.string.UTF8; import org.neo4j.values.storable.TextArray; import org.neo4j.values.storable.TextValue; import org.neo4j.values.virtual.CoordinateReferenceSystem; @@ -45,10 +46,11 @@ /** * Base class for converting AnyValue to normal java objects. - * + *

* This base class takes care of converting all "normal" java types such as * number types, booleans, strings, arrays and lists. It leaves to the extending * class to handle neo4j specific types such as nodes, edges and points. + * * @param */ public abstract class BaseToObjectValueWriter implements AnyValueWriter @@ -64,7 +66,8 @@ public BaseToObjectValueWriter() protected abstract Relationship newRelationshipProxyById( long id ); - protected abstract Point newGeographicPoint( double longitude, double latitude, String name, int code, String href ); + protected abstract Point newGeographicPoint( double longitude, double latitude, String name, int code, + String href ); protected abstract Point newCartesianPoint( double x, double y, String name, int code, String href ); @@ -349,21 +352,9 @@ public void writeString( char[] value, int offset, int length ) throws RuntimeEx } @Override - public void beginUTF8( int size ) throws RuntimeException - { - throw new UnsupportedOperationException(); - } - - @Override - public void copyUTF8( long fromAddress, int length ) throws RuntimeException - { - throw new UnsupportedOperationException(); - } - - @Override - public void endUTF8() throws RuntimeException + public void writeUTF8( byte[] bytes, int offset, int length ) throws E { - throw new UnsupportedOperationException(); + writeValue( UTF8.decode( bytes, offset, length ) ); } @Override diff --git a/community/values/src/main/java/org/neo4j/values/storable/StringValue.java b/community/values/src/main/java/org/neo4j/values/storable/StringValue.java index 2e396b700e8b3..0df0263718515 100644 --- a/community/values/src/main/java/org/neo4j/values/storable/StringValue.java +++ b/community/values/src/main/java/org/neo4j/values/storable/StringValue.java @@ -19,6 +19,8 @@ */ package org.neo4j.values.storable; +import java.nio.charset.StandardCharsets; + import static java.lang.String.format; public abstract class StringValue extends TextValue @@ -113,4 +115,59 @@ public int length() return value.length(); } } + + /* + * Just as a normal StringValue but is backed by a byte array and does string + * serialization lazily. + * + * TODO in this implementation most operation will actually load the string + * such as hashCode, length, equals etc. These could be implemented using + * the byte array directly + */ + static final class UTF8StringValue extends StringValue + { + private volatile String value; + private final byte[] bytes; + private final int offset; + private final int length; + + UTF8StringValue( byte[] bytes, int offset, int length ) + { + assert bytes != null; + this.bytes = bytes; + this.offset = offset; + this.length = length; + } + + @Override + public void writeTo( ValueWriter writer ) throws E + { + writer.writeUTF8( bytes, offset, length ); + } + + @Override + String value() + { + String s = value; + if ( s == null ) + { + synchronized ( this ) + { + s = value; + if ( s == null ) + { + s = value = new String( bytes, offset, length, StandardCharsets.UTF_8 ); + + } + } + } + return s; + } + + @Override + public int length() + { + return value().length(); + } + } } diff --git a/community/values/src/main/java/org/neo4j/values/storable/ValueWriter.java b/community/values/src/main/java/org/neo4j/values/storable/ValueWriter.java index 5ac39518121d6..7f66592df2e51 100644 --- a/community/values/src/main/java/org/neo4j/values/storable/ValueWriter.java +++ b/community/values/src/main/java/org/neo4j/values/storable/ValueWriter.java @@ -28,6 +28,7 @@ */ public interface ValueWriter { + enum ArrayType { BYTE, @@ -61,6 +62,8 @@ enum ArrayType void writeString( char value ) throws E; + void writeUTF8( byte[] bytes, int offset, int length ) throws E; + default void writeString( char[] value ) throws E { writeString( value, 0, value.length ); @@ -68,12 +71,6 @@ default void writeString( char[] value ) throws E void writeString( char[] value, int offset, int length ) throws E; - void beginUTF8( int size ) throws E; - - void copyUTF8( long fromAddress, int length ) throws E; - - void endUTF8() throws E; - void beginArray( int size, ArrayType arrayType ) throws E; void endArray() throws E; @@ -133,22 +130,12 @@ public void writeString( char value ) throws E } @Override - public void writeString( char[] value, int offset, int length ) throws E - { // no-op - } - - @Override - public void beginUTF8( int size ) throws E - { // no-op - } - - @Override - public void copyUTF8( long fromAddress, int length ) throws E - { // no-op + public void writeUTF8( byte[] bytes, int offset, int length ) throws E + { //no-op } @Override - public void endUTF8() throws E + public void writeString( char[] value, int offset, int length ) throws E { // no-op } diff --git a/community/values/src/main/java/org/neo4j/values/storable/Values.java b/community/values/src/main/java/org/neo4j/values/storable/Values.java index 66f8462a14ebe..675d7b7e9dec3 100644 --- a/community/values/src/main/java/org/neo4j/values/storable/Values.java +++ b/community/values/src/main/java/org/neo4j/values/storable/Values.java @@ -107,6 +107,11 @@ public static double coerceToDouble( Value value ) public static final Value NO_VALUE = NoValue.NO_VALUE; + public static final TextValue utf8Value(byte[] bytes, int offset, int length) + { + return new StringValue.UTF8StringValue( bytes, offset, length ); + } + public static TextValue stringValue( String value ) { return new StringValue.Direct( value ); diff --git a/community/values/src/test/java/org/neo4j/values/storable/BufferValueWriter.java b/community/values/src/test/java/org/neo4j/values/storable/BufferValueWriter.java index 450a582c13f04..e75c1c133aae6 100644 --- a/community/values/src/test/java/org/neo4j/values/storable/BufferValueWriter.java +++ b/community/values/src/test/java/org/neo4j/values/storable/BufferValueWriter.java @@ -25,13 +25,12 @@ import java.util.Arrays; import java.util.List; +import org.neo4j.string.UTF8; + import static java.lang.String.format; import static org.hamcrest.MatcherAssert.assertThat; import static org.neo4j.values.storable.BufferValueWriter.SpecialKind.BeginArray; -import static org.neo4j.values.storable.BufferValueWriter.SpecialKind.BeginUTF8; -import static org.neo4j.values.storable.BufferValueWriter.SpecialKind.CopyUTF8; import static org.neo4j.values.storable.BufferValueWriter.SpecialKind.EndArray; -import static org.neo4j.values.storable.BufferValueWriter.SpecialKind.EndUTF8; import static org.neo4j.values.storable.BufferValueWriter.SpecialKind.WriteByteArray; import static org.neo4j.values.storable.BufferValueWriter.SpecialKind.WriteCharArray; @@ -41,9 +40,6 @@ enum SpecialKind { WriteCharArray, WriteByteArray, - BeginUTF8, - CopyUTF8, - EndUTF8, BeginArray, EndArray, } @@ -159,27 +155,15 @@ public void writeString( char value ) } @Override - public void writeString( char[] value, int offset, int length ) - { - buffer.add( Specials.charArray( value, offset, length ) ); - } - - @Override - public void beginUTF8( int size ) - { - buffer.add( Specials.beginUTF8( size ) ); - } - - @Override - public void copyUTF8( long fromAddress, int length ) + public void writeUTF8( byte[] bytes, int offset, int length ) throws RuntimeException { - buffer.add( Specials.copyUTF8( fromAddress, length ) ); + buffer.add( UTF8.decode( bytes, offset, length ) ); } @Override - public void endUTF8() + public void writeString( char[] value, int offset, int length ) { - buffer.add( Specials.endUTF8() ); + buffer.add( Specials.charArray( value, offset, length ) ); } @Override @@ -210,22 +194,7 @@ public static Special charArray( char[] value, int offset, int length ) public static Special byteArray( byte[] value ) { - return new Special( WriteByteArray, Arrays.hashCode( value ) ); - } - - public static Special beginUTF8( int size ) - { - return new Special( BeginUTF8, size ); - } - - public static Special copyUTF8( long fromAddress, int length ) - { - return new Special( CopyUTF8, format( "%d %d", fromAddress, length ) ); - } - - public static Special endUTF8() - { - return new Special( EndUTF8, 0 ); + return new Special( WriteByteArray, Arrays.hashCode( value ) ); } public static Special beginArray( int size, ArrayType arrayType ) diff --git a/community/values/src/test/java/org/neo4j/values/storable/ThrowingValueWriterTest.java b/community/values/src/test/java/org/neo4j/values/storable/ThrowingValueWriterTest.java index 64e1e9861a33c..ce09786f591bc 100644 --- a/community/values/src/test/java/org/neo4j/values/storable/ThrowingValueWriterTest.java +++ b/community/values/src/test/java/org/neo4j/values/storable/ThrowingValueWriterTest.java @@ -110,25 +110,13 @@ public void writeString( char value ) throws TestException } @Override - public void writeString( char[] value, int offset, int length ) throws TestException - { - throw new TestException(); - } - - @Override - public void beginUTF8( int size ) throws TestException + public void writeUTF8( byte[] bytes, int offset, int length ) throws TestException { throw new TestException(); } @Override - public void copyUTF8( long fromAddress, int length ) throws TestException - { - throw new TestException(); - } - - @Override - public void endUTF8() throws TestException + public void writeString( char[] value, int offset, int length ) throws TestException { throw new TestException(); } diff --git a/community/values/src/test/java/org/neo4j/values/storable/UTF8StringValueTest.java b/community/values/src/test/java/org/neo4j/values/storable/UTF8StringValueTest.java new file mode 100644 index 0000000000000..42e32235ba457 --- /dev/null +++ b/community/values/src/test/java/org/neo4j/values/storable/UTF8StringValueTest.java @@ -0,0 +1,48 @@ +package org.neo4j.values.storable; + +import org.junit.Test; + +import java.nio.charset.StandardCharsets; + +import static org.hamcrest.CoreMatchers.equalTo; +import static org.hamcrest.MatcherAssert.assertThat; +import static org.neo4j.values.storable.Values.stringValue; +import static org.neo4j.values.storable.Values.utf8Value; + +public class UTF8StringValueTest +{ + private String[] strings = {"", "1337", " ", "普通话/普通話", "\uD83D\uDE21"}; + + @Test + public void shouldHandleDifferentTypesOfStrings() + { + for ( String string : strings ) + { + TextValue stringValue = stringValue( string ); + byte[] bytes = string.getBytes( StandardCharsets.UTF_8 ); + TextValue utf8 = utf8Value( bytes, 0, bytes.length ); + assertSame( stringValue, utf8 ); + } + } + + @Test + public void shouldHandleOffset() + { + // Given + byte[] bytes = "abcdefg".getBytes( StandardCharsets.UTF_8 ); + + // When + TextValue textValue = utf8Value( bytes, 3, 2 ); + + // Then + assertSame( textValue, stringValue( "de" ) ); + } + + private void assertSame( TextValue lhs, TextValue rhs ) + { + assertThat( lhs.length(), equalTo( rhs.length() ) ); + assertThat( lhs, equalTo( rhs ) ); + assertThat( rhs, equalTo( lhs ) ); + assertThat( lhs.hashCode(), equalTo( rhs.hashCode() ) ); + } +} \ No newline at end of file