From 34ce6b4a19df0b67e4750dab0e753951b59767e0 Mon Sep 17 00:00:00 2001 From: Pontus Melke Date: Tue, 31 Oct 2017 21:02:24 +0100 Subject: [PATCH] Removed default implementations from StringValue --- .../neo4j/values/storable/StringValue.java | 122 +++--------------- .../storable/StringWrappingStringValue.java | 104 +++++++++++++++ .../values/storable/UTF8StringValue.java | 56 ++++++-- .../values/storable/UTF8StringValueTest.java | 63 +++++++-- .../acceptance/TextValueSpecification.scala | 6 +- 5 files changed, 218 insertions(+), 133 deletions(-) diff --git a/community/values/src/main/java/org/neo4j/values/storable/StringValue.java b/community/values/src/main/java/org/neo4j/values/storable/StringValue.java index 5dd10507eff75..1c657b9281007 100644 --- a/community/values/src/main/java/org/neo4j/values/storable/StringValue.java +++ b/community/values/src/main/java/org/neo4j/values/storable/StringValue.java @@ -49,63 +49,6 @@ public boolean equals( String x ) return value().equals( x ); } - @Override - public int computeHash() - { - //NOTE that we are basing the hash code on code points instead of char[] values. - String value = value(); - if ( value.isEmpty() ) - { - return 0; - } - int h = 1; - final int length = value.length(); - for ( int offset = 0; offset < length; ) - { - final int codePoint = value.codePointAt( offset ); - h = 31 * h + codePoint; - offset += Character.charCount( codePoint ); - } - return h; - } - - @Override - public TextValue substring( int start, int end ) - { - int s = Math.min( start, length() ); - int e = Math.min( end, length() ); - String value = value(); - int codePointStart = value.offsetByCodePoints( 0, s ); - int codePointEnd = value.offsetByCodePoints( 0, e ); - - return Values.stringValue( value.substring( codePointStart, codePointEnd ) ); - } - - @Override - public TextValue trim() - { - String value = value(); - int start = ltrimIndex( value ); - int end = rtrimIndex( value ); - return Values.stringValue( value.substring( start, Math.max( end, start ) ) ); - } - - @Override - public TextValue ltrim() - { - String value = value(); - int start = ltrimIndex( value ); - return Values.stringValue( value.substring( start, value.length() ) ); - } - - @Override - public TextValue rtrim() - { - String value = value(); - int end = rtrimIndex( value ); - return Values.stringValue( value.substring( 0, end ) ); - } - @Override public void writeTo( ValueWriter writer ) throws E { @@ -124,28 +67,6 @@ public String toString() return format( "String(\"%s\")", value() ); } - @Override - public int compareTo( TextValue other ) - { - String thisString = value(); - String thatString = other.stringValue(); - int len1 = thisString.length(); - int len2 = thatString.length(); - int lim = Math.min( len1, len2 ); - - int k = 0; - while ( k < lim ) - { - int c1 = thisString.codePointAt( k ); - int c2 = thatString.codePointAt( k ); - if ( c1 != c2 ) - { - return c1 - c2; - } - k += Character.charCount( c1 ); - } - return length() - other.length(); - } @Override public String stringValue() @@ -159,43 +80,24 @@ public String prettyPrint() return format( "'%s'", value() ); } - private int ltrimIndex( String value ) + static TextValue EMTPY = new StringValue() { - int start = 0, length = value.length(); - while ( start < length ) + @Override + protected int computeHash() { - int codePoint = value.codePointAt( start ); - if ( !Character.isWhitespace( codePoint ) ) - { - break; - } - start += Character.charCount( codePoint ); + return 0; } - return start; - } - - private int rtrimIndex( String value ) - { - int end = value.length(); - while ( end > 0 ) + @Override + public int length() { - int codePoint = value.codePointBefore( end ); - if ( !Character.isWhitespace( codePoint ) ) - { - break; - } - end--; + return 0; } - return end; - } - static TextValue EMTPY = new StringValue() - { @Override - public int length() + public TextValue substring( int start, int end ) { - return 0; + return this; } @Override @@ -216,6 +118,12 @@ public TextValue rtrim() return this; } + @Override + public int compareTo( TextValue other ) + { + return Integer.compare( 0, other.length() ); + } + @Override String value() { diff --git a/community/values/src/main/java/org/neo4j/values/storable/StringWrappingStringValue.java b/community/values/src/main/java/org/neo4j/values/storable/StringWrappingStringValue.java index 720a6d19a5d36..7d53b97c7ff5b 100644 --- a/community/values/src/main/java/org/neo4j/values/storable/StringWrappingStringValue.java +++ b/community/values/src/main/java/org/neo4j/values/storable/StringWrappingStringValue.java @@ -44,4 +44,108 @@ public int length() { return value.codePointCount( 0, value.length() ); } + + @Override + public int computeHash() + { + //NOTE that we are basing the hash code on code points instead of char[] values. + if ( value.isEmpty() ) + { + return 0; + } + int h = 1, length = value.length(); + for ( int offset = 0, codePoint; offset < length; offset += Character.charCount( codePoint ) ) + { + codePoint = value.codePointAt( offset ); + h = 31 * h + codePoint; + } + return h; + } + + @Override + public int compareTo( TextValue other ) + { + String thisString = value; + String thatString = other.stringValue(); + int len1 = thisString.length(); + int len2 = thatString.length(); + int lim = Math.min( len1, len2 ); + + int k = 0; + while ( k < lim ) + { + int c1 = thisString.codePointAt( k ); + int c2 = thatString.codePointAt( k ); + if ( c1 != c2 ) + { + return c1 - c2; + } + k += Character.charCount( c1 ); + } + return length() - other.length(); + } + + @Override + public TextValue substring( int start, int end ) + { + int s = Math.min( start, length() ); + int e = Math.min( end, length() ); + int codePointStart = value.offsetByCodePoints( 0, s ); + int codePointEnd = value.offsetByCodePoints( 0, e ); + + return Values.stringValue( value.substring( codePointStart, codePointEnd ) ); + } + + @Override + public TextValue trim() + { + int start = ltrimIndex( value ); + int end = rtrimIndex( value ); + return Values.stringValue( value.substring( start, Math.max( end, start ) ) ); + } + + @Override + public TextValue ltrim() + { + int start = ltrimIndex( value ); + return Values.stringValue( value.substring( start, value.length() ) ); + } + + @Override + public TextValue rtrim() + { + int end = rtrimIndex( value ); + return Values.stringValue( value.substring( 0, end ) ); + } + + private int ltrimIndex( String value ) + { + int start = 0, length = value.length(); + while ( start < length ) + { + int codePoint = value.codePointAt( start ); + if ( !Character.isWhitespace( codePoint ) ) + { + break; + } + start += Character.charCount( codePoint ); + } + + return start; + } + + private int rtrimIndex( String value ) + { + int end = value.length(); + while ( end > 0 ) + { + int codePoint = value.codePointBefore( end ); + if ( !Character.isWhitespace( codePoint ) ) + { + break; + } + end--; + } + return end; + } } diff --git a/community/values/src/main/java/org/neo4j/values/storable/UTF8StringValue.java b/community/values/src/main/java/org/neo4j/values/storable/UTF8StringValue.java index bb5c96701d910..54d07928b18a2 100644 --- a/community/values/src/main/java/org/neo4j/values/storable/UTF8StringValue.java +++ b/community/values/src/main/java/org/neo4j/values/storable/UTF8StringValue.java @@ -29,10 +29,10 @@ */ public final class UTF8StringValue extends StringValue { - //0111 1111, used for removing HIGH BIT from byte - private static final int HIGH_BIT_MASK = 127; - //0100 000, used for detecting non-continuation bytes, e.g. 10xx xxxx - private static final int NON_CONTINUATION_BIT_MASK = 64; + /** Used for removing the high order bit from byte. */ + private static final int HIGH_BIT_MASK = 0b0111_1111; + /** Used for detecting non-continuation bytes. For example {@code 0b10xx_xxxx}. */ + private static final int NON_CONTINUATION_BIT_MASK = 0b0100_0000; private volatile String value; private final byte[] bytes; @@ -56,7 +56,7 @@ public void writeTo( ValueWriter writer ) throws E @Override public boolean equals( Value value ) { - if ( value instanceof org.neo4j.values.storable.UTF8StringValue ) + if ( value instanceof UTF8StringValue ) { return Arrays.equals( bytes, ((org.neo4j.values.storable.UTF8StringValue) value).bytes ); } @@ -77,8 +77,7 @@ String value() s = value; if ( s == null ) { - s = value = new String( bytes, offset, length, StandardCharsets.UTF_8 ); - + value = s = new String( bytes, offset, length, StandardCharsets.UTF_8 ); } } } @@ -212,15 +211,54 @@ public TextValue trim() int startIndex = trimLeftIndex(); int endIndex = trimRightIndex(); + if (startIndex > endIndex) + { + return StringValue.EMTPY; + } + return new UTF8StringValue( values, startIndex, Math.max( endIndex + 1 - startIndex, 0 ) ); } + @Override + public TextValue ltrim() + { + byte[] values = bytes; + if ( values.length == 0 || length == 0 ) + { + return this; + } + + int startIndex = trimLeftIndex(); + if (startIndex >= values.length) + { + return StringValue.EMTPY; + } + return new UTF8StringValue( values, startIndex, values.length - startIndex ); + } + + @Override + public TextValue rtrim() + { + byte[] values = bytes; + if ( values.length == 0 || length == 0 ) + { + return this; + } + + int endIndex = trimRightIndex(); + if (endIndex < 0) + { + return StringValue.EMTPY; + } + return new UTF8StringValue( values, offset, endIndex + 1 - offset); + } + @Override public int compareTo( TextValue other ) { if ( !(other instanceof UTF8StringValue) ) { - return super.compareTo( other ); + return -other.compareTo( this ); } UTF8StringValue otherUTF8 = (UTF8StringValue) other; int len1 = bytes.length; @@ -365,7 +403,7 @@ private int trimRightIndex() int codePoint = codePoint( (byte) (b << bytesNeeded), index, bytesNeeded ); if ( !Character.isWhitespace( codePoint ) ) { - return Math.min( index + bytesNeeded - 1, length - 1 ); + return Math.min( index + bytesNeeded - 1, bytes.length - 1 ); } index--; diff --git a/community/values/src/test/java/org/neo4j/values/storable/UTF8StringValueTest.java b/community/values/src/test/java/org/neo4j/values/storable/UTF8StringValueTest.java index c7f65cbc166fb..69a7a6722ba1f 100644 --- a/community/values/src/test/java/org/neo4j/values/storable/UTF8StringValueTest.java +++ b/community/values/src/test/java/org/neo4j/values/storable/UTF8StringValueTest.java @@ -34,7 +34,7 @@ public class UTF8StringValueTest private String[] strings = {"", "1337", " ", "普通话/普通話", "\uD83D\uDE21", " a b c ", "䤹᳽", "熨", "ۼ", "ⲹ楡톜ഷۢ⼈늉₭샺ጚ砧攡跿家䯶鲏⬖돛犽ۼ", " 㺂࿝鋦毠",//first character is a thin space, - "\u0018", ";먵熍裬岰鷲趫\uA8C5얱㓙髿ᚳᬼ≩萌 " + "\u0018", ";먵熍裬岰鷲趫\uA8C5얱㓙髿ᚳᬼ≩萌 ", "\u001cӳ" }; @Test @@ -61,25 +61,59 @@ public void shouldTrimDifferentTypesOfStrings() } } + @Test + public void shouldLTrimDifferentTypesOfStrings() + { + for ( String string : strings ) + { + TextValue stringValue = stringValue( string ); + byte[] bytes = string.getBytes( StandardCharsets.UTF_8 ); + TextValue utf8 = utf8Value( bytes ); + assertSame( stringValue.ltrim(), utf8.ltrim() ); + } + } + + @Test + public void trimShouldBeSameAsLtrimAndRtrim() + { + for ( String string : strings ) + { + TextValue utf8 = utf8Value( string.getBytes( StandardCharsets.UTF_8 ) ); + assertSame( utf8.trim(), utf8.ltrim().rtrim() ); + } + } + + @Test + public void shouldRTrimDifferentTypesOfStrings() + { + for ( String string : strings ) + { + TextValue stringValue = stringValue( string ); + byte[] bytes = string.getBytes( StandardCharsets.UTF_8 ); + TextValue utf8 = utf8Value( bytes ); + assertSame( stringValue.rtrim(), utf8.rtrim() ); + } + } + @Test public void shouldCompareTo() { - // Given - String string1 = "⦹"; - String string2 = ""; - //String string1 = "Y"; - //String string2 = "끷"; + for (String string1 : strings) + { + for ( String string2 : strings ) + { + + int x = stringValue( string1 ).compareTo( utf8Value( string2.getBytes( StandardCharsets.UTF_8 ) ) ); + int y = utf8Value( string1.getBytes( StandardCharsets.UTF_8 ) ).compareTo( stringValue( string2 ) ); + int z = utf8Value( string1.getBytes( StandardCharsets.UTF_8 ) ) + .compareTo( utf8Value( string2.getBytes( StandardCharsets.UTF_8 ) ) ); - int x = stringValue( string1 ).compareTo( utf8Value( string2.getBytes( StandardCharsets.UTF_8 ) ) ); - int y = utf8Value( string1.getBytes( StandardCharsets.UTF_8 ) ).compareTo( stringValue( string2 ) ); - int z = utf8Value( string1.getBytes( StandardCharsets.UTF_8 ) ).compareTo( utf8Value( string2.getBytes( StandardCharsets.UTF_8 ) ) ); - int xx = stringValue( string2 ).compareTo( utf8Value( string1.getBytes( StandardCharsets.UTF_8 ) ) ); - int yy = utf8Value( string2.getBytes( StandardCharsets.UTF_8 ) ).compareTo( stringValue( string1 ) ); - int zz = utf8Value( string2.getBytes( StandardCharsets.UTF_8 ) ).compareTo( utf8Value( string1.getBytes( StandardCharsets.UTF_8 ) ) ); + assertThat( x, equalTo( y ) ); + assertThat( x, equalTo( z ) ); + } + } - assertThat(x, equalTo( y )); - assertThat(x, equalTo( z )); } @Test @@ -102,5 +136,6 @@ private void assertSame( TextValue lhs, TextValue rhs ) assertThat( format( "%s != %s", lhs, rhs ), lhs, equalTo( rhs ) ); assertThat( format( "%s != %s", rhs, lhs ), rhs, equalTo( lhs ) ); assertThat( format( "%s.hashCode != %s.hashCode", rhs, lhs ), lhs.hashCode(), equalTo( rhs.hashCode() ) ); + assertThat( lhs, equalTo( rhs ) ); } } diff --git a/enterprise/cypher/acceptance-spec-suite/src/test/scala/org/neo4j/internal/cypher/acceptance/TextValueSpecification.scala b/enterprise/cypher/acceptance-spec-suite/src/test/scala/org/neo4j/internal/cypher/acceptance/TextValueSpecification.scala index 3c65ddeaebcd0..7048800f26bee 100644 --- a/enterprise/cypher/acceptance-spec-suite/src/test/scala/org/neo4j/internal/cypher/acceptance/TextValueSpecification.scala +++ b/enterprise/cypher/acceptance-spec-suite/src/test/scala/org/neo4j/internal/cypher/acceptance/TextValueSpecification.scala @@ -55,9 +55,9 @@ object TextValueSpecification extends Properties("TextValue") with Configuration property("trim") = forAll { (x: String) => { val sValue = stringValue(x) - val uTF8StringValue = utf8Value(x.getBytes(StandardCharsets.UTF_8)) - equivalent(sValue.trim(), uTF8StringValue.ltrim().rtrim()) && - equivalent(uTF8StringValue.trim(), sValue.ltrim().rtrim())} + val utf8StringValue = utf8Value(x.getBytes(StandardCharsets.UTF_8)) + equivalent(sValue.trim(), utf8StringValue.ltrim().rtrim()) && + equivalent(utf8StringValue.trim(), sValue.ltrim().rtrim())} } property("ltrim") = forAll { (x: String) =>