From 6d000c395e1f9e3523f4dc0cc4b54c699fcd0b8c Mon Sep 17 00:00:00 2001 From: MishaDemianenko Date: Fri, 21 Jul 2017 23:33:00 +0200 Subject: [PATCH] Uniqueness check with different duplicate check strategies. Introduce duplicate checking strategies: use a simple map strategy when the expected number of elements is small enough, and a dynamic bucket-based array strategy when the expected number of elements is high. The proposed changes speed things up significantly and, for large numbers of duplicates, allow unique indexes to be created in seconds instead of minutes. --- .../index/IndexEntryConflictException.java | 2 +- .../CompositeDuplicateCheckingCollector.java | 86 +----- .../verification/DuplicateCheckStrategy.java | 274 +++++++++++++++++ .../DuplicateCheckingCollector.java | 77 ++--- .../PartitionedUniquenessVerifier.java | 53 +++- .../SimpleUniquenessVerifier.java | 5 +- .../DuplicateCheckStrategyTest.java | 277 ++++++++++++++++++ 7 files changed, 628 insertions(+), 146 deletions(-) create mode 100644 community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckStrategy.java create mode 100644 community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckStrategyTest.java diff --git a/community/kernel/src/main/java/org/neo4j/kernel/api/exceptions/index/IndexEntryConflictException.java b/community/kernel/src/main/java/org/neo4j/kernel/api/exceptions/index/IndexEntryConflictException.java index 79a6b649ea091..c99de3966d266 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/api/exceptions/index/IndexEntryConflictException.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/api/exceptions/index/IndexEntryConflictException.java @@ -38,7 +38,7 @@ public class IndexEntryConflictException extends Exception private final long addedNodeId; private final long existingNodeId; - public IndexEntryConflictException( long existingNodeId, long addedNodeId, Object propertyValue ) + public 
IndexEntryConflictException( long existingNodeId, long addedNodeId, Object... propertyValue ) { this( existingNodeId, addedNodeId, OrderedPropertyValues.ofUndefined( propertyValue ) ); } diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/CompositeDuplicateCheckingCollector.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/CompositeDuplicateCheckingCollector.java index 06ea207954de3..2cf9c5fad2b38 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/CompositeDuplicateCheckingCollector.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/CompositeDuplicateCheckingCollector.java @@ -22,7 +22,6 @@ import org.apache.lucene.document.Document; import java.io.IOException; -import java.util.Arrays; import org.neo4j.kernel.api.StatementConstants; import org.neo4j.kernel.api.exceptions.KernelException; @@ -30,18 +29,15 @@ import org.neo4j.kernel.api.impl.schema.LuceneDocumentStructure; import org.neo4j.kernel.api.index.PropertyAccessor; import org.neo4j.kernel.api.properties.Property; -import org.neo4j.kernel.api.schema.OrderedPropertyValues; public class CompositeDuplicateCheckingCollector extends DuplicateCheckingCollector { private final int[] propertyKeyIds; - private CompositeEntrySet actualValues; - public CompositeDuplicateCheckingCollector( PropertyAccessor accessor, int[] propertyKeyIds ) + CompositeDuplicateCheckingCollector( PropertyAccessor accessor, int[] propertyKeyIds ) { - super(accessor, -1); + super( accessor, StatementConstants.NO_SUCH_PROPERTY_KEY); this.propertyKeyIds = propertyKeyIds; - actualValues = new CompositeEntrySet(); } @Override @@ -56,82 +52,6 @@ protected void doCollect( int doc ) throws IOException, KernelException, IndexEn properties[i] = accessor.getProperty( nodeId, propertyKeyIds[i] ); values[i] = properties[i].value(); } - - // We either have to find the first conflicting 
entry set element, - // or append one for the property we just fetched: - CompositeEntrySet current = actualValues; - scan: - do - { - for ( int i = 0; i < CompositeEntrySet.INCREMENT; i++ ) - { - Object[] currentValues = current.values[i]; - - if ( current.nodeId[i] == StatementConstants.NO_SUCH_NODE ) - { - current.values[i] = values; - current.nodeId[i] = nodeId; - if ( i == CompositeEntrySet.INCREMENT - 1 ) - { - current.next = new CompositeEntrySet(); - } - break scan; - } - else if ( propertyValuesEqual( properties, currentValues ) ) - { - throw new IndexEntryConflictException( current.nodeId[i], nodeId, - OrderedPropertyValues.ofUndefined( currentValues ) ); - } - } - current = current.next; - } - while ( current != null ); - } - - private boolean propertyValuesEqual( Property[] properties, Object[] values ) - { - if ( properties.length != values.length ) - { - return false; - } - for ( int i = 0; i < properties.length; i++ ) - { - if ( !properties[i].valueEquals( values[i] ) ) - { - return false; - } - } - return true; - } - - @Override - public boolean needsScores() - { - return false; - } - - public void reset() - { - actualValues = new CompositeEntrySet(); - } - - /** - * A small struct of arrays of nodeId + array of property values, with a next pointer. - * Should exhibit fairly fast linear iteration, small memory overhead and dynamic growth. - *

- * NOTE: Must always call reset() before use! - */ - private static class CompositeEntrySet - { - static final int INCREMENT = 10000; - - Object[][] values = new Object[INCREMENT][]; - long[] nodeId = new long[INCREMENT]; - CompositeEntrySet next; - - CompositeEntrySet() - { - Arrays.fill( nodeId, StatementConstants.NO_SUCH_NODE ); - } + duplicateCheckStrategy.checkForDuplicate( properties, values, nodeId ); } } diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckStrategy.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckStrategy.java new file mode 100644 index 0000000000000..2158f1d513208 --- /dev/null +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckStrategy.java @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2002-2017 "Neo Technology," + * Network Engine for Objects in Lund AB [http://neotechnology.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+package org.neo4j.kernel.api.impl.schema.verification;
+
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.neo4j.kernel.api.StatementConstants;
+import org.neo4j.kernel.api.exceptions.index.IndexEntryConflictException;
+import org.neo4j.kernel.api.properties.Property;
+import org.neo4j.kernel.api.schema.OrderedPropertyValues;
+
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+
+/**
+ * Base class for the strategies used to check for duplicates while verifying value uniqueness during
+ * constraint creation.
+ *
+ * Each particular strategy determines how the uniqueness check is done and how the values seen so far are
+ * accumulated and stored, so that check time and resource consumption stay optimal.
+ */
+abstract class DuplicateCheckStrategy
+{
+    /**
+     * Check uniqueness of multiple properties that belong to the node with the provided node id.
+     * @param properties node properties
+     * @param values property values, in the same order as {@code properties}
+     * @param nodeId checked node id
+     * @throws IndexEntryConflictException if a previously checked node has the same property values
+     */
+    abstract void checkForDuplicate( Property[] properties, Object[] values, long nodeId )
+            throws IndexEntryConflictException;
+
+    /**
+     * Check uniqueness of a single property that belongs to the node with the provided node id.
+     * @param property node property
+     * @param value property value
+     * @param nodeId checked node id
+     * @throws IndexEntryConflictException if a previously checked node has the same property value
+     */
+    abstract void checkForDuplicate( Property property, Object value, long nodeId ) throws IndexEntryConflictException;
+
+    private static boolean propertyValuesEqual( Property[] properties, Object[] values )
+    {
+        if ( properties.length != values.length )
+        {
+            return false;
+        }
+        for ( int i = 0; i < properties.length; i++ )
+        {
+            if ( !properties[i].valueEquals( values[i] ) )
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    /**
+     * Duplicate check strategy that uses a plain hash map. Should be optimal for a small number of entries.
+     */
+    static class MapDuplicateCheckStrategy extends DuplicateCheckStrategy
+    {
+        private final Map<Object,Long> valueNodeIdMap;
+
+        MapDuplicateCheckStrategy( int expectedNumberOfEntries )
+        {
+            this.valueNodeIdMap = new HashMap<>( expectedNumberOfEntries );
+        }
+
+        @Override
+        public void checkForDuplicate( Property[] properties, Object[] values, long nodeId )
+                throws IndexEntryConflictException
+        {
+            Long previousNodeId = valueNodeIdMap.put( new PropertyValues( properties, values ), nodeId );
+            if ( previousNodeId != null )
+            {
+                throw new IndexEntryConflictException( previousNodeId, nodeId,
+                        OrderedPropertyValues.ofUndefined( values ) );
+            }
+        }
+
+        @Override
+        void checkForDuplicate( Property property, Object value, long nodeId ) throws IndexEntryConflictException
+        {
+            Long previousNodeId = valueNodeIdMap.put( property, nodeId );
+            if ( previousNodeId != null )
+            {
+                throw new IndexEntryConflictException( previousNodeId, nodeId, value );
+            }
+        }
+
+        private static class PropertyValues
+        {
+            private final Property[] properties;
+            private final Object[] values;
+
+            PropertyValues( Property[] properties, Object[] values )
+            {
+                this.properties = properties;
+                this.values = values;
+            }
+
+            @Override
+            public boolean equals( Object o )
+            {
+                if ( this == o )
+                {
+                    return true;
+                }
+                if ( o == null || getClass() != o.getClass() )
+                {
+                    return false;
+                }
+
+                PropertyValues that = (PropertyValues) o;
+                return propertyValuesEqual( properties, that.values );
+            }
+
+            @Override
+            public int hashCode()
+            {
+                int result = 0;
+                for ( Property property : properties )
+                {
+                    result = 31 * (result + property.hashCode());
+                }
+                return result;
+            }
+        }
+    }
+
+    /**
+     * Strategy that uses arrays to store entries and uses hash codes to split those entries over different buckets.
+     * The number of buckets and the size of each entry block are derived from the expected number of entries.
+     */
+    static class BucketsDuplicateCheckStrategy extends DuplicateCheckStrategy
+    {
+        private static final int BASE_ENTRY_SIZE = 1000;
+        private static final int DEFAULT_BUCKETS = 10;
+        static final int BUCKET_STRATEGY_ENTRIES_THRESHOLD = BASE_ENTRY_SIZE * DEFAULT_BUCKETS;
+
+        private static final int MAX_NUMBER_OF_BUCKETS = 100;
+        private final int numberOfBuckets;
+        private final EntrySet[] actualValues;
+        private final int entrySetSize;
+
+        BucketsDuplicateCheckStrategy()
+        {
+            this( BUCKET_STRATEGY_ENTRIES_THRESHOLD );
+        }
+
+        BucketsDuplicateCheckStrategy( int expectedNumberOfEntries )
+        {
+            numberOfBuckets = min( MAX_NUMBER_OF_BUCKETS, (expectedNumberOfEntries / BASE_ENTRY_SIZE) + 1 );
+            actualValues = new EntrySet[numberOfBuckets];
+            entrySetSize = max( 100, BUCKET_STRATEGY_ENTRIES_THRESHOLD / numberOfBuckets );
+        }
+
+        @Override
+        public void checkForDuplicate( Property[] properties, Object[] values, long nodeId )
+                throws IndexEntryConflictException
+        {
+            EntrySet current = bucketEntrySet( Arrays.deepHashCode( values ), entrySetSize );
+            // deepHashCode hashes array-valued properties by content; Arrays.hashCode would use array identity.
+            // We either have to find the first conflicting entry set element,
+            // or append one for the property we just fetched:
+            scan:
+            do
+            {
+                for ( int i = 0; i < entrySetSize; i++ )
+                {
+                    Object[] currentValues = (Object[])current.value[i];
+
+                    if ( current.nodeId[i] == StatementConstants.NO_SUCH_NODE )
+                    {
+                        current.value[i] = values;
+                        current.nodeId[i] = nodeId;
+                        if ( i == entrySetSize - 1 )
+                        {
+                            current.next = new EntrySet( entrySetSize );
+                        }
+                        break scan;
+                    }
+                    else if ( propertyValuesEqual( properties, currentValues ) )
+                    {
+                        throw new IndexEntryConflictException( current.nodeId[i], nodeId, currentValues );
+                    }
+                }
+                current = current.next;
+            }
+            while ( current != null );
+        }
+
+        @Override
+        void checkForDuplicate( Property property, Object propertyValue, long nodeId ) throws IndexEntryConflictException
+        {
+            EntrySet current = bucketEntrySet( Arrays.deepHashCode( new Object[]{propertyValue} ), entrySetSize );
+            // deepHashCode hashes an array-valued property by content; plain hashCode() would use array identity.
+            // We either have to find the first conflicting entry set element,
+            // or append one for the property we just fetched:
+            scan:
+            do
+            {
+                for ( int i = 0; i < entrySetSize; i++ )
+                {
+                    Object value = current.value[i];
+
+                    if ( current.nodeId[i] == StatementConstants.NO_SUCH_NODE )
+                    {
+                        current.value[i] = propertyValue;
+                        current.nodeId[i] = nodeId;
+                        if ( i == entrySetSize - 1 )
+                        {
+                            current.next = new EntrySet( entrySetSize );
+                        }
+                        break scan;
+                    }
+                    else if ( property.valueEquals( value ) )
+                    {
+                        throw new IndexEntryConflictException( current.nodeId[i], nodeId, value );
+                    }
+                }
+                current = current.next;
+            }
+            while ( current != null );
+        }
+
+        private EntrySet bucketEntrySet( int hashCode, int entrySetSize )
+        {
+            int bucket = Math.floorMod( hashCode, numberOfBuckets ); // abs( MIN_VALUE ) % n would be negative
+            EntrySet current = actualValues[bucket];
+            if ( current == null )
+            {
+                current = new EntrySet( entrySetSize );
+                actualValues[bucket] = current;
+            }
+            return current;
+        }
+
+        private static class EntrySet
+        {
+            final Object[] value;
+            final long[] nodeId;
+            EntrySet next;
+
+            EntrySet( int entrySize )
+            {
+                value = new Object[entrySize];
+                nodeId = new long[entrySize];
+                Arrays.fill( nodeId, StatementConstants.NO_SUCH_NODE );
+            }
+        }
+    }
+}
diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckingCollector.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckingCollector.java index bcefa8d7b44ff..f6b4b1ef2f2fd 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckingCollector.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckingCollector.java @@ -25,33 +25,33 @@ import org.apache.lucene.search.SimpleCollector; import java.io.IOException; -import java.util.Arrays; -import org.neo4j.kernel.api.StatementConstants; import org.neo4j.kernel.api.exceptions.KernelException; import 
org.neo4j.kernel.api.exceptions.index.IndexEntryConflictException; import org.neo4j.kernel.api.impl.schema.LuceneDocumentStructure; import org.neo4j.kernel.api.index.PropertyAccessor; import org.neo4j.kernel.api.properties.Property; +import static org.neo4j.kernel.api.impl.schema.verification.DuplicateCheckStrategy.BucketsDuplicateCheckStrategy; +import static org.neo4j.kernel.api.impl.schema.verification.DuplicateCheckStrategy.MapDuplicateCheckStrategy; + public class DuplicateCheckingCollector extends SimpleCollector { protected final PropertyAccessor accessor; private final int propertyKeyId; - private EntrySet actualValues; protected LeafReader reader; + DuplicateCheckStrategy duplicateCheckStrategy; - public static DuplicateCheckingCollector forProperties( PropertyAccessor accessor, int[] propertyKeyIds ) + static DuplicateCheckingCollector forProperties( PropertyAccessor accessor, int[] propertyKeyIds ) { return (propertyKeyIds.length == 1) ? new DuplicateCheckingCollector( accessor, propertyKeyIds[0] ) : new CompositeDuplicateCheckingCollector( accessor, propertyKeyIds ); } - public DuplicateCheckingCollector( PropertyAccessor accessor, int propertyKeyId ) + DuplicateCheckingCollector( PropertyAccessor accessor, int propertyKeyId ) { this.accessor = accessor; this.propertyKeyId = propertyKeyId; - actualValues = new EntrySet(); } @Override @@ -76,35 +76,7 @@ protected void doCollect( int doc ) throws IOException, KernelException, IndexEn Document document = reader.document( doc ); long nodeId = LuceneDocumentStructure.getNodeId( document ); Property property = accessor.getProperty( nodeId, propertyKeyId ); - - // We either have to find the first conflicting entry set element, - // or append one for the property we just fetched: - EntrySet current = actualValues; - scan: - do - { - for ( int i = 0; i < EntrySet.INCREMENT; i++ ) - { - Object value = current.value[i]; - - if ( current.nodeId[i] == StatementConstants.NO_SUCH_NODE ) - { - current.value[i] = 
property.value(); - current.nodeId[i] = nodeId; - if ( i == EntrySet.INCREMENT - 1 ) - { - current.next = new EntrySet(); - } - break scan; - } - else if ( property.valueEquals( value ) ) - { - throw new IndexEntryConflictException( current.nodeId[i], nodeId, value ); - } - } - current = current.next; - } - while ( current != null ); + duplicateCheckStrategy.checkForDuplicate( property, property.value(), nodeId ); } @Override @@ -119,28 +91,33 @@ public boolean needsScores() return false; } - public void reset() + /** + * Initialise collector for unknown number of entries that are suspected to be duplicates. + */ + public void init() { - actualValues = new EntrySet(); + duplicateCheckStrategy = new BucketsDuplicateCheckStrategy(); } /** - * A small struct of arrays of nodeId + property values, with a next pointer. - * Should exhibit fairly fast linear iteration, small memory overhead and dynamic growth. - *

- * NOTE: Must always call reset() before use! + * Initialize collector for some known and expected number of entries that are suspected to be duplicates. + * @param expectedNumberOfEntries expected number entries */ - private static class EntrySet + public void init( int expectedNumberOfEntries ) { - static final int INCREMENT = 10000; - - Object[] value = new Object[INCREMENT]; - long[] nodeId = new long[INCREMENT]; - EntrySet next; - - EntrySet() + if ( useFastCheck( expectedNumberOfEntries ) ) + { + duplicateCheckStrategy = new MapDuplicateCheckStrategy( expectedNumberOfEntries ); + } + else { - Arrays.fill( nodeId, StatementConstants.NO_SUCH_NODE ); + duplicateCheckStrategy = new BucketsDuplicateCheckStrategy( expectedNumberOfEntries ); } } + + private boolean useFastCheck( int expectedNumberOfEntries ) + { + return expectedNumberOfEntries <= BucketsDuplicateCheckStrategy.BUCKET_STRATEGY_ENTRIES_THRESHOLD; + } + } diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/PartitionedUniquenessVerifier.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/PartitionedUniquenessVerifier.java index f16b11edfe866..e6f41247c2b90 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/PartitionedUniquenessVerifier.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/PartitionedUniquenessVerifier.java @@ -27,10 +27,8 @@ import org.apache.lucene.index.Term; import org.apache.lucene.index.Terms; import org.apache.lucene.index.TermsEnum; -import org.apache.lucene.search.Collector; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; @@ -86,7 +84,7 @@ public void verify( PropertyAccessor accessor, int[] propKeyIds ) throws IndexEn if ( terms.docFreq() > 1 ) 
{ TermQuery query = new TermQuery( new Term( field, termsRef ) ); - searchForDuplicates( query, accessor, propKeyIds ); + searchForDuplicates( query, accessor, propKeyIds, terms.docFreq() ); } } } @@ -99,7 +97,7 @@ public void verify( PropertyAccessor accessor, int[] propKeyIds, List up for ( Object propertyValue : updatedPropertyValues ) { Query query = LuceneDocumentStructure.newSeekQuery( propertyValue ); - searchForDuplicates( query, accessor, propKeyIds ); + searchForDuplicates( query, accessor, propKeyIds); } } @@ -133,18 +131,55 @@ private Terms termsForField( String fieldName ) throws IOException return new MultiTerms( termsArray, readerSlicesArray ); } + /** + * Search for unknown number of duplicates duplicates + * @param query query to find duplicates in + * @param accessor accessor to load actual property value from store + * @param propertyKeyIds property key ids + * @throws IOException + * @throws IndexEntryConflictException + */ private void searchForDuplicates( Query query, PropertyAccessor accessor, int[] propertyKeyIds ) throws IOException, IndexEntryConflictException + { + DuplicateCheckingCollector collector = getDuplicateCollector( accessor, propertyKeyIds ); + collector.init(); + searchForDuplicates( query, collector ); + } + + /** + * Search for known number of duplicates duplicates + * @param query query to find duplicates in + * @param accessor accessor to load actual property value from store + * @param propertyKeyIds property key ids + * @param expectedNumberOfEntries expected number of duplicates in query + * @throws IOException + * @throws IndexEntryConflictException + */ + private void searchForDuplicates( Query query, PropertyAccessor accessor, int[] propertyKeyIds, + int expectedNumberOfEntries ) throws IOException, IndexEntryConflictException + { + DuplicateCheckingCollector collector = getDuplicateCollector( accessor, propertyKeyIds ); + collector.init( expectedNumberOfEntries ); + searchForDuplicates( query, collector ); + } + + 
private DuplicateCheckingCollector getDuplicateCollector( PropertyAccessor accessor, int[] propertyKeyIds ) + { + return DuplicateCheckingCollector.forProperties( accessor, propertyKeyIds ); + } + + private void searchForDuplicates( Query query, DuplicateCheckingCollector collector ) + throws IndexEntryConflictException, IOException { try { - /** - * Here {@link DuplicateCheckingCollector#reset()} is deliberately not called to preserve accumulated - * state (knowledge about duplicates) across all {@link IndexSearcher#search(Query, Collector)} calls. - */ - SimpleCollector collector = DuplicateCheckingCollector.forProperties( accessor, propertyKeyIds ); for ( PartitionSearcher searcher : searchers ) { + /* + * Here {@link DuplicateCheckingCollector#init}} is deliberately not called to preserve accumulated + * state (knowledge about duplicates) across all {@link IndexSearcher#search(Query, Collector)} calls. + */ searcher.getIndexSearcher().search( query, collector ); } } diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/SimpleUniquenessVerifier.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/SimpleUniquenessVerifier.java index 4babb0c54151c..53e0f1771234a 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/SimpleUniquenessVerifier.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/verification/SimpleUniquenessVerifier.java @@ -25,7 +25,6 @@ import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; -import org.apache.lucene.search.SimpleCollector; import org.apache.lucene.search.TermQuery; import org.apache.lucene.util.BytesRef; @@ -79,7 +78,7 @@ public void verify( PropertyAccessor accessor, int[] propKeyIds ) throws IndexEn { if ( terms.docFreq() > 1 ) { - collector.reset(); + collector.init( terms.docFreq() ); searcher.search( new 
TermQuery( new Term( field, termsRef ) ), collector ); } } @@ -106,7 +105,7 @@ public void verify( PropertyAccessor accessor, int[] propKeyIds, List up DuplicateCheckingCollector collector = DuplicateCheckingCollector.forProperties( accessor, propKeyIds ); for ( Object propertyValue : updatedPropertyValues ) { - collector.reset(); + collector.init(); Query query = LuceneDocumentStructure.newSeekQuery( propertyValue ); indexSearcher().search( query, collector ); } diff --git a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckStrategyTest.java b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckStrategyTest.java new file mode 100644 index 0000000000000..948e5029c4b06 --- /dev/null +++ b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckStrategyTest.java @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2002-2017 "Neo Technology," + * Network Engine for Objects in Lund AB [http://neotechnology.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */
+package org.neo4j.kernel.api.impl.schema.verification;
+
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+import org.junit.rules.ExpectedException;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+import java.util.Arrays;
+import java.util.List;
+import java.util.concurrent.ThreadLocalRandom;
+
+import org.neo4j.function.Factory;
+import org.neo4j.kernel.api.exceptions.index.IndexEntryConflictException;
+import org.neo4j.kernel.api.impl.schema.verification.DuplicateCheckStrategy.BucketsDuplicateCheckStrategy;
+import org.neo4j.kernel.api.impl.schema.verification.DuplicateCheckStrategy.MapDuplicateCheckStrategy;
+import org.neo4j.kernel.api.properties.DefinedProperty;
+import org.neo4j.kernel.api.properties.Property;
+
+import static java.lang.String.format;
+import static org.neo4j.kernel.api.impl.schema.verification.DuplicateCheckStrategy.BucketsDuplicateCheckStrategy.BUCKET_STRATEGY_ENTRIES_THRESHOLD;
+
+@RunWith( Parameterized.class )
+public class DuplicateCheckStrategyTest
+{
+    // Every test runs once per strategy; the factory gives each test its own freshly created strategy.
+    @Parameterized.Parameters
+    public static List<Factory<? extends DuplicateCheckStrategy>> duplicateCheckStrategies()
+    {
+        return Arrays.asList( () -> new MapDuplicateCheckStrategy( 1000 ),
+                () -> new BucketsDuplicateCheckStrategy( randomNumberOfEntries() ) );
+    }
+
+    @Parameterized.Parameter
+    public Factory<? extends DuplicateCheckStrategy> duplicateCheckStrategyFactory;
+    private DuplicateCheckStrategy checkStrategy;
+
+    @Rule
+    public ExpectedException expectedException = ExpectedException.none();
+
+    @Before
+    public void setUp() throws IllegalAccessException, InstantiationException
+    {
+        checkStrategy = duplicateCheckStrategyFactory.newInstance();
+    }
+
+    @Test
+    public void checkStringSinglePropertyDuplicates() throws Exception
+    {
+        String duplicatedString = "duplicate";
+        DefinedProperty property = Property.property( 1, duplicatedString );
+
+        expectedException.expect( IndexEntryConflictException.class );
+        expectedException.expectMessage(
+                format( "Both node 1 and node 2 share the property value ( '%s' )", duplicatedString ) );
+
+        checkStrategy.checkForDuplicate( property, duplicatedString, 1 );
+        checkStrategy.checkForDuplicate( property, duplicatedString, 2 );
+    }
+
+    @Test
+    public void checkNumericSinglePropertyDuplicates() throws Exception
+    {
+        Number duplicatedNumber = 0.33d;
+        DefinedProperty property = Property.property( 1, duplicatedNumber );
+
+        expectedException.expect( IndexEntryConflictException.class );
+        expectedException.expectMessage( format( "Both node 3 and node 4 share the property value ( %s )", duplicatedNumber ) );
+
+        checkStrategy.checkForDuplicate( property, duplicatedNumber, 3 );
+        checkStrategy.checkForDuplicate( property, duplicatedNumber, 4 );
+    }
+
+    @Test
+    public void duplicateFoundAmongUniqueStringSingleProperty() throws IndexEntryConflictException
+    {
+        for ( int i = 0, entries = randomNumberOfEntries(); i < entries; i++ )
+        {
+            String propertyValue = String.valueOf( i );
+            DefinedProperty property = Property.property( 1, propertyValue );
+            checkStrategy.checkForDuplicate( property, propertyValue, i );
+        }
+
+        int duplicateTarget = BUCKET_STRATEGY_ENTRIES_THRESHOLD - 2;
+        expectedException.expect( IndexEntryConflictException.class );
+        expectedException.expectMessage(
+                format( "Both node %s and node 3 share the property value ( '%s' )", duplicateTarget, duplicateTarget ) );
+        String duplicatedValue = String.valueOf( duplicateTarget );
+        DefinedProperty property = Property.property( 1, duplicatedValue );
+        checkStrategy.checkForDuplicate( property, duplicatedValue, 3 );
+    }
+
+    @Test
+    public void duplicateFoundAmongUniqueNumberSingleProperty() throws IndexEntryConflictException
+    {
+        double propertyValue = 0;
+        for ( int i = 0, entries = randomNumberOfEntries(); i < entries; i++ )
+        {
+            DefinedProperty property = Property.property( 1, propertyValue );
+            checkStrategy.checkForDuplicate( property, propertyValue, i );
+            propertyValue += 1;
+        }
+
+        int duplicateTarget = BUCKET_STRATEGY_ENTRIES_THRESHOLD - 8;
+        double duplicateValue = duplicateTarget;
+        expectedException.expect( IndexEntryConflictException.class );
+        expectedException.expectMessage(
+                format( "Both node %s and node 3 share the property value ( %s )", duplicateTarget, duplicateValue ) );
+        DefinedProperty property = Property.property( 1, duplicateValue );
+        checkStrategy.checkForDuplicate( property, duplicateValue, 3 );
+    }
+
+    @Test
+    public void noDuplicatesDetectedForUniqueStringSingleProperty() throws IndexEntryConflictException
+    {
+        for ( int i = 0, entries = randomNumberOfEntries(); i < entries; i++ )
+        {
+            String propertyValue = String.valueOf( i );
+            DefinedProperty property = Property.property( 1, propertyValue );
+            checkStrategy.checkForDuplicate( property, propertyValue, i );
+        }
+    }
+
+    @Test
+    public void noDuplicatesDetectedForUniqueNumberSingleProperty() throws IndexEntryConflictException
+    {
+        double propertyValue = 0;
+        int numberOfIterations = randomNumberOfEntries();
+        for ( int i = 0; i < numberOfIterations; i++ )
+        {
+            propertyValue += 1d / numberOfIterations;
+            DefinedProperty property = Property.property( 1, propertyValue );
+            checkStrategy.checkForDuplicate( property, propertyValue, i );
+        }
+    }
+
+    // multiple
+
+    @Test
+    public void checkStringMultiplePropertiesDuplicates() throws Exception
+    {
+        String duplicateA = "duplicateA";
+        String duplicateB = "duplicateB";
+        DefinedProperty propertyA = Property.property( 1, duplicateA );
+        DefinedProperty propertyB = Property.property( 2, duplicateB );
+
+        expectedException.expect( IndexEntryConflictException.class );
+        expectedException.expectMessage( format( "Both node 1 and node 2 share the property value ( '%s', '%s' )",
+                duplicateA, duplicateB ) );
+
+        checkStrategy.checkForDuplicate( new Property[]{propertyA, propertyB}, new Object[]{duplicateA, duplicateB}, 1 );
+        checkStrategy.checkForDuplicate( new Property[]{propertyA, propertyB}, new Object[]{duplicateA, duplicateB}, 2 );
+    }
+
+    @Test
+    public void checkNumericMultiplePropertiesDuplicates() throws Exception
+    {
+        Number duplicatedNumberA = 0.33d;
+        Number duplicatedNumberB = 2;
+        DefinedProperty propertyA = Property.property( 1, duplicatedNumberA );
+        DefinedProperty propertyB = Property.property( 2, duplicatedNumberB );
+
+        expectedException.expect( IndexEntryConflictException.class );
+        expectedException.expectMessage( format( "Both node 3 and node 4 share the property value ( %s, %s )",
+                duplicatedNumberA, duplicatedNumberB ) );
+
+        checkStrategy.checkForDuplicate( new Property[]{propertyA, propertyB}, new Object[]{duplicatedNumberA, duplicatedNumberB}, 3 );
+        checkStrategy.checkForDuplicate( new Property[]{propertyA, propertyB}, new Object[]{duplicatedNumberA, duplicatedNumberB}, 4 );
+    }
+
+    @Test
+    public void duplicateFoundAmongUniqueStringMultipleProperties() throws IndexEntryConflictException
+    {
+        for ( int i = 0, entries = randomNumberOfEntries(); i < entries; i++ )
+        {
+            String propertyValueA = String.valueOf( i );
+            String propertyValueB = String.valueOf( -i );
+            DefinedProperty propertyA = Property.property( 1, propertyValueA );
+            DefinedProperty propertyB = Property.property( 2, propertyValueB );
+            checkStrategy.checkForDuplicate( new Property[]{propertyA, propertyB},
+                    new Object[]{propertyValueA, propertyValueB}, i );
+        }
+
+        int duplicateTarget = BUCKET_STRATEGY_ENTRIES_THRESHOLD - 2;
+        String duplicatedValueA = String.valueOf( duplicateTarget );
+        String duplicatedValueB = String.valueOf( -duplicateTarget );
+        expectedException.expect( IndexEntryConflictException.class );
+        expectedException.expectMessage( format( "Both node %s and node 3 share the property value ( '%s', '%s' )",
+                duplicateTarget, duplicatedValueA, duplicatedValueB ) );
+        DefinedProperty propertyA = Property.property( 1, duplicatedValueA );
+        DefinedProperty propertyB = Property.property( 2, duplicatedValueB );
+        checkStrategy.checkForDuplicate( new Property[]{propertyA, propertyB},
+                new Object[]{duplicatedValueA, duplicatedValueB}, 3 );
+    }
+
+    @Test
+    public void duplicateFoundAmongUniqueNumberMultipleProperties() throws IndexEntryConflictException
+    {
+        double propertyValue = 0;
+        for ( int i = 0, entries = randomNumberOfEntries(); i < entries; i++ )
+        {
+            double propertyValueA = propertyValue;
+            double propertyValueB = -propertyValue;
+            DefinedProperty propertyA = Property.property( 1, propertyValueA );
+            DefinedProperty propertyB = Property.property( 2, propertyValueB );
+            checkStrategy.checkForDuplicate( new Property[]{propertyA, propertyB},
+                    new Object[]{propertyValueA, propertyValueB}, i );
+            propertyValue += 1;
+        }
+
+        int duplicateTarget = BUCKET_STRATEGY_ENTRIES_THRESHOLD - 8;
+        double duplicateValueA = duplicateTarget;
+        double duplicateValueB = -duplicateTarget;
+        expectedException.expect( IndexEntryConflictException.class );
+        expectedException.expectMessage( format( "Both node %s and node 3 share the property value ( %s, %s )",
+                duplicateTarget, duplicateValueA, duplicateValueB ) );
+        DefinedProperty propertyA = Property.property( 1, duplicateValueA );
+        DefinedProperty propertyB = Property.property( 2, duplicateValueB );
+        checkStrategy.checkForDuplicate( new Property[]{propertyA, propertyB}, new Object[]{duplicateValueA, duplicateValueB}, 3 );
+    }
+
+    @Test
+    public void noDuplicatesDetectedForUniqueStringMultipleProperties() throws IndexEntryConflictException
+    {
+        for ( int i = 0, entries = randomNumberOfEntries(); i < entries; i++ )
+        {
+            String propertyValueA = String.valueOf( i );
+            String propertyValueB = String.valueOf( -i );
+            DefinedProperty propertyA = Property.property( 1, propertyValueA );
+            DefinedProperty propertyB = Property.property( 2, propertyValueB );
+            checkStrategy.checkForDuplicate( new Property[]{propertyA, propertyB},
+                    new Object[]{propertyValueA, propertyValueB}, i );
+        }
+    }
+
+    @Test
+    public void noDuplicatesDetectedForUniqueNumberMultipleProperties() throws IndexEntryConflictException
+    {
+        double propertyValueA = 0;
+        double propertyValueB = 0;
+        int numberOfIterations = randomNumberOfEntries();
+        for ( int i = 0; i < numberOfIterations; i++ )
+        {
+            propertyValueA += 1d / numberOfIterations;
+            propertyValueB -= 1d / numberOfIterations;
+            DefinedProperty propertyA = Property.property( 1, propertyValueA );
+            DefinedProperty propertyB = Property.property( 2, propertyValueB );
+            checkStrategy.checkForDuplicate( new Property[]{propertyA, propertyB}, new Object[]{propertyValueA, propertyValueB}, i );
+        }
+    }
+
+    private static int randomNumberOfEntries()
+    {
+        return ThreadLocalRandom.current().nextInt( BUCKET_STRATEGY_ENTRIES_THRESHOLD, BUCKET_STRATEGY_ENTRIES_THRESHOLD << 1 );
+    }
+
+}