Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Uniqueness check with different duplicate check strategies.
Introduce duplicate checking strategies: use simple map strategy for cases when expected number of elements is small enough, use dynamic bucket based array strategy in case if expected number of elements is quite high. Proposed changes speedups things significantly and for quite big numbers of duplicates allow create unique indexes in seconds instead of minutes.
- Loading branch information
1 parent
3b1b524
commit 6d000c3
Showing
7 changed files
with
628 additions
and
146 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
274 changes: 274 additions & 0 deletions
274
...x/src/main/java/org/neo4j/kernel/api/impl/schema/verification/DuplicateCheckStrategy.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,274 @@ | ||
/* | ||
* Copyright (c) 2002-2017 "Neo Technology," | ||
* Network Engine for Objects in Lund AB [http://neotechnology.com] | ||
* | ||
* This file is part of Neo4j. | ||
* | ||
* Neo4j is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* (at your option) any later version. | ||
* | ||
* This program is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License | ||
* along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
package org.neo4j.kernel.api.impl.schema.verification; | ||
|
||
import java.util.Arrays; | ||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
import org.neo4j.kernel.api.StatementConstants; | ||
import org.neo4j.kernel.api.exceptions.index.IndexEntryConflictException; | ||
import org.neo4j.kernel.api.properties.Property; | ||
import org.neo4j.kernel.api.schema.OrderedPropertyValues; | ||
|
||
import static java.lang.Math.max; | ||
import static java.lang.Math.min; | ||
|
||
/** | ||
* Base class for strategy used for duplicate check during verification of value uniqueness during | ||
* constraint creation. | ||
* | ||
* Each particular strategy determines how uniqueness check is done and how to accumulate and store those values for | ||
* to make check time and resource consumption optimal. | ||
*/ | ||
abstract class DuplicateCheckStrategy | ||
{ | ||
/** | ||
* Check uniqueness of multiple properties that belong to a node with provided node id | ||
* @param properties node properties | ||
* @param values property values | ||
* @param nodeId checked node id | ||
* @throws IndexEntryConflictException | ||
*/ | ||
abstract void checkForDuplicate( Property[] properties, Object[] values, long nodeId ) | ||
throws IndexEntryConflictException; | ||
|
||
/** | ||
* Check uniqueness of single property that belong to a node with provided node id. | ||
* @param property node property | ||
* @param value property value | ||
* @param nodeId checked node id | ||
* @throws IndexEntryConflictException | ||
*/ | ||
abstract void checkForDuplicate( Property property, Object value, long nodeId ) throws IndexEntryConflictException; | ||
|
||
private static boolean propertyValuesEqual( Property[] properties, Object[] values ) | ||
{ | ||
if ( properties.length != values.length ) | ||
{ | ||
return false; | ||
} | ||
for ( int i = 0; i < properties.length; i++ ) | ||
{ | ||
if ( !properties[i].valueEquals( values[i] ) ) | ||
{ | ||
return false; | ||
} | ||
} | ||
return true; | ||
} | ||
|
||
/** | ||
* Duplicate check strategy that uses plain hash map. Should be optimal for small amount of entries. | ||
*/ | ||
static class MapDuplicateCheckStrategy extends DuplicateCheckStrategy | ||
{ | ||
private Map<Object,Long> valueNodeIdMap; | ||
|
||
MapDuplicateCheckStrategy( int expectedNumberOfEntries ) | ||
{ | ||
this.valueNodeIdMap = new HashMap<>( expectedNumberOfEntries ); | ||
} | ||
|
||
@Override | ||
public void checkForDuplicate( Property[] properties, Object[] values, long nodeId ) | ||
throws IndexEntryConflictException | ||
{ | ||
Long previousNodeId = valueNodeIdMap.put( new PropertyValues( properties, values ), nodeId ); | ||
if ( previousNodeId != null ) | ||
{ | ||
throw new IndexEntryConflictException( previousNodeId, nodeId, | ||
OrderedPropertyValues.ofUndefined( values ) ); | ||
} | ||
} | ||
|
||
@Override | ||
void checkForDuplicate( Property property, Object value, long nodeId ) throws IndexEntryConflictException | ||
{ | ||
Long previousNodeId = valueNodeIdMap.put( property, nodeId ); | ||
if ( previousNodeId != null ) | ||
{ | ||
throw new IndexEntryConflictException( previousNodeId, nodeId, value ); | ||
} | ||
} | ||
|
||
private static class PropertyValues | ||
{ | ||
private final Property[] properties; | ||
private final Object[] values; | ||
|
||
PropertyValues( Property[] properties, Object[] values ) | ||
{ | ||
this.properties = properties; | ||
this.values = values; | ||
} | ||
|
||
@Override | ||
public boolean equals( Object o ) | ||
{ | ||
if ( this == o ) | ||
{ | ||
return true; | ||
} | ||
if ( o == null || getClass() != o.getClass() ) | ||
{ | ||
return false; | ||
} | ||
|
||
PropertyValues that = (PropertyValues) o; | ||
return propertyValuesEqual( properties, that.values ); | ||
} | ||
|
||
@Override | ||
public int hashCode() | ||
{ | ||
int result = 0; | ||
for ( Property property : properties ) | ||
{ | ||
result = 31 * (result + property.hashCode()); | ||
} | ||
return result; | ||
} | ||
} | ||
} | ||
|
||
/** | ||
* Strategy that uses arrays to store entries and uses hash codes to split those entries over different buckets. | ||
* Number of buckets and size of entries block are dynamic and evaluated based on expected number of duplicates. | ||
*/ | ||
static class BucketsDuplicateCheckStrategy extends DuplicateCheckStrategy | ||
{ | ||
private static final int BASE_ENTRY_SIZE = 1000; | ||
private static final int DEFAULT_BUCKETS = 10; | ||
static final int BUCKET_STRATEGY_ENTRIES_THRESHOLD = BASE_ENTRY_SIZE * DEFAULT_BUCKETS; | ||
|
||
private static final int MAX_NUMBER_OF_BUCKETS = 100; | ||
private final int numberOfBuckets; | ||
private EntrySet[] actualValues; | ||
private final int entrySetSize; | ||
|
||
BucketsDuplicateCheckStrategy() | ||
{ | ||
this( BUCKET_STRATEGY_ENTRIES_THRESHOLD ); | ||
} | ||
|
||
BucketsDuplicateCheckStrategy( int expectedNumberOfEntries ) | ||
{ | ||
numberOfBuckets = min( MAX_NUMBER_OF_BUCKETS, (expectedNumberOfEntries / BASE_ENTRY_SIZE) + 1 ); | ||
actualValues = new EntrySet[numberOfBuckets]; | ||
entrySetSize = max( 100, BUCKET_STRATEGY_ENTRIES_THRESHOLD / numberOfBuckets ); | ||
} | ||
|
||
@Override | ||
public void checkForDuplicate( Property[] properties, Object[] values, long nodeId ) | ||
throws IndexEntryConflictException | ||
{ | ||
EntrySet current = bucketEntrySet( Arrays.hashCode( values ), entrySetSize ); | ||
|
||
// We either have to find the first conflicting entry set element, | ||
// or append one for the property we just fetched: | ||
scan: | ||
do | ||
{ | ||
for ( int i = 0; i < entrySetSize; i++ ) | ||
{ | ||
Object[] currentValues = (Object[])current.value[i]; | ||
|
||
if ( current.nodeId[i] == StatementConstants.NO_SUCH_NODE ) | ||
{ | ||
current.value[i] = values; | ||
current.nodeId[i] = nodeId; | ||
if ( i == entrySetSize - 1 ) | ||
{ | ||
current.next = new EntrySet( entrySetSize ); | ||
} | ||
break scan; | ||
} | ||
else if ( propertyValuesEqual( properties, currentValues ) ) | ||
{ | ||
throw new IndexEntryConflictException( current.nodeId[i], nodeId, currentValues ); | ||
} | ||
} | ||
current = current.next; | ||
} | ||
while ( current != null ); | ||
} | ||
|
||
@Override | ||
void checkForDuplicate( Property property, Object propertyValue, long nodeId ) throws IndexEntryConflictException | ||
{ | ||
EntrySet current = bucketEntrySet( propertyValue.hashCode(), entrySetSize ); | ||
|
||
// We either have to find the first conflicting entry set element, | ||
// or append one for the property we just fetched: | ||
scan: | ||
do | ||
{ | ||
for ( int i = 0; i < entrySetSize; i++ ) | ||
{ | ||
Object value = current.value[i]; | ||
|
||
if ( current.nodeId[i] == StatementConstants.NO_SUCH_NODE ) | ||
{ | ||
current.value[i] = propertyValue; | ||
current.nodeId[i] = nodeId; | ||
if ( i == entrySetSize - 1 ) | ||
{ | ||
current.next = new EntrySet( entrySetSize ); | ||
} | ||
break scan; | ||
} | ||
else if ( property.valueEquals( value ) ) | ||
{ | ||
throw new IndexEntryConflictException( current.nodeId[i], nodeId, value ); | ||
} | ||
} | ||
current = current.next; | ||
} | ||
while ( current != null ); | ||
} | ||
|
||
private EntrySet bucketEntrySet( int hashCode, int entrySetSize ) | ||
{ | ||
int bucket = Math.abs( hashCode ) % numberOfBuckets; | ||
EntrySet current = actualValues[bucket]; | ||
if ( current == null ) | ||
{ | ||
current = new EntrySet( entrySetSize ); | ||
actualValues[bucket] = current; | ||
} | ||
return current; | ||
} | ||
|
||
private static class EntrySet | ||
{ | ||
final Object[] value; | ||
final long[] nodeId; | ||
EntrySet next; | ||
|
||
EntrySet( int entrySize ) | ||
{ | ||
value = new Object[entrySize]; | ||
nodeId = new long[entrySize]; | ||
Arrays.fill( nodeId, StatementConstants.NO_SUCH_NODE ); | ||
} | ||
} | ||
} | ||
} |
Oops, something went wrong.