Skip to content

Commit

Permalink
Generic index also compare point coordinates
Browse files Browse the repository at this point in the history
Before, only space filling curve value was compared. This means index itself
can not verify if two points are unique or not but instead need help from outside.
Problem arise during index population where we trust index to be able to
tell us if a value is violating uniqueness constraint or not.

This is now fixed for Generic index.

We compare the serialized coordinate values (long) instead of the deserialized once
(double) because there is no need for us to convert them to actual coordinates.
We only need to provide some deterministic order in which points are stored
that keep points that are not exactly equal separate from each other.

This also means that coordinate need to propagate to internal nodes and
can not be removed in minimalSplitter.

This is a format change for native-btree-1.0 index provider and therefore
we bump GenericLayout.minor version.
  • Loading branch information
burqen committed Nov 6, 2018
1 parent d9654ce commit c481651
Show file tree
Hide file tree
Showing 6 changed files with 349 additions and 31 deletions.
Expand Up @@ -29,7 +29,7 @@ class GenericLayout extends IndexLayout<GenericKey,NativeIndexValue>

GenericLayout( int numberOfSlots, IndexSpecificSpaceFillingCurveSettingsCache spatialSettings )
{
super( "NSIL", 0, 4 );
super( "NSIL", 0, 5 );
this.numberOfSlots = numberOfSlots;
this.spatialSettings = spatialSettings;
}
Expand Down
Expand Up @@ -36,6 +36,7 @@
import static org.neo4j.collection.PrimitiveLongCollections.EMPTY_LONG_ARRAY;
import static org.neo4j.kernel.impl.index.schema.GeometryType.assertHasCoordinates;
import static org.neo4j.kernel.impl.index.schema.GeometryType.dimensions;
import static org.neo4j.kernel.impl.index.schema.GeometryType.hasCoordinates;
import static org.neo4j.kernel.impl.index.schema.GeometryType.putCrs;
import static org.neo4j.kernel.impl.index.schema.GeometryType.putPoint;
import static org.neo4j.kernel.impl.index.schema.GeometryType.readCrs;
Expand All @@ -61,8 +62,8 @@ class GeometryArrayType extends AbstractArrayType<PointValue>
{
super( ValueGroup.GEOMETRY_ARRAY, typeId, ( o1, o2, i ) -> GeometryType.compare(
// intentional long1 and long2 - not the array versions
o1.long0Array[i], o1.long1, o1.long2,
o2.long0Array[i], o2.long1, o2.long2 ),
o1.long0Array[i], o1.long1, o1.long2, o1.long3, o1.long1Array, (int) o1.long3 * i,
o2.long0Array[i], o2.long1, o2.long2, o2.long3, o2.long1Array, (int) o2.long3 * i ),
null, null, null, null, null );
}

Expand Down Expand Up @@ -137,17 +138,9 @@ boolean readValue( PageCursor cursor, int size, GenericKey into )
@Override
String toString( GenericKey state )
{
String asValueString = hasCoordinates( state ) ? asValue( state ).toString() : "NO_COORDINATES";
return format( "GeometryArray[tableId:%d, code:%d, rawValues:%s, value:%s]",
state.long1, state.long2, Arrays.toString( Arrays.copyOf( state.long0Array, state.arrayLength ) ), asValue( state ) );
}

@Override
void minimalSplitter( GenericKey left, GenericKey right, GenericKey into )
{
super.minimalSplitter( left, right, into );
// Set dimensions to 0 so that minimal splitters (i.e. point keys in internal nodes) doesn't have coordinate data,
// they don't need it since values aren't generated from internal keys anyway.
into.long3 = 0;
state.long1, state.long2, Arrays.toString( Arrays.copyOf( state.long0Array, state.arrayLength ) ), asValueString );
}

private static boolean readGeometryArrayItem( PageCursor cursor, GenericKey into )
Expand Down
Expand Up @@ -103,8 +103,8 @@ static PointValue asValue( GenericKey state, CoordinateReferenceSystem crs, int
int compareValue( GenericKey left, GenericKey right )
{
return compare(
left.long0, left.long1, left.long2,
right.long0, right.long1, right.long2 );
left.long0, left.long1, left.long2, left.long3, left.long1Array, 0,
right.long0, right.long1, right.long2, right.long3, right.long1Array, 0 );
}

@Override
Expand All @@ -123,26 +123,21 @@ boolean readValue( PageCursor cursor, int size, GenericKey into )
@Override
String toString( GenericKey state )
{
return format( "Geometry[tableId:%d, code:%d, rawValue:%d, value:%s", state.long1, state.long2, state.long0, asValue( state ) );
}

@Override
void minimalSplitter( GenericKey left, GenericKey right, GenericKey into )
{
super.minimalSplitter( left, right, into );
// Set dimensions to 0 so that minimal splitters (i.e. point keys in internal nodes) doesn't have coordinate data,
// they don't need it since values aren't generated from internal keys anyway.
into.long3 = 0;
String asValueString = hasCoordinates( state ) ? asValue( state ).toString() : "NO_COORDINATES";
return format( "Geometry[tableId:%d, code:%d, rawValue:%d, value:%s", state.long1, state.long2, state.long0, asValueString );
}

/**
* This method will compare along the curve, which is not a spatial comparison, but is correct
* for comparison within the space filling index as long as the original spatial range has already
* been decomposed into a collection of 1D curve ranges before calling down into the GPTree.
* If value on space filling curve is equal then raw comparison of serialized coordinates is done.
* This way points are only considered equal in index if they have the same coordinates and not if
* they only happen to occupy same value on space filling curve.
*/
static int compare(
long this_long0, long this_long1, long this_long2,
long that_long0, long that_long1, long that_long2 )
long this_long0, long this_long1, long this_long2, long this_long3, long[] this_long1Array, int this_coordinates_offset,
long that_long0, long that_long1, long that_long2, long that_long3, long[] that_long1Array, int that_coordinates_offset )
{
int tableIdComparison = Integer.compare( (int) this_long1, (int) that_long1 );
if ( tableIdComparison != 0 )
Expand All @@ -156,7 +151,26 @@ static int compare(
return codeComparison;
}

return Long.compare( this_long0, that_long0 );
int derivedValueComparison = Long.compare( this_long0, that_long0 );
if ( derivedValueComparison != 0 )
{
return derivedValueComparison;
}

long dimensions = Math.min( this_long3, that_long3 );
for ( int i = 0; i < dimensions; i++ )
{
// It's ok to compare the coordinate value here without deserializing them
// because we are only defining SOME deterministic order so that we can
// correctly separate unique points from each other, even if they collide
// on the space filling curve.
int coordinateComparison = Long.compare( this_long1Array[this_coordinates_offset + i], that_long1Array[that_coordinates_offset + i] );
if ( coordinateComparison != 0 )
{
return coordinateComparison;
}
}
return 0;
}

static void putCrs( PageCursor cursor, long long1, long long2, long long3 )
Expand Down Expand Up @@ -196,12 +210,22 @@ static void putPoint( PageCursor cursor, long long0, long long3, long[] long1Arr
*/
static void assertHasCoordinates( GenericKey state )
{
if ( state.long3 == 0 || state.long1Array == null )
if ( !hasCoordinates( state ) )
{
throw new IllegalStateException( "This geometry key doesn't have coordinates and can therefore neither be persisted nor generate point value." );
}
}

static boolean hasCoordinates( GenericKey state )
{
return state.long3 != 0 && state.long1Array != null;
}

static void setNoCoordinates( GenericKey state )
{
state.long3 = 0;
}

private static void put3BInt( PageCursor cursor, int value )
{
cursor.putShort( (short) value );
Expand Down
@@ -0,0 +1,239 @@
/*
* Copyright (c) 2002-2018 "Neo4j,"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.kernel.impl.index.schema;

import org.junit.After;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.RuleChain;

import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;

import org.neo4j.gis.spatial.index.curves.SpaceFillingCurve;
import org.neo4j.gis.spatial.index.curves.StandardConfiguration;
import org.neo4j.index.internal.gbptree.RecoveryCleanupWorkCollector;
import org.neo4j.internal.kernel.api.IndexOrder;
import org.neo4j.internal.kernel.api.IndexQuery;
import org.neo4j.internal.kernel.api.exceptions.schema.IndexNotApplicableKernelException;
import org.neo4j.io.fs.DefaultFileSystemAbstraction;
import org.neo4j.io.pagecache.PageCache;
import org.neo4j.kernel.api.exceptions.index.IndexEntryConflictException;
import org.neo4j.kernel.api.index.IndexDirectoryStructure;
import org.neo4j.kernel.api.index.IndexEntryUpdate;
import org.neo4j.kernel.api.schema.index.TestIndexDescriptorFactory;
import org.neo4j.kernel.configuration.Config;
import org.neo4j.kernel.impl.api.index.IndexUpdateMode;
import org.neo4j.kernel.impl.index.schema.config.ConfiguredSpaceFillingCurveSettingsCache;
import org.neo4j.kernel.impl.index.schema.config.IndexSpecificSpaceFillingCurveSettingsCache;
import org.neo4j.storageengine.api.schema.IndexReader;
import org.neo4j.storageengine.api.schema.SimpleNodeValueClient;
import org.neo4j.storageengine.api.schema.StoreIndexDescriptor;
import org.neo4j.test.rule.PageCacheRule;
import org.neo4j.test.rule.RandomRule;
import org.neo4j.test.rule.TestDirectory;
import org.neo4j.test.rule.fs.DefaultFileSystemRule;
import org.neo4j.values.storable.CoordinateReferenceSystem;
import org.neo4j.values.storable.PointArray;
import org.neo4j.values.storable.PointValue;
import org.neo4j.values.storable.Value;
import org.neo4j.values.storable.Values;

import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.rules.RuleChain.outerRule;
import static org.neo4j.kernel.api.index.IndexProvider.Monitor.EMPTY;
import static org.neo4j.test.rule.PageCacheRule.config;
import static org.neo4j.values.storable.CoordinateReferenceSystem.WGS84;

public class GenericAccessorPointsTest
{
private static final CoordinateReferenceSystem crs = CoordinateReferenceSystem.WGS84;
private static final ConfiguredSpaceFillingCurveSettingsCache configuredSettings = new ConfiguredSpaceFillingCurveSettingsCache( Config.defaults() );
private static final IndexSpecificSpaceFillingCurveSettingsCache indexSettings =
new IndexSpecificSpaceFillingCurveSettingsCache( configuredSettings, new HashMap<>() );
private static final SpaceFillingCurve curve = indexSettings.forCrs( crs, true );

private final DefaultFileSystemRule fs = new DefaultFileSystemRule();
private final TestDirectory directory = TestDirectory.testDirectory( getClass(), fs.get() );
private final PageCacheRule pageCacheRule = new PageCacheRule( config().withAccessChecks( true ) );
private final RandomRule random = new RandomRule();
@Rule
public final RuleChain rules = outerRule( random ).around( fs ).around( directory ).around( pageCacheRule );

private NativeIndexAccessor accessor;
private StoreIndexDescriptor descriptor;

@Before
public void setup()
{
DefaultFileSystemAbstraction fs = this.fs.get();
PageCache pc = pageCacheRule.getPageCache( fs );
File file = directory.file( "index" );
GenericLayout layout = new GenericLayout( 1, indexSettings );
RecoveryCleanupWorkCollector collector = RecoveryCleanupWorkCollector.ignore();
descriptor = TestIndexDescriptorFactory.forLabel( 1, 1 ).withId( 1 );
IndexDirectoryStructure.Factory factory = IndexDirectoryStructure.directoriesByProvider( directory.storeDir() );
IndexDirectoryStructure structure = factory.forProvider( GenericNativeIndexProvider.DESCRIPTOR );
accessor = new GenericNativeIndexAccessor( pc, fs, file, layout, collector, EMPTY, descriptor, indexSettings, structure, new StandardConfiguration() );
}

@After
public void tearDown()
{
accessor.close();
}

/**
* This test verify that we correctly handle unique points that all belong to the same tile on the space filling curve.
* All points share at least one dimension coordinate with another point to exercise minimal splitter.
* We verify this by asserting that we always get exactly one hit on an exact match and that the value is what we expect.
*/
@Test
public void mustHandlePointsWithinSameTile() throws IndexEntryConflictException, IndexNotApplicableKernelException
{
// given
// Many random points that all are close enough to each other to belong to the same tile on the space filling curve.
int nbrOfValues = 10000;
PointValue origin = Values.pointValue( WGS84, 0.0, 0.0 );
Long derivedValueForCenterPoint = curve.derivedValueFor( origin.coordinate() );
double[] centerPoint = curve.centerPointFor( derivedValueForCenterPoint );
double xWidthMultiplier = curve.getTileWidth( 0, curve.getMaxLevel() ) / 2;
double yWidthMultiplier = curve.getTileWidth( 1, curve.getMaxLevel() ) / 2;

List<Value> pointValues = new ArrayList<>();
List<IndexEntryUpdate<?>> updates = new ArrayList<>();
long nodeId = 1;
for ( int i = 0; i < nbrOfValues / 4; i++ )
{
double x1 = (random.nextDouble() * 2 - 1) * xWidthMultiplier;
double x2 = (random.nextDouble() * 2 - 1) * xWidthMultiplier;
double y1 = (random.nextDouble() * 2 - 1) * yWidthMultiplier;
double y2 = (random.nextDouble() * 2 - 1) * yWidthMultiplier;
PointValue value11 = Values.pointValue( WGS84, centerPoint[0] + x1, centerPoint[1] + y1 );
PointValue value12 = Values.pointValue( WGS84, centerPoint[0] + x1, centerPoint[1] + y2 );
PointValue value21 = Values.pointValue( WGS84, centerPoint[0] + x2, centerPoint[1] + y1 );
PointValue value22 = Values.pointValue( WGS84, centerPoint[0] + x2, centerPoint[1] + y2 );
assertDerivedValue( derivedValueForCenterPoint, value11, value12, value21, value22 );

nodeId = addPointsToLists( pointValues, updates, nodeId, value11, value12, value21, value22 );
}

processAll( updates );

// then
exactMatchOnAllValues( pointValues );
}

/**
* This test verify that we correctly handle unique point arrays where every point in every array belong to the same tile on the space filling curve.
* We verify this by asserting that we always get exactly one hit on an exact match and that the value is what we expect.
*/
@Test
public void mustHandlePointArraysWithinSameTile() throws IndexEntryConflictException, IndexNotApplicableKernelException
{
// given
// Many random points that all are close enough to each other to belong to the same tile on the space filling curve.
int nbrOfValues = 10000;
PointValue origin = Values.pointValue( WGS84, 0.0, 0.0 );
Long derivedValueForCenterPoint = curve.derivedValueFor( origin.coordinate() );
double[] centerPoint = curve.centerPointFor( derivedValueForCenterPoint );
double xWidthMultiplier = curve.getTileWidth( 0, curve.getMaxLevel() ) / 2;
double yWidthMultiplier = curve.getTileWidth( 1, curve.getMaxLevel() ) / 2;

List<Value> pointArrays = new ArrayList<>();
List<IndexEntryUpdate<?>> updates = new ArrayList<>();
for ( int i = 0; i < nbrOfValues; i++ )
{
int arrayLength = random.nextInt( 5 ) + 1;
PointValue[] pointValues = new PointValue[arrayLength];
for ( int j = 0; j < arrayLength; j++ )
{
double x = (random.nextDouble() * 2 - 1) * xWidthMultiplier;
double y = (random.nextDouble() * 2 - 1) * yWidthMultiplier;
PointValue value = Values.pointValue( WGS84, centerPoint[0] + x, centerPoint[1] + y );

assertDerivedValue( derivedValueForCenterPoint, value );
pointValues[j] = value;
}
PointArray array = Values.pointArray( pointValues );
pointArrays.add( array );
updates.add( IndexEntryUpdate.add( i, descriptor, array ) );
}

processAll( updates );

// then
exactMatchOnAllValues( pointArrays );
}

private long addPointsToLists( List<Value> pointValues, List<IndexEntryUpdate<?>> updates, long nodeId, PointValue... values )
{
for ( PointValue value : values )
{
pointValues.add( value );
updates.add( IndexEntryUpdate.add( nodeId++, descriptor, value ) );
}
return nodeId;
}

private void assertDerivedValue( Long targetDerivedValue, PointValue... values )
{
for ( PointValue value : values )
{
Long derivedValueForValue = curve.derivedValueFor( value.coordinate() );
assertEquals( targetDerivedValue, derivedValueForValue, "expected random value to belong to same tile as center point" );
}
}

private void processAll( List<IndexEntryUpdate<?>> updates ) throws IndexEntryConflictException
{
try ( NativeIndexUpdater updater = accessor.newUpdater( IndexUpdateMode.ONLINE ) )
{
for ( IndexEntryUpdate<?> update : updates )
{
//noinspection unchecked
updater.process( update );
}
}
}

private void exactMatchOnAllValues( List<Value> values ) throws IndexNotApplicableKernelException
{
try ( IndexReader indexReader = accessor.newReader() )
{
SimpleNodeValueClient client = new SimpleNodeValueClient();
for ( Value value : values )
{
IndexQuery.ExactPredicate exact = IndexQuery.exact( descriptor.schema().getPropertyId(), value );
indexReader.query( client, IndexOrder.NONE, true, exact );

// then
assertTrue( client.next() );
assertEquals( value, client.values[0] );
assertFalse( client.next() );
}
}
}
}

0 comments on commit c481651

Please sign in to comment.