Skip to content

Commit

Permalink
Simplified and unified NodeLabelRange implementation
Browse files Browse the repository at this point in the history
Previously each label scan store had its own implementation of NodeLabelRange
as well as parsing and composition of them.

The Lucene implementation was garbage-heavy and didn't have sorted node ids
within each range, which made implementing the gap-free label range iterator
difficult.

The meaning of the NodeLabelRange methods has been clarified and documented with
javadocs. Their return values now also better fit the main use case of
NodeLabelRange, which is the consistency checker.
  • Loading branch information
tinwelint committed Jun 28, 2017
1 parent b932fae commit cb9c289
Show file tree
Hide file tree
Showing 12 changed files with 483 additions and 379 deletions.
Expand Up @@ -25,6 +25,10 @@
import org.neo4j.kernel.api.labelscan.AllEntriesLabelScanReader;
import org.neo4j.kernel.api.labelscan.NodeLabelRange;

/**
* Inserts empty {@link NodeLabelRange} for those ranges missing from the source iterator.
* High node id is known up front such that ranges are returned up to that point.
*/
class GapFreeAllEntriesLabelScanReader implements AllEntriesLabelScanReader
{
private final AllEntriesLabelScanReader nodeLabelRanges;
Expand All @@ -48,116 +52,74 @@ public void close() throws Exception
nodeLabelRanges.close();
}

@Override
public int rangeSize()
{
return nodeLabelRanges.rangeSize();
}

@Override
public Iterator<NodeLabelRange> iterator()
{
return new GapFillingIterator( nodeLabelRanges.iterator(), highId );
return new GapFillingIterator( nodeLabelRanges.iterator(), (highId - 1) / nodeLabelRanges.rangeSize(),
nodeLabelRanges.rangeSize() );
}

private static class GapFillingIterator extends PrefetchingIterator<NodeLabelRange>
{
private static final int BATCH_SIZE = 1_000;
private static final long[] EMPTY_LONG_ARRAY = new long[0];
private final long highId;
private Iterator<NodeLabelRange> source;
private final long highestRangeId;
private final Iterator<NodeLabelRange> source;
private final long[][] emptyRangeData;

private NodeLabelRange nextFromSource;
private long[] sourceNodeIds;
private int sourceIndex;
private int currentId;
private boolean first;
private long currentRangeId = -1;

GapFillingIterator( Iterator<NodeLabelRange> nodeLableRangeIterator, long highId )
GapFillingIterator( Iterator<NodeLabelRange> nodeLableRangeIterator, long highestRangeId, int rangeSize )
{
this.highId = highId;
this.highestRangeId = highestRangeId;
this.source = nodeLableRangeIterator;
this.first = true;
this.emptyRangeData = new long[rangeSize][];
}

@Override
protected NodeLabelRange fetchNextOrNull()
{
long baseId = currentId;
int batchSize = BATCH_SIZE;
long[] nodes = new long[batchSize];
long[][] labels = new long[batchSize][];

int cursor = 0;
for ( ; cursor < batchSize; cursor++, currentId++ )
while ( true )
{
// First or empty source
if ( first || (sourceNodeIds != null && sourceIndex >= sourceNodeIds.length) )
// These conditions only come into play after we've gotten the first range from the source
if ( nextFromSource != null )
{
first = false;
if ( source.hasNext() )
if ( currentRangeId + 1 == nextFromSource.id() )
{
nextFromSource = source.next();
sourceNodeIds = nextFromSource.nodes();
sourceIndex = 0;
// Next to return is the one from source
currentRangeId++;
return nextFromSource;
}
else

if ( currentRangeId < nextFromSource.id() )
{
nextFromSource = null;
sourceNodeIds = null;
// Source range iterator has a gap we need to fill
return new NodeLabelRange( ++currentRangeId, emptyRangeData );
}
}

if ( currentId >= highId && sourceNodeIds == null )
if ( source.hasNext() )
{
break;
// The source iterator has more ranges, grab the next one
nextFromSource = source.next();
// continue in the outer loop
}

nodes[cursor] = currentId;
if ( sourceNodeIds != null && sourceNodeIds[sourceIndex] == currentId )
else if ( currentRangeId < highestRangeId )
{
labels[cursor] = nextFromSource.labels( currentId );
sourceIndex++;
nextFromSource = new NodeLabelRange( highestRangeId, emptyRangeData );
// continue in the outer loop
}
else
{
labels[cursor] = EMPTY_LONG_ARRAY;
// End has been reached
return null;
}
}
return cursor > 0 ? new SimpleNodeLabelRange( baseId, nodes, labels ) : null;
}

private static class SimpleNodeLabelRange extends NodeLabelRange
{
private final long baseId;
private final long[] nodes;
private final long[][] labels;

SimpleNodeLabelRange( long baseId, long[] nodes, long[][] labels )
{
this.baseId = baseId;
this.nodes = nodes;
this.labels = labels;
}

@Override
public int id()
{
return (int) baseId;
}

@Override
public long[] nodes()
{
return nodes;
}

@Override
public long[] labels( long nodeId )
{
return labels[(int) (nodeId - baseId)];
}

@Override
public String toString()
{
String rangeString = baseId + "-" + (baseId + nodes.length);
String prefix = "NodeLabelRange[idRange=" + rangeString;
return toString( prefix, nodes, labels );
}
}
}
}
@@ -0,0 +1,188 @@
/*
* Copyright (c) 2002-2017 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.consistency.checking.full;

import org.junit.Rule;
import org.junit.Test;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import org.neo4j.kernel.api.labelscan.AllEntriesLabelScanReader;
import org.neo4j.kernel.api.labelscan.NodeLabelRange;
import org.neo4j.test.rule.RandomRule;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;

import static org.neo4j.collection.primitive.PrimitiveLongCollections.EMPTY_LONG_ARRAY;

/**
 * Verifies that {@link GapFreeAllEntriesLabelScanReader} hands out exactly one {@link NodeLabelRange}
 * per range id, in order, starting at range 0 and ending with the range containing the highest node id,
 * even when the underlying reader does not return a range for every id.
 *
 * Each test describes its data as one {@code int} per range, where bit {@code i} being set means that
 * the {@code i}:th node of that range has {@link #LABEL_IDS}. A value of {@link #EMPTY_RANGE} means that
 * the source reader returns nothing at all for that range, i.e. the range is a gap which the reader under
 * test is expected to fill.
 */
public class GapFreeAllEntriesLabelScanReaderTest
{
    private static final int EMPTY_RANGE = 0;
    private static final int NON_EMPTY_RANGE = 0b10101; // 0, 2, 4
    private static final int RANGE_SIZE = 10;
    private static final long[] LABEL_IDS = new long[] {1};

    @Rule
    public final RandomRule random = new RandomRule();

    @Test
    public void shouldFillGapInBeginning() throws Exception
    {
        // given
        int[] ranges = array( EMPTY_RANGE, EMPTY_RANGE, NON_EMPTY_RANGE );
        GapFreeAllEntriesLabelScanReader reader = newGapFreeAllEntriesLabelScanReader( ranges );

        // when
        Iterator<NodeLabelRange> iterator = reader.iterator();

        // then
        assertRanges( iterator, ranges );
    }

    @Test
    public void shouldFillGapInEnd() throws Exception
    {
        // given
        int[] ranges = array( NON_EMPTY_RANGE, EMPTY_RANGE, EMPTY_RANGE );
        GapFreeAllEntriesLabelScanReader reader = newGapFreeAllEntriesLabelScanReader( ranges );

        // when
        Iterator<NodeLabelRange> iterator = reader.iterator();

        // then
        assertRanges( iterator, ranges );
    }

    @Test
    public void shouldFillGapInMiddle() throws Exception
    {
        // given
        int[] ranges = array( EMPTY_RANGE, NON_EMPTY_RANGE, EMPTY_RANGE );
        GapFreeAllEntriesLabelScanReader reader = newGapFreeAllEntriesLabelScanReader( ranges );

        // when
        Iterator<NodeLabelRange> iterator = reader.iterator();

        // then
        assertRanges( iterator, ranges );
    }

    @Test
    public void shouldFillRandomGaps() throws Exception
    {
        // given
        int numberOfRanges = random.intBetween( 50, 100 );
        int[] ranges = new int[numberOfRanges];
        for ( int rangeId = 0; rangeId < numberOfRanges; rangeId++ )
        {
            // A purely random bit set is empty only once in 2^RANGE_SIZE draws, which would make gaps
            // practically non-existent. Force roughly every third range to be a gap instead.
            boolean gap = random.nextInt( 3 ) == 0;
            ranges[rangeId] = gap ? EMPTY_RANGE : random.nextInt( 1 << RANGE_SIZE );
        }
        GapFreeAllEntriesLabelScanReader reader = newGapFreeAllEntriesLabelScanReader( ranges );

        // when
        Iterator<NodeLabelRange> iterator = reader.iterator();

        // then
        assertRanges( iterator, ranges );
    }

    /**
     * Asserts that the iterator returns one range per entry in {@code expectedRanges}, with consecutive
     * range ids starting at 0, the expected node ids and the expected labels per node, and then ends.
     *
     * @param iterator iterator from the reader under test.
     * @param expectedRanges one bit set per expected range, see class javadoc.
     */
    private void assertRanges( Iterator<NodeLabelRange> iterator, int[] expectedRanges )
    {
        for ( int expectedRangeId = 0; expectedRangeId < expectedRanges.length; expectedRangeId++ )
        {
            assertTrue( iterator.hasNext() );
            NodeLabelRange actualRange = iterator.next();
            assertEquals( expectedRangeId, actualRange.id() );
            int expectedRange = expectedRanges[expectedRangeId];
            // Widen before multiplying so that the node id is computed in long arithmetic
            long baseNodeId = (long) expectedRangeId * RANGE_SIZE;
            for ( int i = 0; i < RANGE_SIZE; i++ )
            {
                long nodeId = baseNodeId + i;
                long[] expectedLabelIds = (expectedRange & (1 << i)) == 0 ? EMPTY_LONG_ARRAY : LABEL_IDS;
                assertArrayEquals( expectedLabelIds, actualRange.labels( nodeId ) );
                assertEquals( nodeId, actualRange.nodes()[i] );
            }
        }
        assertFalse( iterator.hasNext() );
    }

    /**
     * @param ranges one bit set per range, see class javadoc.
     * @return the reader under test, wrapping a source reader built from {@code ranges} and given a high
     * node id which is one past the last node of the last range.
     */
    private GapFreeAllEntriesLabelScanReader newGapFreeAllEntriesLabelScanReader( int... ranges )
    {
        return new GapFreeAllEntriesLabelScanReader( ranges( RANGE_SIZE, ranges ), RANGE_SIZE * ranges.length );
    }

    /**
     * Builds the source reader. Ranges described as {@link #EMPTY_RANGE} are left out entirely, so that
     * the returned reader really has gaps for the reader under test to fill.
     *
     * @param rangeSize number of nodes per range.
     * @param ranges one bit set per range, see class javadoc.
     * @return a reader returning only the non-empty ranges, in range id order.
     */
    private static AllEntriesLabelScanReader ranges( int rangeSize, int... ranges )
    {
        List<NodeLabelRange> rangeList = new ArrayList<>();
        for ( int rangeId = 0; rangeId < ranges.length; rangeId++ )
        {
            if ( ranges[rangeId] != EMPTY_RANGE )
            {
                rangeList.add( new NodeLabelRange( rangeId, labelsPerNode( rangeSize, ranges[rangeId] ) ) );
            }
        }

        return new AllEntriesLabelScanReader()
        {
            @Override
            public void close() throws Exception
            {   // Nothing to close
            }

            @Override
            public Iterator<NodeLabelRange> iterator()
            {
                return rangeList.iterator();
            }

            @Override
            public long maxCount()
            {
                return (long) ranges.length * rangeSize;
            }

            @Override
            public int rangeSize()
            {
                return rangeSize;
            }
        };
    }

    /**
     * @param rangeSize number of nodes per range.
     * @param relativeNodeIds bit set where bit {@code i} being set means that the {@code i}:th node in the
     * range has {@link #LABEL_IDS}.
     * @return label ids per node in the range. Entries for nodes without labels are left as {@code null},
     * the tests expect {@link NodeLabelRange#labels(long)} to report those as an empty array.
     */
    private static long[][] labelsPerNode( int rangeSize, int relativeNodeIds )
    {
        long[][] result = new long[rangeSize][];
        for ( int i = 0; i < rangeSize; i++ )
        {
            if ( (relativeNodeIds & (1 << i)) != 0 )
            {
                result[i] = LABEL_IDS;
            }
        }
        return result;
    }

    /**
     * Readability helper, merely returns its var-args as an array.
     */
    private static int[] array( int... relativeNodeIds )
    {
        return relativeNodeIds;
    }
}
Expand Up @@ -51,8 +51,8 @@
import org.neo4j.consistency.store.synthetic.CountsEntry;
import org.neo4j.consistency.store.synthetic.IndexEntry;
import org.neo4j.consistency.store.synthetic.LabelScanDocument;
import org.neo4j.kernel.api.impl.labelscan.LuceneNodeLabelRange;
import org.neo4j.kernel.api.index.SchemaIndexProvider;
import org.neo4j.kernel.api.labelscan.NodeLabelRange;
import org.neo4j.kernel.api.schema.index.IndexDescriptorFactory;
import org.neo4j.kernel.impl.store.record.AbstractBaseRecord;
import org.neo4j.kernel.impl.store.record.DynamicRecord;
Expand Down Expand Up @@ -379,7 +379,7 @@ private Object parameter( Class<?> type )
}
if ( type == LabelScanDocument.class )
{
return new LabelScanDocument( new LuceneNodeLabelRange( 0, new long[] {}, new long[][] {} ) );
return new LabelScanDocument( new NodeLabelRange( 0, new long[][] {} ) );
}
if ( type == IndexEntry.class )
{
Expand Down
Expand Up @@ -21,6 +21,14 @@

import org.neo4j.helpers.collection.BoundedIterable;

/**
* Iterates over all label data in a label index, handing it out as {@link NodeLabelRange} instances.
*/
public interface AllEntriesLabelScanReader extends BoundedIterable<NodeLabelRange>
{
/**
* @return size of a range. All {@link NodeLabelRange} instances handed out by this reader
* have the same range size.
*/
int rangeSize();
}

0 comments on commit cb9c289

Please sign in to comment.