Skip to content

Commit

Permalink
Adds IndexReader#distinctValues
Browse files Browse the repository at this point in the history
The main purpose is to access distinct values, and the count for each value, regardless
of index provider. There are procedures floating around for doing this on an
IndexReader from a provider backed by Lucene, but this hasn't worked for native indexes
until now.

Two points of ugliness in this commit:
- A cast from Layout -> SchemaLayout in NativeSchemaIndexReader.
  This is done to avoid a bigger refactoring, which has already been done in 3.5.
- HashBasedIndex just throws UnsupportedOperationException in its distinctValues.
  This is because this functionality isn't really needed for this testing index,
  and HashBasedIndex and in-memory indexing as a whole have been removed in 3.5.
  • Loading branch information
tinwelint committed Nov 8, 2018
1 parent fa9fa29 commit f9c39ab
Show file tree
Hide file tree
Showing 17 changed files with 732 additions and 44 deletions.
@@ -0,0 +1,90 @@
/*
* Copyright (c) 2002-2018 "Neo4j,"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.kernel.impl.index.schema;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collection;

import org.neo4j.cursor.RawCursor;
import org.neo4j.index.internal.gbptree.Hit;

/**
 * Progressor that reports every distinct value delivered by a seeker, together with the number
 * of consecutive hits that carried that value.
 * <p>
 * A value is handed to the client through {@code client.acceptNode( count, values )}; the first
 * argument is the size of the run of equal values, not a node reference. Runs are detected by
 * comparing each hit with the previously seen key using {@code layout.compareValue}, so equal
 * values are only counted together when they are adjacent in the seeker's output.
 */
public class NativeDistinctValuesProgressor<KEY extends NativeSchemaKey<KEY>, VALUE extends NativeSchemaValue> extends NativeIndexProgressor<KEY,VALUE>
{
    private final SchemaLayout<KEY> layout;
    // Copy of the key seen in the previous iteration; overwritten after every hit.
    private final KEY previousKey;
    // True until the very first hit has been consumed.
    private boolean nothingSeenYet = true;
    // Number of hits in the run that previousKey currently belongs to.
    private long runLength;
    // Set once the seeker has been exhausted and the trailing run has been offered to the client.
    private boolean trailingRunReported;

    NativeDistinctValuesProgressor( RawCursor<Hit<KEY,VALUE>,IOException> seeker, NodeValueClient client,
            Collection<RawCursor<Hit<KEY,VALUE>,IOException>> toRemoveFromOnClose, SchemaLayout<KEY> layout )
    {
        super( seeker, client, toRemoveFromOnClose );
        this.layout = layout;
        this.previousKey = layout.newKey();
    }

    /**
     * Advances to the next distinct value that the client accepts.
     *
     * @return {@code true} if a distinct value (and its count) was accepted by the client,
     * {@code false} when the seeker is exhausted and there is nothing more to report.
     * @throws UncheckedIOException wrapping any {@link IOException} raised by the seeker.
     */
    @Override
    public boolean next()
    {
        try
        {
            while ( seeker.next() )
            {
                KEY current = seeker.get().key();
                boolean accepted = false;
                try
                {
                    if ( nothingSeenYet )
                    {
                        // very first hit, start the first run
                        nothingSeenYet = false;
                        runLength = 1;
                    }
                    else if ( layout.compareValue( previousKey, current ) != 0 )
                    {
                        // value changed: report the finished run, then start a new one
                        accepted = client.acceptNode( runLength, extractValues( previousKey ) );
                        runLength = 1;
                    }
                    else
                    {
                        // still inside the same run
                        runLength++;
                    }
                }
                finally
                {
                    // Always remember the current key, even if the client threw.
                    layout.copyKey( current, previousKey );
                }

                if ( accepted )
                {
                    return true;
                }
            }

            // Seeker exhausted: the last run has not been reported yet, do so exactly once.
            boolean trailingAccepted = false;
            if ( !nothingSeenYet && !trailingRunReported )
            {
                trailingAccepted = client.acceptNode( runLength, extractValues( previousKey ) );
            }
            trailingRunReported = true;
            return trailingAccepted;
        }
        catch ( IOException e )
        {
            throw new UncheckedIOException( e );
        }
    }
}
Expand Up @@ -25,22 +25,14 @@

import org.neo4j.cursor.RawCursor;
import org.neo4j.index.internal.gbptree.Hit;
import org.neo4j.storageengine.api.schema.IndexProgressor;
import org.neo4j.values.storable.Value;

public class NativeHitIndexProgressor<KEY extends NativeSchemaKey<KEY>, VALUE extends NativeSchemaValue> implements IndexProgressor
public class NativeHitIndexProgressor<KEY extends NativeSchemaKey<KEY>, VALUE extends NativeSchemaValue> extends NativeIndexProgressor<KEY,VALUE>
{
private final RawCursor<Hit<KEY,VALUE>,IOException> seeker;
private final NodeValueClient client;
private final Collection<RawCursor<Hit<KEY,VALUE>,IOException>> toRemoveFromOnClose;
private boolean closed;

NativeHitIndexProgressor( RawCursor<Hit<KEY,VALUE>,IOException> seeker, NodeValueClient client,
Collection<RawCursor<Hit<KEY,VALUE>,IOException>> toRemoveFromOnClose )
{
this.seeker = seeker;
this.client = client;
this.toRemoveFromOnClose = toRemoveFromOnClose;
super( seeker, client, toRemoveFromOnClose );
}

@Override
Expand Down Expand Up @@ -69,27 +61,4 @@ protected boolean acceptValue( Value[] values )
{
return true;
}

/**
 * Materializes the value of {@code key} for the client, but only if the client asked for values.
 *
 * @param key key to read the value from.
 * @return a single-element array holding {@code key.asValue()}, or {@code null} when
 * {@code client.needsValues()} is {@code false}.
 */
Value[] extractValues( KEY key )
{
    if ( !client.needsValues() )
    {
        return null;
    }
    return new Value[]{ key.asValue()};
}

/**
 * Closes the seeker and removes it from {@code toRemoveFromOnClose}. Only the first call has any
 * effect; later calls return immediately.
 *
 * @throws UncheckedIOException wrapping an {@link IOException} thrown when closing the seeker.
 */
@Override
public void close()
{
    if ( closed )
    {
        return;
    }

    closed = true;
    try
    {
        seeker.close();
        toRemoveFromOnClose.remove( seeker );
    }
    catch ( IOException e )
    {
        throw new UncheckedIOException( e );
    }
}
}
@@ -0,0 +1,68 @@
/*
* Copyright (c) 2002-2018 "Neo4j,"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.kernel.impl.index.schema;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collection;

import org.neo4j.cursor.RawCursor;
import org.neo4j.index.internal.gbptree.Hit;
import org.neo4j.storageengine.api.schema.IndexProgressor;
import org.neo4j.values.storable.Value;

/**
 * Common base for progressors that are driven by a seeker. It holds the seeker and the client,
 * converts keys to values for the client, and closes the seeker at most once.
 */
abstract class NativeIndexProgressor<KEY extends NativeSchemaKey<KEY>, VALUE extends NativeSchemaValue> implements IndexProgressor
{
    final RawCursor<Hit<KEY,VALUE>,IOException> seeker;
    final NodeValueClient client;
    // Collection the seeker is removed from once this progressor has closed it.
    private final Collection<RawCursor<Hit<KEY,VALUE>,IOException>> toRemoveFromOnClose;
    private boolean closed;

    NativeIndexProgressor( RawCursor<Hit<KEY,VALUE>,IOException> seeker, NodeValueClient client,
            Collection<RawCursor<Hit<KEY,VALUE>,IOException>> toRemoveFromOnClose )
    {
        this.seeker = seeker;
        this.client = client;
        this.toRemoveFromOnClose = toRemoveFromOnClose;
    }

    /**
     * Materializes the value of {@code key} for the client, but only if the client asked for values.
     *
     * @param key key to read the value from.
     * @return a single-element array holding {@code key.asValue()}, or {@code null} when
     * {@code client.needsValues()} is {@code false}.
     */
    Value[] extractValues( KEY key )
    {
        if ( !client.needsValues() )
        {
            return null;
        }
        return new Value[]{ key.asValue()};
    }

    /**
     * Closes the seeker and removes it from {@code toRemoveFromOnClose}. Only the first call has
     * any effect; later calls return immediately.
     *
     * @throws UncheckedIOException wrapping an {@link IOException} thrown when closing the seeker.
     */
    @Override
    public void close()
    {
        if ( closed )
        {
            return;
        }

        closed = true;
        try
        {
            seeker.close();
            toRemoveFromOnClose.remove( seeker );
        }
        catch ( IOException e )
        {
            throw new UncheckedIOException( e );
        }
    }
}
Expand Up @@ -151,6 +151,26 @@ public void query( IndexProgressor.NodeValueClient cursor, IndexOrder indexOrder
@Override
public abstract boolean hasFullValuePrecision( IndexQuery... predicates );

/**
 * Initializes {@code client} with a progressor that walks the whole tree and reports each
 * distinct value together with the number of entries having that value.
 * <p>
 * The seek range spans from a key initialized with {@link Long#MIN_VALUE} and the lowest value
 * to a key initialized with {@link Long#MAX_VALUE} and the highest value.
 *
 * @param client client to initialize with the distinct-values progression.
 * @throws UncheckedIOException wrapping an {@link IOException} thrown when opening the seeker.
 */
@Override
public void distinctValues( IndexProgressor.NodeValueClient client )
{
    KEY lowest = layout.newKey();
    lowest.initialize( Long.MIN_VALUE );
    lowest.initValueAsLowest();
    KEY highest = layout.newKey();
    highest.initialize( Long.MAX_VALUE );
    highest.initValueAsHighest();
    try
    {
        RawCursor<Hit<KEY,VALUE>,IOException> seeker = tree.seek( lowest, highest );
        // Track the seeker in openSeekers: the progressor removes it from that collection when it
        // is closed, so without this registration the seeker would be untracked if the progressor
        // is never closed or if initialize throws.
        // NOTE(review): presumably this reader closes whatever remains in openSeekers - confirm.
        openSeekers.add( seeker );
        // The cast from Layout to SchemaLayout avoids a larger refactoring of the layout types.
        client.initialize( descriptor, new NativeDistinctValuesProgressor<>( seeker, client, openSeekers, (SchemaLayout<KEY>) layout ), new IndexQuery[0] );
    }
    catch ( IOException e )
    {
        throw new UncheckedIOException( e );
    }
}

abstract void validateQuery( IndexOrder indexOrder, IndexQuery[] predicates );

/**
Expand Down
Expand Up @@ -133,6 +133,18 @@ public boolean hasFullValuePrecision( IndexQuery... predicates )
return false;
}

/**
 * Initializes {@code cursor} with a single bridging progressor and lets every part reader
 * register its own distinct-values progression with that bridge.
 *
 * @param cursor client to initialize with the combined progression.
 */
@Override
public void distinctValues( IndexProgressor.NodeValueClient cursor )
{
    loadAll();
    // The bridge acts as progressor towards the cursor and as client towards each part reader.
    BridgingIndexProgressor bridge = new BridgingIndexProgressor( cursor, descriptor.schema().getPropertyIds() );
    cursor.initialize( descriptor, bridge, new IndexQuery[0] );
    forEach( partReader -> partReader.distinctValues( bridge ) );
}

private boolean validPredicate( IndexQuery predicate )
{
return predicate instanceof IndexQuery.ExactPredicate || predicate instanceof IndexQuery.RangePredicate;
Expand Down
Expand Up @@ -118,6 +118,18 @@ public boolean hasFullValuePrecision( IndexQuery... predicates )
return true;
}

/**
 * Initializes {@code cursor} with a single bridging progressor and lets every part reader
 * register its own distinct-values progression with that bridge.
 *
 * @param cursor client to initialize with the combined progression.
 */
@Override
public void distinctValues( IndexProgressor.NodeValueClient cursor )
{
    loadAll();
    // The bridge acts as progressor towards the cursor and as client towards each part reader.
    BridgingIndexProgressor bridge = new BridgingIndexProgressor( cursor, descriptor.schema().getPropertyIds() );
    cursor.initialize( descriptor, bridge, new IndexQuery[0] );
    forEach( partReader -> partReader.distinctValues( bridge ) );
}

private boolean validPredicate( IndexQuery predicate )
{
return predicate instanceof IndexQuery.ExactPredicate || predicate instanceof IndexQuery.RangePredicate;
Expand Down
Expand Up @@ -100,6 +100,15 @@ public void query( IndexProgressor.NodeValueClient cursor, IndexOrder indexOrder
}
}

/**
 * Initializes {@code cursor} with a single bridging progressor and asks every reader known to
 * the instance selector to register its distinct-values progression with that bridge.
 *
 * @param cursor client to initialize with the combined progression.
 */
@Override
public void distinctValues( IndexProgressor.NodeValueClient cursor )
{
    // The bridge acts as progressor towards the cursor and as client towards each reader.
    BridgingIndexProgressor bridge = new BridgingIndexProgressor( cursor, descriptor.schema().getPropertyIds() );
    cursor.initialize( descriptor, bridge, new IndexQuery[0] );
    instanceSelector.forAll( partReader -> partReader.distinctValues( bridge ) );
}

@Override
public boolean hasFullValuePrecision( IndexQuery... predicates )
{
Expand Down
Expand Up @@ -69,6 +69,19 @@ void query(
*/
boolean hasFullValuePrecision( IndexQuery... predicates );

/**
* Initializes {@code client} to be able to progress through all distinct values in this index.
* <p>
* {@link IndexProgressor.NodeValueClient} is used because its method signature fits, even though the
* {@code reference} argument given to the client does not carry an entity reference here: it instead
* carries the number of index entries for the specific indexed value.
* <p>
* {@link IndexProgressor.NodeValueClient#needsValues()} decides whether or not values will be materialized and given to the client.
* The use-case for setting this to {@code false} is to have a more efficient counting of distinct values in an index,
* regardless of the actual values. Implementations may then hand {@code null} to the client instead of values.
* <p>
* Not every implementation has to support this operation; an implementation may throw
* {@link UnsupportedOperationException}.
*
* @param client {@link IndexProgressor.NodeValueClient} to get initialized with this progression.
*/
void distinctValues( IndexProgressor.NodeValueClient client );

IndexReader EMPTY = new IndexReader()
{
// Used for checking index correctness
Expand All @@ -93,7 +106,7 @@ public PrimitiveLongResourceIterator query( IndexQuery[] predicates )
@Override
public void query( IndexProgressor.NodeValueClient client, IndexOrder indexOrder, IndexQuery... query )
{
//do nothing
// do nothing
}

@Override
Expand All @@ -106,5 +119,11 @@ public boolean hasFullValuePrecision( IndexQuery... predicates )
{
return true;
}

// No-op implementation for the EMPTY reader: the client is left untouched and never initialized.
@Override
public void distinctValues( IndexProgressor.NodeValueClient client )
{
// do nothing
}
};
}
Expand Up @@ -33,6 +33,7 @@
import org.neo4j.helpers.collection.Iterables;
import org.neo4j.internal.kernel.api.IndexQuery;
import org.neo4j.kernel.api.schema.index.SchemaIndexDescriptor;
import org.neo4j.storageengine.api.schema.IndexProgressor;
import org.neo4j.storageengine.api.schema.IndexSampler;
import org.neo4j.values.storable.Value;
import org.neo4j.values.storable.ValueGroup;
Expand Down Expand Up @@ -269,6 +270,12 @@ public boolean hasFullValuePrecision( IndexQuery... predicates )
return false;
}

/**
 * Not supported by this index reader: distinct-value progression is not needed for this
 * testing index.
 *
 * @param client ignored.
 * @throws UnsupportedOperationException always.
 */
@Override
public void distinctValues( IndexProgressor.NodeValueClient client )
{
    // Carry a message so that a caller hitting this sees which operation is missing.
    throw new UnsupportedOperationException( "distinctValues is not supported by this index reader" );
}

private interface StringFilter
{
boolean test( String s );
Expand Down

0 comments on commit f9c39ab

Please sign in to comment.