From 8880b9f743cc4b16484887481d83ce04fb85fe6f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mattias=20Finn=C3=A9?= Date: Sun, 16 Sep 2018 21:31:45 +0200 Subject: [PATCH] Adds IndexReader#distinctValues The main purpose is to access distinct values, and the count for each value, regardless of index provider. There are procedures floating around for doing this on an IndexReader from a provider backed by Lucene, but this hasn't worked for native indexes, until now. Two points of ugliness in this commit: - A cast from Layout -> SchemaLayout in NativeSchemaIndexReader. This is done to prevent a bigger refactoring, which has already been done in 3.5. - HashBasedIndex just throws UnsupportedOperationException in its distinctValues. This is because this functionality isn't really needed for this testing index, and HashBasedIndex and in-memory indexing as a whole have been removed in 3.5. --- .../impl/fulltext/FulltextIndexReader.java | 6 + .../storageengine/api/schema/IndexReader.java | 21 ++- .../NativeDistinctValuesProgressor.java | 90 ++++++++++ .../schema/NativeHitIndexProgressor.java | 35 +--- .../index/schema/NativeIndexProgressor.java | 68 ++++++++ .../impl/index/schema/NativeIndexReader.java | 21 +++ .../impl/index/schema/SpatialIndexReader.java | 12 ++ .../index/schema/TemporalIndexReader.java | 12 ++ .../schema/fusion/FusionIndexReader.java | 9 + .../schema/GatheringNodeValueClient.java | 64 +++++++ .../NativeDistinctValuesProgressorTest.java | 159 ++++++++++++++++++ .../api/schema/DefaultIndexReaderTest.java | 5 + .../QueryResultComparingIndexReader.java | 6 + .../kernel/api/impl/schema/ValueEncoding.java | 16 +- .../LuceneDistinctValuesProgressor.java | 69 ++++++++ .../schema/reader/PartitionedIndexReader.java | 9 +- .../impl/schema/reader/SimpleIndexReader.java | 51 ++++++ .../SimpleIndexReaderDistinctValuesTest.java | 135 +++++++++++++++ 18 files changed, 745 insertions(+), 43 deletions(-) create mode 100644 
community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressor.java create mode 100644 community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexProgressor.java create mode 100644 community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/GatheringNodeValueClient.java create mode 100644 community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressorTest.java create mode 100644 community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/LuceneDistinctValuesProgressor.java create mode 100644 community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReaderDistinctValuesTest.java diff --git a/community/fulltext-index/src/main/java/org/neo4j/kernel/api/impl/fulltext/FulltextIndexReader.java b/community/fulltext-index/src/main/java/org/neo4j/kernel/api/impl/fulltext/FulltextIndexReader.java index 73215562df134..99511c1535ff9 100644 --- a/community/fulltext-index/src/main/java/org/neo4j/kernel/api/impl/fulltext/FulltextIndexReader.java +++ b/community/fulltext-index/src/main/java/org/neo4j/kernel/api/impl/fulltext/FulltextIndexReader.java @@ -58,6 +58,12 @@ public void query( IndexProgressor.NodeValueClient client, IndexOrder indexOrder throw new IndexNotApplicableKernelException( "Fulltext indexes does not support IndexQuery queries" ); } + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + throw new UnsupportedOperationException( "Fulltext indexes does not support distinctValues queries" ); + } + @Override public boolean hasFullValuePrecision( IndexQuery... 
predicates ) { diff --git a/community/kernel-api/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java b/community/kernel-api/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java index 5ba1dadcbc053..d5505cf49486d 100644 --- a/community/kernel-api/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java +++ b/community/kernel-api/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java @@ -68,6 +68,19 @@ void query( IndexProgressor.NodeValueClient client, IndexOrder indexOrder, boole */ boolean hasFullValuePrecision( IndexQuery... predicates ); + /** + * Initializes {@code client} to be able to progress through all distinct values in this index. {@link IndexProgressor.NodeValueClient} + * is used because it has a perfect method signature, even if the {@code reference} argument will instead be used + * as number of index entries for the specific indexed value. + * + * {@link IndexProgressor.NodeValueClient#needsValues()} decides whether or not values will be materialized and given to the client. + * The use-case for setting this to {@code false} is to have a more efficient counting of distinct values in an index, + * regardless of the actual values. + * + * @param client {@link IndexProgressor.NodeValueClient} to get initialized with this progression. + */ + void distinctValues( IndexProgressor.NodeValueClient client ); + IndexReader EMPTY = new IndexReader() { // Used for checking index correctness @@ -92,7 +105,7 @@ public PrimitiveLongResourceIterator query( IndexQuery[] predicates ) @Override public void query( IndexProgressor.NodeValueClient client, IndexOrder indexOrder, boolean needsValues, IndexQuery... query ) { - //do nothing + // do nothing } @Override @@ -105,5 +118,11 @@ public boolean hasFullValuePrecision( IndexQuery... 
predicates ) { return true; } + + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + // do nothing + } }; } diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressor.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressor.java new file mode 100644 index 0000000000000..48a3f3688cb35 --- /dev/null +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressor.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . 
+ */ +package org.neo4j.kernel.impl.index.schema; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Collection; + +import org.neo4j.cursor.RawCursor; +import org.neo4j.index.internal.gbptree.Hit; + +public class NativeDistinctValuesProgressor, VALUE extends NativeIndexValue> extends NativeIndexProgressor +{ + private final IndexLayout layout; + private final KEY prev; + private boolean first = true; + private long countForCurrentValue; + private boolean last; + + NativeDistinctValuesProgressor( RawCursor,IOException> seeker, NodeValueClient client, + Collection,IOException>> toRemoveFromOnClose, IndexLayout layout ) + { + super( seeker, client, toRemoveFromOnClose ); + this.layout = layout; + prev = layout.newKey(); + } + + @Override + public boolean next() + { + try + { + while ( seeker.next() ) + { + KEY key = seeker.get().key(); + try + { + if ( first ) + { + first = false; + countForCurrentValue = 1; + } + else if ( layout.compareValue( prev, key ) == 0 ) + { + // same as previous + countForCurrentValue++; + } + else + { + // different from previous + boolean accepted = client.acceptNode( countForCurrentValue, extractValues( prev ) ); + countForCurrentValue = 1; + if ( accepted ) + { + return true; + } + } + } + finally + { + layout.copyKey( key, prev ); + } + } + boolean finalResult = !first && !last && client.acceptNode( countForCurrentValue, extractValues( prev ) ); + last = true; + return finalResult; + } + catch ( IOException e ) + { + throw new UncheckedIOException( e ); + } + } +} diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java index 9532d4a87cd98..c92b7edbf3bde 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java +++ 
b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java @@ -25,22 +25,14 @@ import org.neo4j.cursor.RawCursor; import org.neo4j.index.internal.gbptree.Hit; -import org.neo4j.storageengine.api.schema.IndexProgressor; import org.neo4j.values.storable.Value; -public class NativeHitIndexProgressor, VALUE extends NativeIndexValue> implements IndexProgressor +public class NativeHitIndexProgressor, VALUE extends NativeIndexValue> extends NativeIndexProgressor { - private final RawCursor,IOException> seeker; - private final NodeValueClient client; - private final Collection,IOException>> toRemoveFromOnClose; - private boolean closed; - NativeHitIndexProgressor( RawCursor,IOException> seeker, NodeValueClient client, Collection,IOException>> toRemoveFromOnClose ) { - this.seeker = seeker; - this.client = client; - this.toRemoveFromOnClose = toRemoveFromOnClose; + super( seeker, client, toRemoveFromOnClose ); } @Override @@ -69,27 +61,4 @@ protected boolean acceptValue( Value[] values ) { return true; } - - Value[] extractValues( KEY key ) - { - return client.needsValues() ? key.asValues() : null; - } - - @Override - public void close() - { - if ( !closed ) - { - closed = true; - try - { - seeker.close(); - toRemoveFromOnClose.remove( seeker ); - } - catch ( IOException e ) - { - throw new UncheckedIOException( e ); - } - } - } } diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexProgressor.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexProgressor.java new file mode 100644 index 0000000000000..8ba7f317aa2ee --- /dev/null +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexProgressor.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. 
+ * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.impl.index.schema; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Collection; + +import org.neo4j.cursor.RawCursor; +import org.neo4j.index.internal.gbptree.Hit; +import org.neo4j.storageengine.api.schema.IndexProgressor; +import org.neo4j.values.storable.Value; + +abstract class NativeIndexProgressor, VALUE extends NativeIndexValue> implements IndexProgressor +{ + final RawCursor,IOException> seeker; + final NodeValueClient client; + private final Collection,IOException>> toRemoveFromOnClose; + private boolean closed; + + NativeIndexProgressor( RawCursor,IOException> seeker, NodeValueClient client, + Collection,IOException>> toRemoveFromOnClose ) + { + this.seeker = seeker; + this.client = client; + this.toRemoveFromOnClose = toRemoveFromOnClose; + } + + @Override + public void close() + { + if ( !closed ) + { + closed = true; + try + { + seeker.close(); + toRemoveFromOnClose.remove( seeker ); + } + catch ( IOException e ) + { + throw new UncheckedIOException( e ); + } + } + } + + Value[] extractValues( KEY key ) + { + return client.needsValues() ? 
key.asValues() : null; + } +} diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexReader.java index c28274779a38a..1633a0de65860 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexReader.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexReader.java @@ -136,6 +136,27 @@ void initializeFromToKeys( KEY treeKeyFrom, KEY treeKeyTo ) @Override public abstract boolean hasFullValuePrecision( IndexQuery... predicates ); + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + KEY lowest = layout.newKey(); + lowest.initialize( Long.MIN_VALUE ); + lowest.initValuesAsLowest(); + KEY highest = layout.newKey(); + highest.initialize( Long.MAX_VALUE ); + highest.initValuesAsHighest(); + try + { + RawCursor,IOException> seeker = tree.seek( lowest, highest ); + client.initialize( descriptor, new NativeDistinctValuesProgressor<>( seeker, client, openSeekers, (IndexLayout) layout ), + new IndexQuery[0], IndexOrder.NONE, client.needsValues() ); + } + catch ( IOException e ) + { + throw new UncheckedIOException( e ); + } + } + abstract void validateQuery( IndexOrder indexOrder, IndexQuery[] predicates ); /** diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java index 5348b5e8fa090..e9a8946a375dc 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java @@ -146,6 +146,18 @@ public boolean hasFullValuePrecision( IndexQuery... 
predicates ) return false; } + @Override + public void distinctValues( IndexProgressor.NodeValueClient cursor ) + { + loadAll(); + BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( cursor, descriptor.schema().getPropertyIds() ); + cursor.initialize( descriptor, multiProgressor, new IndexQuery[0], IndexOrder.NONE, false ); + for ( NativeIndexReader reader : this ) + { + reader.distinctValues( multiProgressor ); + } + } + private boolean validPredicate( IndexQuery predicate ) { return predicate instanceof IndexQuery.ExactPredicate || predicate instanceof IndexQuery.RangePredicate; diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java index 69cf3cad605bf..7efd51ed4dac7 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java @@ -125,6 +125,18 @@ public boolean hasFullValuePrecision( IndexQuery... 
predicates ) return true; } + @Override + public void distinctValues( IndexProgressor.NodeValueClient cursor ) + { + loadAll(); + BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( cursor, descriptor.schema().getPropertyIds() ); + cursor.initialize( descriptor, multiProgressor, new IndexQuery[0], IndexOrder.NONE, cursor.needsValues() ); + for ( NativeIndexReader reader : this ) + { + reader.distinctValues( multiProgressor ); + } + } + private boolean validPredicate( IndexQuery predicate ) { return predicate instanceof IndexQuery.ExactPredicate || predicate instanceof IndexQuery.RangePredicate; diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java index 9f50d1813965f..e8a12bda2c199 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java @@ -129,6 +129,15 @@ public synchronized IndexNotApplicableKernelException getCause() } } + @Override + public void distinctValues( IndexProgressor.NodeValueClient cursor ) + { + BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( cursor, + descriptor.schema().getPropertyIds() ); + cursor.initialize( descriptor, multiProgressor, new IndexQuery[0], IndexOrder.NONE, cursor.needsValues() ); + instanceSelector.forAll( reader -> reader.distinctValues( multiProgressor ) ); + } + @Override public boolean hasFullValuePrecision( IndexQuery... 
predicates ) { diff --git a/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/GatheringNodeValueClient.java b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/GatheringNodeValueClient.java new file mode 100644 index 0000000000000..f73f65d14f70f --- /dev/null +++ b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/GatheringNodeValueClient.java @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.impl.index.schema; + +import org.neo4j.internal.kernel.api.IndexOrder; +import org.neo4j.internal.kernel.api.IndexQuery; +import org.neo4j.storageengine.api.schema.IndexDescriptor; +import org.neo4j.storageengine.api.schema.IndexProgressor; +import org.neo4j.values.storable.Value; + +/** + * Simple NodeValueClient test utility. 
+ */ +public class GatheringNodeValueClient implements IndexProgressor.NodeValueClient +{ + public long reference; + public Value[] values; + public IndexDescriptor descriptor; + public IndexProgressor progressor; + public IndexQuery[] query; + public IndexOrder order; + public boolean needsValues; + + @Override + public void initialize( IndexDescriptor descriptor, IndexProgressor progressor, IndexQuery[] query, IndexOrder order, boolean needsValues ) + { + this.descriptor = descriptor; + this.progressor = progressor; + this.query = query; + this.order = order; + this.needsValues = needsValues; + } + + @Override + public boolean acceptNode( long reference, Value... values ) + { + this.reference = reference; + this.values = values; + return true; + } + + @Override + public boolean needsValues() + { + return needsValues; + } +} diff --git a/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressorTest.java b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressorTest.java new file mode 100644 index 0000000000000..4dce64a8f442b --- /dev/null +++ b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressorTest.java @@ -0,0 +1,159 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. 
If not, see . + */ +package org.neo4j.kernel.impl.index.schema; + +import org.apache.commons.lang3.mutable.MutableInt; +import org.junit.Rule; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import org.neo4j.cursor.RawCursor; +import org.neo4j.index.internal.gbptree.Hit; +import org.neo4j.test.rule.RandomRule; +import org.neo4j.values.storable.Value; +import org.neo4j.values.storable.Values; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.neo4j.kernel.impl.index.schema.NativeIndexKey.Inclusion.NEUTRAL; +import static org.neo4j.kernel.impl.index.schema.NativeIndexValue.INSTANCE; +import static org.neo4j.values.storable.Values.stringValue; + +public class NativeDistinctValuesProgressorTest +{ + private final StringLayout layout = new StringLayout(); + + @Rule + public final RandomRule random = new RandomRule(); + + @Test + public void shouldCountDistinctValues() + { + // given + Value[] strings = generateRandomStrings(); + DataCursor source = new DataCursor( asHitData( strings ) ); + GatheringNodeValueClient client = new GatheringNodeValueClient(); + + // when + NativeDistinctValuesProgressor progressor = + new NativeDistinctValuesProgressor<>( source, client, new ArrayList<>(), layout ); + Map expectedCounts = asDistinctCounts( strings ); + + // then + int uniqueValues = 0; + int nonUniqueValues = 0; + while ( progressor.next() ) + { + Value string = client.values[0]; + MutableInt expectedCount = expectedCounts.remove( string ); + assertNotNull( expectedCount ); + assertEquals( expectedCount.intValue(), client.reference ); + + if ( expectedCount.intValue() > 1 ) + { + nonUniqueValues++; + } + else + { + uniqueValues++; + } + } + assertTrue( expectedCounts.isEmpty() ); + assertTrue( 
uniqueValues > 0 ); + assertTrue( nonUniqueValues > 0 ); + } + + private Map asDistinctCounts( Value[] strings ) + { + Map map = new HashMap<>(); + for ( Value string : strings ) + { + map.computeIfAbsent( string, s -> new MutableInt( 0 ) ).increment(); + } + return map; + } + + private Value[] generateRandomStrings() + { + Value[] strings = new Value[1_000]; + for ( int i = 0; i < strings.length; i++ ) + { + // Potential for a lot of duplicates + strings[i] = stringValue( String.valueOf( random.nextInt( 1_000 ) ) ); + } + Arrays.sort( strings, Values.COMPARATOR ); + return strings; + } + + private Collection> asHitData( Value[] strings ) + { + Collection> data = new ArrayList<>( strings.length ); + for ( int i = 0; i < strings.length; i++ ) + { + StringIndexKey key = layout.newKey(); + key.initialize( i ); + key.initFromValue( 0, strings[i], NEUTRAL ); + data.add( new SimpleHit<>( key, INSTANCE ) ); + } + return data; + } + + private static class DataCursor implements RawCursor,IOException> + { + private final Iterator> iterator; + private Hit current; + + DataCursor( Collection> data ) + { + this.iterator = data.iterator(); + } + + @Override + public boolean next() throws RuntimeException + { + if ( !iterator.hasNext() ) + { + return false; + } + current = iterator.next(); + return true; + } + + @Override + public void close() throws RuntimeException + { + // Nothing to close + } + + @Override + public Hit get() + { + return current; + } + } +} diff --git a/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java b/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java index ea493b0dd953a..85bbdb09f54ad 100644 --- a/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java +++ b/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java @@ -73,6 +73,11 @@ public boolean hasFullValuePrecision( IndexQuery... 
predicates ) return false; } + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + } + @Override public void close() { diff --git a/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/QueryResultComparingIndexReader.java b/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/QueryResultComparingIndexReader.java index 938def2d6d480..ec924ce5dd0ca 100644 --- a/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/QueryResultComparingIndexReader.java +++ b/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/QueryResultComparingIndexReader.java @@ -170,6 +170,12 @@ public boolean needsValues() actual.query( wrappedClient, indexOrder, needsValues, query ); } + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + actual.distinctValues( client ); + } + @Override public boolean hasFullValuePrecision( IndexQuery... predicates ) { diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java index 276ec09727944..a5adebe162566 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java @@ -39,12 +39,12 @@ * Enumeration representing all possible property types with corresponding encodings and query structures for Lucene * schema indexes. 
*/ -enum ValueEncoding +public enum ValueEncoding { Number { @Override - String key() + public String key() { return "number"; } @@ -78,7 +78,7 @@ Query encodeQuery( Value value, int propertyNumber ) Array { @Override - String key() + public String key() { return "array"; } @@ -111,7 +111,7 @@ Query encodeQuery( Value value, int propertyNumber ) Bool { @Override - String key() + public String key() { return "bool"; } @@ -144,7 +144,7 @@ Query encodeQuery( Value value, int propertyNumber ) Spatial { @Override - String key() + public String key() { return "spatial"; } @@ -180,7 +180,7 @@ Query encodeQuery( Value value, int propertyNumber ) Temporal { @Override - String key() + public String key() { return "temporal"; } @@ -213,7 +213,7 @@ Query encodeQuery( Value value, int propertyNumber ) String { @Override - String key() + public String key() { return "string"; } @@ -247,7 +247,7 @@ Query encodeQuery( Value value, int propertyNumber ) private static final ValueEncoding[] AllEncodings = values(); - abstract String key(); + public abstract String key(); String key( int propertyNumber ) { diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/LuceneDistinctValuesProgressor.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/LuceneDistinctValuesProgressor.java new file mode 100644 index 0000000000000..9472c942fb0b5 --- /dev/null +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/LuceneDistinctValuesProgressor.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.api.impl.schema.reader; + +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.function.Function; + +import org.neo4j.storageengine.api.schema.IndexProgressor; +import org.neo4j.values.storable.Value; + +class LuceneDistinctValuesProgressor implements IndexProgressor +{ + private final TermsEnum terms; + private final NodeValueClient client; + private final Function valueMaterializer; + + LuceneDistinctValuesProgressor( TermsEnum terms, NodeValueClient client, Function valueMaterializer ) throws IOException + { + this.terms = terms; + this.client = client; + this.valueMaterializer = valueMaterializer; + } + + @Override + public boolean next() + { + try + { + while ( (terms.next()) != null ) + { + if ( client.acceptNode( terms.docFreq(), valueMaterializer.apply( terms.term() ) ) ) + { + return true; + } + } + return false; + } + catch ( IOException e ) + { + throw new UncheckedIOException( e ); + } + } + + @Override + public void close() + { + } +} diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java index 4b78e7527776d..07a3bde45d916 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java @@ -50,7 +50,6 @@ */ public 
class PartitionedIndexReader extends AbstractIndexReader { - private final List indexReaders; public PartitionedIndexReader( List partitionSearchers, @@ -115,6 +114,14 @@ public boolean hasFullValuePrecision( IndexQuery... predicates ) return false; } + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + BridgingIndexProgressor bridgingIndexProgressor = new BridgingIndexProgressor( client, descriptor.schema().getPropertyIds() ); + indexReaders.parallelStream().forEach( reader -> reader.distinctValues( bridgingIndexProgressor ) ); + client.initialize( descriptor, bridgingIndexProgressor, new IndexQuery[0], IndexOrder.NONE, client.needsValues() ); + } + private PrimitiveLongResourceIterator innerQuery( IndexReader reader, IndexQuery[] predicates ) { try diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java index b9b1e9a37250f..9d0b023816602 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java @@ -19,15 +19,23 @@ */ package org.neo4j.kernel.api.impl.schema.reader; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TotalHitCountCollector; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Arrays; +import java.util.function.Function; 
import org.neo4j.collection.PrimitiveLongResourceIterator; import org.neo4j.helpers.TaskControl; @@ -39,14 +47,17 @@ import org.neo4j.kernel.api.impl.index.collector.DocValuesCollector; import org.neo4j.kernel.api.impl.index.partition.PartitionSearcher; import org.neo4j.kernel.api.impl.schema.LuceneDocumentStructure; +import org.neo4j.kernel.api.impl.schema.ValueEncoding; import org.neo4j.kernel.api.impl.schema.sampler.NonUniqueLuceneIndexSampler; import org.neo4j.kernel.api.impl.schema.sampler.UniqueLuceneIndexSampler; import org.neo4j.kernel.impl.api.index.sampling.IndexSamplingConfig; +import org.neo4j.kernel.impl.api.schema.BridgingIndexProgressor; import org.neo4j.storageengine.api.schema.AbstractIndexReader; import org.neo4j.storageengine.api.schema.IndexDescriptor; import org.neo4j.storageengine.api.schema.IndexProgressor; import org.neo4j.storageengine.api.schema.IndexSampler; import org.neo4j.values.storable.Value; +import org.neo4j.values.storable.Values; import static java.lang.String.format; import static org.neo4j.internal.kernel.api.IndexQuery.IndexQueryType.exact; @@ -187,6 +198,46 @@ public boolean hasFullValuePrecision( IndexQuery... predicates ) return false; } + /** + * OBS this implementation can only provide values for properties of type {@link String}. + * Other property types will still be counted as distinct, but {@code client} won't receive {@link Value} + * instances for those. + * + * @param client {@link IndexProgressor.NodeValueClient} to get initialized with this progression. 
+ */ + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + try + { + IndexQuery[] noQueries = new IndexQuery[0]; + BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( client, descriptor.schema().getPropertyIds() ); + Fields fields = MultiFields.getFields( getIndexSearcher().getIndexReader() ); + for ( ValueEncoding valueEncoding : ValueEncoding.values() ) + { + Terms terms = fields.terms( valueEncoding.key() ); + if ( terms != null ) + { + Function valueMaterializer = valueEncoding == ValueEncoding.String && client.needsValues() + ? term -> Values.stringValue( term.utf8ToString() ) + : term -> null; + TermsEnum termsIterator = terms.iterator(); + if ( valueEncoding == ValueEncoding.Number ) + { + termsIterator = NumericUtils.filterPrefixCodedLongs( termsIterator ); + } + multiProgressor.initialize( descriptor, new LuceneDistinctValuesProgressor( termsIterator, client, valueMaterializer ), noQueries, + IndexOrder.NONE, client.needsValues() ); + } + } + client.initialize( descriptor, multiProgressor, noQueries, IndexOrder.NONE, client.needsValues() ); + } + catch ( IOException e ) + { + throw new UncheckedIOException( e ); + } + } + private void assertNotComposite( IndexQuery[] predicates ) { assert predicates.length == 1 : "composite indexes not yet supported for this operation"; diff --git a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReaderDistinctValuesTest.java b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReaderDistinctValuesTest.java new file mode 100644 index 0000000000000..2191fae7945fa --- /dev/null +++ b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReaderDistinctValuesTest.java @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. 
+ * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.api.impl.schema.reader; + +import org.apache.commons.lang3.mutable.MutableInt; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.neo4j.kernel.api.impl.schema.LuceneSchemaIndexBuilder; +import org.neo4j.kernel.api.impl.schema.SchemaIndex; +import org.neo4j.kernel.api.impl.schema.writer.LuceneIndexWriter; +import org.neo4j.kernel.api.schema.SchemaDescriptorFactory; +import org.neo4j.kernel.configuration.Config; +import org.neo4j.kernel.impl.index.schema.GatheringNodeValueClient; +import org.neo4j.storageengine.api.schema.IndexDescriptorFactory; +import org.neo4j.storageengine.api.schema.IndexReader; +import org.neo4j.test.rule.RandomRule; +import org.neo4j.test.rule.TestDirectory; +import org.neo4j.test.rule.fs.DefaultFileSystemRule; +import org.neo4j.values.storable.Value; +import org.neo4j.values.storable.Values; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.neo4j.kernel.api.impl.schema.LuceneDocumentStructure.documentRepresentingProperties; +import static org.neo4j.values.storable.Values.stringValue; + +public class SimpleIndexReaderDistinctValuesTest +{ + @Rule + public 
final RandomRule random = new RandomRule(); + @Rule + public final DefaultFileSystemRule fs = new DefaultFileSystemRule(); + @Rule + public final TestDirectory directory = TestDirectory.testDirectory( fs ); + private SchemaIndex index; + + @Before + public void setup() throws IOException + { + index = LuceneSchemaIndexBuilder.create( IndexDescriptorFactory.forSchema( SchemaDescriptorFactory.forLabel( 1, 1 ) ), Config.defaults() ) + .withFileSystem( fs ) + .withIndexRootFolder( directory.directory() ) + .build(); + index.create(); + index.open(); + } + + @After + public void tearDown() throws IOException + { + index.close(); + } + + @Test + public void shouldGetDistinctStringValues() throws IOException + { + // given + LuceneIndexWriter writer = index.getIndexWriter(); + Map expectedCounts = new HashMap<>(); + for ( int i = 0; i < 10_000; i++ ) + { + Value value = stringValue( String.valueOf( random.nextInt( 1_000 ) ) ); + writer.addDocument( documentRepresentingProperties( i, value ) ); + expectedCounts.computeIfAbsent( value, v -> new MutableInt( 0 ) ).increment(); + } + index.maybeRefreshBlocking(); + + // when/then + GatheringNodeValueClient client = new GatheringNodeValueClient(); + try ( IndexReader reader = index.getIndexReader() ) + { + reader.distinctValues( client ); + while ( client.progressor.next() ) + { + Value value = client.values[0]; + MutableInt expectedCount = expectedCounts.remove( value ); + assertNotNull( expectedCount ); + assertEquals( expectedCount.intValue(), client.reference ); + } + assertTrue( expectedCounts.isEmpty() ); + } + } + + @Test + public void shouldCountDistinctValues() throws IOException + { + // given + LuceneIndexWriter writer = index.getIndexWriter(); + int expectedCount = 10_000; + for ( int i = 0; i < expectedCount; i++ ) + { + Value value = Values.of( random.nextValue() ); + writer.addDocument( documentRepresentingProperties( i, value ) ); + } + index.maybeRefreshBlocking(); + + // when/then + GatheringNodeValueClient 
client = new GatheringNodeValueClient(); + try ( IndexReader reader = index.getIndexReader() ) + { + reader.distinctValues( client ); + int actualCount = 0; + while ( client.progressor.next() ) + { + actualCount += client.reference; + } + assertEquals( expectedCount, actualCount ); + } + } +}