diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressor.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressor.java new file mode 100644 index 0000000000000..909d148edc8a8 --- /dev/null +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressor.java @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.impl.index.schema; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Collection; + +import org.neo4j.cursor.RawCursor; +import org.neo4j.index.internal.gbptree.Hit; + +public class NativeDistinctValuesProgressor, VALUE extends NativeSchemaValue> extends NativeIndexProgressor +{ + private final SchemaLayout layout; + private final KEY prev; + private boolean first = true; + private long countForCurrentValue; + private boolean last; + + NativeDistinctValuesProgressor( RawCursor,IOException> seeker, NodeValueClient client, + Collection,IOException>> toRemoveFromOnClose, SchemaLayout layout ) + { + super( seeker, client, toRemoveFromOnClose ); + this.layout = layout; + prev = layout.newKey(); + } + + @Override + public boolean next() + { + try + { + while ( seeker.next() ) + { + KEY key = seeker.get().key(); + try + { + if ( first ) + { + first = false; + countForCurrentValue = 1; + } + else if ( layout.compareValue( prev, key ) == 0 ) + { + // same as previous + countForCurrentValue++; + } + else + { + // different from previous + boolean accepted = client.acceptNode( countForCurrentValue, extractValues( prev ) ); + countForCurrentValue = 1; + if ( accepted ) + { + return true; + } + } + } + finally + { + layout.copyKey( key, prev ); + } + } + boolean finalResult = !first && !last && client.acceptNode( countForCurrentValue, extractValues( prev ) ); + last = true; + return finalResult; + } + catch ( IOException e ) + { + throw new UncheckedIOException( e ); + } + } +} diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java index fad784aafdf64..43f5fa7f500a7 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java @@ -25,22 +25,14 @@ import org.neo4j.cursor.RawCursor; import org.neo4j.index.internal.gbptree.Hit; -import org.neo4j.storageengine.api.schema.IndexProgressor; import org.neo4j.values.storable.Value; -public class NativeHitIndexProgressor, VALUE extends NativeSchemaValue> implements IndexProgressor +public class NativeHitIndexProgressor, VALUE extends NativeSchemaValue> extends NativeIndexProgressor { - private final RawCursor,IOException> seeker; - private final NodeValueClient client; - private final Collection,IOException>> toRemoveFromOnClose; - private boolean closed; - NativeHitIndexProgressor( RawCursor,IOException> seeker, NodeValueClient client, Collection,IOException>> toRemoveFromOnClose ) { - this.seeker = seeker; - this.client = client; - this.toRemoveFromOnClose = toRemoveFromOnClose; + super( seeker, client, toRemoveFromOnClose ); } @Override @@ -69,27 +61,4 @@ protected boolean acceptValue( Value[] values ) { return true; } - - Value[] extractValues( KEY key ) - { - return client.needsValues() ? new Value[]{ key.asValue()} : null; - } - - @Override - public void close() - { - if ( !closed ) - { - closed = true; - try - { - seeker.close(); - toRemoveFromOnClose.remove( seeker ); - } - catch ( IOException e ) - { - throw new UncheckedIOException( e ); - } - } - } } diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexProgressor.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexProgressor.java new file mode 100644 index 0000000000000..7142626933bac --- /dev/null +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexProgressor.java @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.impl.index.schema; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.Collection; + +import org.neo4j.cursor.RawCursor; +import org.neo4j.index.internal.gbptree.Hit; +import org.neo4j.storageengine.api.schema.IndexProgressor; +import org.neo4j.values.storable.Value; + +abstract class NativeIndexProgressor, VALUE extends NativeSchemaValue> implements IndexProgressor +{ + final RawCursor,IOException> seeker; + final NodeValueClient client; + private final Collection,IOException>> toRemoveFromOnClose; + private boolean closed; + + NativeIndexProgressor( RawCursor,IOException> seeker, NodeValueClient client, + Collection,IOException>> toRemoveFromOnClose ) + { + this.seeker = seeker; + this.client = client; + this.toRemoveFromOnClose = toRemoveFromOnClose; + } + + @Override + public void close() + { + if ( !closed ) + { + closed = true; + try + { + seeker.close(); + toRemoveFromOnClose.remove( seeker ); + } + catch ( IOException e ) + { + throw new UncheckedIOException( e ); + } + } + } + + Value[] extractValues( KEY key ) + { + return client.needsValues() ? new Value[]{ key.asValue()} : null; + } +} diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexReader.java index 554f5081287e4..ef8c2a56b65c9 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexReader.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexReader.java @@ -151,6 +151,26 @@ public void query( IndexProgressor.NodeValueClient cursor, IndexOrder indexOrder @Override public abstract boolean hasFullValuePrecision( IndexQuery... predicates ); + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + KEY lowest = layout.newKey(); + lowest.initialize( Long.MIN_VALUE ); + lowest.initValueAsLowest(); + KEY highest = layout.newKey(); + highest.initialize( Long.MAX_VALUE ); + highest.initValueAsHighest(); + try + { + RawCursor,IOException> seeker = tree.seek( lowest, highest ); + client.initialize( descriptor, new NativeDistinctValuesProgressor<>( seeker, client, openSeekers, (SchemaLayout) layout ), new IndexQuery[0] ); + } + catch ( IOException e ) + { + throw new UncheckedIOException( e ); + } + } + abstract void validateQuery( IndexOrder indexOrder, IndexQuery[] predicates ); /** diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java index 732634fd367c3..1c08ef2feabfb 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java @@ -133,6 +133,18 @@ public boolean hasFullValuePrecision( IndexQuery... predicates ) return false; } + @Override + public void distinctValues( IndexProgressor.NodeValueClient cursor ) + { + loadAll(); + BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( cursor, descriptor.schema().getPropertyIds() ); + cursor.initialize( descriptor, multiProgressor, new IndexQuery[0] ); + for ( NativeSchemaIndexReader reader : this ) + { + reader.distinctValues( multiProgressor ); + } + } + private boolean validPredicate( IndexQuery predicate ) { return predicate instanceof IndexQuery.ExactPredicate || predicate instanceof IndexQuery.RangePredicate; diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java index 252e1c61843a3..c6b9e28c44b40 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java @@ -118,6 +118,18 @@ public boolean hasFullValuePrecision( IndexQuery... predicates ) return true; } + @Override + public void distinctValues( IndexProgressor.NodeValueClient cursor ) + { + loadAll(); + BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( cursor, descriptor.schema().getPropertyIds() ); + cursor.initialize( descriptor, multiProgressor, new IndexQuery[0] ); + for ( NativeSchemaIndexReader reader : this ) + { + reader.distinctValues( multiProgressor ); + } + } + private boolean validPredicate( IndexQuery predicate ) { return predicate instanceof IndexQuery.ExactPredicate || predicate instanceof IndexQuery.RangePredicate; diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java index 47e2df77b2f98..a3ad579849f70 100644 --- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java +++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java @@ -100,6 +100,15 @@ public void query( IndexProgressor.NodeValueClient cursor, IndexOrder indexOrder } } + @Override + public void distinctValues( IndexProgressor.NodeValueClient cursor ) + { + BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( cursor, + descriptor.schema().getPropertyIds() ); + cursor.initialize( descriptor, multiProgressor, new IndexQuery[0] ); + instanceSelector.forAll( reader -> reader.distinctValues( multiProgressor ) ); + } + @Override public boolean hasFullValuePrecision( IndexQuery... predicates ) { diff --git a/community/kernel/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java b/community/kernel/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java index 025f9cb8f37a1..52d734018ec80 100644 --- a/community/kernel/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java +++ b/community/kernel/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java @@ -69,6 +69,19 @@ void query( */ boolean hasFullValuePrecision( IndexQuery... predicates ); + /** + * Initializes {@code client} to be able to progress through all distinct values in this index. {@link IndexProgressor.NodeValueClient} + * is used because it has a perfect method signature, even if the {@code reference} argument will instead be used + * as number of index entries for the specific indexed value. + * + * {@link IndexProgressor.NodeValueClient#needsValues()} decides whether or not values will be materialized and given to the client. + * The use-case for setting this to {@code false} is to have a more efficient counting of distinct values in an index, + * regardless of the actual values. + * + * @param client {@link IndexProgressor.NodeValueClient} to get initialized with this progression. + */ + void distinctValues( IndexProgressor.NodeValueClient client ); + IndexReader EMPTY = new IndexReader() { // Used for checking index correctness @@ -93,7 +106,7 @@ public PrimitiveLongResourceIterator query( IndexQuery[] predicates ) @Override public void query( IndexProgressor.NodeValueClient client, IndexOrder indexOrder, IndexQuery... query ) { - //do nothing + // do nothing } @Override @@ -106,5 +119,11 @@ public boolean hasFullValuePrecision( IndexQuery... predicates ) { return true; } + + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + // do nothing + } }; } diff --git a/community/kernel/src/test/java/org/neo4j/kernel/impl/api/index/inmemory/HashBasedIndex.java b/community/kernel/src/test/java/org/neo4j/kernel/impl/api/index/inmemory/HashBasedIndex.java index 6a33e32e05ec5..97e3311255f95 100644 --- a/community/kernel/src/test/java/org/neo4j/kernel/impl/api/index/inmemory/HashBasedIndex.java +++ b/community/kernel/src/test/java/org/neo4j/kernel/impl/api/index/inmemory/HashBasedIndex.java @@ -33,6 +33,7 @@ import org.neo4j.helpers.collection.Iterables; import org.neo4j.internal.kernel.api.IndexQuery; import org.neo4j.kernel.api.schema.index.SchemaIndexDescriptor; +import org.neo4j.storageengine.api.schema.IndexProgressor; import org.neo4j.storageengine.api.schema.IndexSampler; import org.neo4j.values.storable.Value; import org.neo4j.values.storable.ValueGroup; @@ -269,6 +270,12 @@ public boolean hasFullValuePrecision( IndexQuery... predicates ) return false; } + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + throw new UnsupportedOperationException(); + } + private interface StringFilter { boolean test( String s ); diff --git a/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/GatheringNodeValueClient.java b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/GatheringNodeValueClient.java new file mode 100644 index 0000000000000..9a6987471a486 --- /dev/null +++ b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/GatheringNodeValueClient.java @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.impl.index.schema; + +import org.neo4j.internal.kernel.api.IndexQuery; +import org.neo4j.kernel.api.schema.index.SchemaIndexDescriptor; +import org.neo4j.storageengine.api.schema.IndexProgressor; +import org.neo4j.values.storable.Value; + +/** + * Simple NodeValueClient test utility. + */ +public class GatheringNodeValueClient implements IndexProgressor.NodeValueClient +{ + public long reference; + public Value[] values; + public SchemaIndexDescriptor descriptor; + public IndexProgressor progressor; + public IndexQuery[] query; + + @Override + public void initialize( SchemaIndexDescriptor descriptor, IndexProgressor progressor, IndexQuery[] query ) + { + this.descriptor = descriptor; + this.progressor = progressor; + this.query = query; + } + + @Override + public boolean acceptNode( long reference, Value... values ) + { + this.reference = reference; + this.values = values; + return true; + } + + @Override + public boolean needsValues() + { + return true; + } +} diff --git a/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressorTest.java b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressorTest.java new file mode 100644 index 0000000000000..cf55643d54541 --- /dev/null +++ b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressorTest.java @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.impl.index.schema; + +import org.apache.commons.lang3.mutable.MutableInt; +import org.junit.Rule; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; + +import org.neo4j.cursor.RawCursor; +import org.neo4j.index.internal.gbptree.Hit; +import org.neo4j.test.Randoms; +import org.neo4j.test.rule.RandomRule; +import org.neo4j.values.storable.Value; +import org.neo4j.values.storable.Values; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.neo4j.kernel.impl.index.schema.NativeSchemaValue.INSTANCE; +import static org.neo4j.values.storable.Values.stringValue; + +public class NativeDistinctValuesProgressorTest +{ + private final StringLayout layout = new StringLayout(); + + @Rule + public final RandomRule random = new RandomRule(); + + @Test + public void shouldCountDistinctValues() + { + // given + Value[] strings = generateRandomStrings(); + DataCursor source = new DataCursor( asHitData( strings ) ); + GatheringNodeValueClient client = new GatheringNodeValueClient(); + + // when + NativeDistinctValuesProgressor progressor = + new NativeDistinctValuesProgressor<>( source, client, new ArrayList<>(), layout ); + Map expectedCounts = asDistinctCounts( strings ); + + // then + int uniqueValues = 0; + int nonUniqueValues = 0; + while ( progressor.next() ) + { + Value string = client.values[0]; + MutableInt expectedCount = expectedCounts.remove( string ); + assertNotNull( expectedCount ); + assertEquals( expectedCount.intValue(), client.reference ); + + if ( expectedCount.intValue() > 1 ) + { + nonUniqueValues++; + } + else + { + uniqueValues++; + } + } + assertTrue( expectedCounts.isEmpty() ); + assertTrue( uniqueValues > 0 ); + assertTrue( nonUniqueValues > 0 ); + } + + private Map asDistinctCounts( Value[] strings ) + { + Map map = new HashMap<>(); + for ( Value string : strings ) + { + map.computeIfAbsent( string, s -> new MutableInt( 0 ) ).increment(); + } + return map; + } + + private Value[] generateRandomStrings() + { + Value[] strings = new Value[1_000]; + for ( int i = 0; i < strings.length; i++ ) + { + // Potential for a lot of duplicates + strings[i] = stringValue( random.randoms().string( 1, 3, Randoms.CS_DIGITS ) ); + } + Arrays.sort( strings, Values.COMPARATOR ); + return strings; + } + + private Collection> asHitData( Value[] strings ) + { + Collection> data = new ArrayList<>( strings.length ); + for ( int i = 0; i < strings.length; i++ ) + { + StringSchemaKey key = layout.newKey(); + key.from( i, strings[i] ); + data.add( new SimpleHit<>( key, INSTANCE ) ); + } + return data; + } + + private static class DataCursor implements RawCursor,IOException> + { + private final Iterator> iterator; + private Hit current; + + DataCursor( Collection> data ) + { + this.iterator = data.iterator(); + } + + @Override + public boolean next() throws RuntimeException + { + if ( !iterator.hasNext() ) + { + return false; + } + current = iterator.next(); + return true; + } + + @Override + public void close() throws RuntimeException + { + // Nothing to close + } + + @Override + public Hit get() + { + return current; + } + } +} diff --git a/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java b/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java index 2501379d4a10e..c6e39a2cd9c4b 100644 --- a/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java +++ b/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java @@ -27,7 +27,6 @@ import org.neo4j.collection.primitive.PrimitiveLongResourceIterator; import org.neo4j.internal.kernel.api.IndexOrder; import org.neo4j.internal.kernel.api.IndexQuery; -import org.neo4j.kernel.api.exceptions.index.IndexNotApplicableKernelException; import org.neo4j.values.storable.Value; public class DefaultIndexReaderTest @@ -77,6 +76,11 @@ public boolean hasFullValuePrecision( IndexQuery... predicates ) return false; } + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + } + @Override public void close() { diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java index 909b3bd5c377c..d39ce68dccfe4 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java @@ -39,12 +39,12 @@ * Enumeration representing all possible property types with corresponding encodings and query structures for Lucene * schema indexes. */ -enum ValueEncoding +public enum ValueEncoding { Number { @Override - String key() + public String key() { return "number"; } @@ -78,7 +78,7 @@ Query encodeQuery( Value value, int propertyNumber ) Array { @Override - String key() + public String key() { return "array"; } @@ -111,7 +111,7 @@ Query encodeQuery( Value value, int propertyNumber ) Bool { @Override - String key() + public String key() { return "bool"; } @@ -144,7 +144,7 @@ Query encodeQuery( Value value, int propertyNumber ) Spatial { @Override - String key() + public String key() { return "spatial"; } @@ -180,7 +180,7 @@ Query encodeQuery( Value value, int propertyNumber ) Temporal { @Override - String key() + public String key() { return "temporal"; } @@ -213,7 +213,7 @@ Query encodeQuery( Value value, int propertyNumber ) String { @Override - String key() + public String key() { return "string"; } @@ -247,7 +247,7 @@ Query encodeQuery( Value value, int propertyNumber ) private static final ValueEncoding[] AllEncodings = values(); - abstract String key(); + public abstract String key(); String key( int propertyNumber ) { diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/LuceneDistinctValuesProgressor.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/LuceneDistinctValuesProgressor.java new file mode 100644 index 0000000000000..9472c942fb0b5 --- /dev/null +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/LuceneDistinctValuesProgressor.java @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.api.impl.schema.reader; + +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.BytesRef; + +import java.io.IOException; +import java.io.UncheckedIOException; +import java.util.function.Function; + +import org.neo4j.storageengine.api.schema.IndexProgressor; +import org.neo4j.values.storable.Value; + +class LuceneDistinctValuesProgressor implements IndexProgressor +{ + private final TermsEnum terms; + private final NodeValueClient client; + private final Function valueMaterializer; + + LuceneDistinctValuesProgressor( TermsEnum terms, NodeValueClient client, Function valueMaterializer ) throws IOException + { + this.terms = terms; + this.client = client; + this.valueMaterializer = valueMaterializer; + } + + @Override + public boolean next() + { + try + { + while ( (terms.next()) != null ) + { + if ( client.acceptNode( terms.docFreq(), valueMaterializer.apply( terms.term() ) ) ) + { + return true; + } + } + return false; + } + catch ( IOException e ) + { + throw new UncheckedIOException( e ); + } + } + + @Override + public void close() + { + } +} diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java index 3472e500f64f0..1bd9ffb91fdd6 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java @@ -50,7 +50,6 @@ */ public class PartitionedIndexReader extends AbstractIndexReader { - private final List indexReaders; public PartitionedIndexReader( List partitionSearchers, @@ -114,6 +113,14 @@ public boolean hasFullValuePrecision( IndexQuery... predicates ) return false; } + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + BridgingIndexProgressor bridgingIndexProgressor = new BridgingIndexProgressor( client, descriptor.schema().getPropertyIds() ); + indexReaders.parallelStream().forEach( reader -> reader.distinctValues( bridgingIndexProgressor ) ); + client.initialize( descriptor, bridgingIndexProgressor, new IndexQuery[0] ); + } + private PrimitiveLongResourceIterator innerQuery( IndexReader reader, IndexQuery[] predicates ) { try diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java index 3ba0feeccbc54..de987fdc35e6c 100644 --- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java +++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java @@ -19,15 +19,23 @@ */ package org.neo4j.kernel.api.impl.schema.reader; +import org.apache.lucene.index.Fields; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TotalHitCountCollector; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Arrays; +import java.util.function.Function; import org.neo4j.collection.primitive.PrimitiveLongResourceIterator; import org.neo4j.helpers.TaskControl; @@ -39,14 +47,17 @@ import org.neo4j.kernel.api.impl.index.collector.DocValuesCollector; import org.neo4j.kernel.api.impl.index.partition.PartitionSearcher; import org.neo4j.kernel.api.impl.schema.LuceneDocumentStructure; +import org.neo4j.kernel.api.impl.schema.ValueEncoding; import org.neo4j.kernel.api.impl.schema.sampler.NonUniqueLuceneIndexSampler; import org.neo4j.kernel.api.impl.schema.sampler.UniqueLuceneIndexSampler; import org.neo4j.kernel.api.schema.index.SchemaIndexDescriptor; import org.neo4j.kernel.impl.api.index.sampling.IndexSamplingConfig; +import org.neo4j.kernel.impl.index.schema.fusion.BridgingIndexProgressor; import org.neo4j.storageengine.api.schema.AbstractIndexReader; import org.neo4j.storageengine.api.schema.IndexProgressor; import org.neo4j.storageengine.api.schema.IndexSampler; import org.neo4j.values.storable.Value; +import org.neo4j.values.storable.Values; import static java.lang.String.format; import static org.neo4j.internal.kernel.api.IndexQuery.IndexQueryType.exact; @@ -186,6 +197,45 @@ public boolean hasFullValuePrecision( IndexQuery... predicates ) return false; } + /** + * OBS this implementation can only provide values for properties of type {@link String}. + * Other property types will still be counted as distinct, but {@code client} won't receive {@link Value} + * instances for those. + * + * @param client {@link IndexProgressor.NodeValueClient} to get initialized with this progression. + */ + @Override + public void distinctValues( IndexProgressor.NodeValueClient client ) + { + try + { + IndexQuery[] noQueries = new IndexQuery[0]; + BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( client, descriptor.schema().getPropertyIds() ); + Fields fields = MultiFields.getFields( getIndexSearcher().getIndexReader() ); + for ( ValueEncoding valueEncoding : ValueEncoding.values() ) + { + Terms terms = fields.terms( valueEncoding.key() ); + if ( terms != null ) + { + Function valueMaterializer = valueEncoding == ValueEncoding.String && client.needsValues() + ? term -> Values.stringValue( term.utf8ToString() ) + : term -> null; + TermsEnum termsIterator = terms.iterator(); + if ( valueEncoding == ValueEncoding.Number ) + { + termsIterator = NumericUtils.filterPrefixCodedLongs( termsIterator ); + } + multiProgressor.initialize( descriptor, new LuceneDistinctValuesProgressor( termsIterator, client, valueMaterializer ), noQueries ); + } + } + client.initialize( descriptor, multiProgressor, noQueries ); + } + catch ( IOException e ) + { + throw new UncheckedIOException( e ); + } + } + private void assertNotComposite( IndexQuery[] predicates ) { assert predicates.length == 1 : "composite indexes not yet supported for this operation"; diff --git a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReaderDistinctValuesTest.java b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReaderDistinctValuesTest.java new file mode 100644 index 0000000000000..036ebcc6ccd5e --- /dev/null +++ b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReaderDistinctValuesTest.java @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2002-2018 "Neo4j," + * Neo4j Sweden AB [http://neo4j.com] + * + * This file is part of Neo4j. + * + * Neo4j is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +package org.neo4j.kernel.api.impl.schema.reader; + +import org.apache.commons.lang3.mutable.MutableInt; +import org.junit.After; +import org.junit.Before; +import org.junit.Rule; +import org.junit.Test; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.neo4j.kernel.api.impl.schema.LuceneSchemaIndexBuilder; +import org.neo4j.kernel.api.impl.schema.SchemaIndex; +import org.neo4j.kernel.api.impl.schema.writer.LuceneIndexWriter; +import org.neo4j.kernel.api.schema.index.SchemaIndexDescriptorFactory; +import org.neo4j.kernel.configuration.Config; +import org.neo4j.kernel.impl.index.schema.GatheringNodeValueClient; +import org.neo4j.storageengine.api.schema.IndexReader; +import org.neo4j.test.rule.RandomRule; +import org.neo4j.test.rule.TestDirectory; +import org.neo4j.test.rule.fs.DefaultFileSystemRule; +import org.neo4j.values.storable.Value; +import org.neo4j.values.storable.Values; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; +import static org.neo4j.kernel.api.impl.schema.LuceneDocumentStructure.documentRepresentingProperties; +import static org.neo4j.test.Randoms.CS_DIGITS; +import static org.neo4j.values.storable.Values.stringValue; + +public class SimpleIndexReaderDistinctValuesTest +{ + @Rule + public final RandomRule random = new RandomRule(); + @Rule + public final DefaultFileSystemRule fs = new DefaultFileSystemRule(); + @Rule + public final TestDirectory directory = TestDirectory.testDirectory( fs ); + private SchemaIndex index; + + @Before + public void setup() throws IOException + { + index = LuceneSchemaIndexBuilder.create( SchemaIndexDescriptorFactory.forLabel( 1, 1 ), Config.defaults() ) + .withFileSystem( fs ) + .withIndexRootFolder( directory.directory() ) + .build(); + index.create(); + index.open(); + } + + @After + public void tearDown() throws IOException + { + index.close(); + } + + @Test + public void shouldGetDistinctStringValues() throws IOException + { + // given + LuceneIndexWriter writer = index.getIndexWriter(); + Map expectedCounts = new HashMap<>(); + for ( int i = 0; i < 10_000; i++ ) + { + Value value = stringValue( random.randoms().string( 1, 3, CS_DIGITS ) ); + writer.addDocument( documentRepresentingProperties( i, value ) ); + expectedCounts.computeIfAbsent( value, v -> new MutableInt( 0 ) ).increment(); + } + index.maybeRefreshBlocking(); + + // when/then + GatheringNodeValueClient client = new GatheringNodeValueClient(); + try ( IndexReader reader = index.getIndexReader() ) + { + reader.distinctValues( client ); + while ( client.progressor.next() ) + { + Value value = client.values[0]; + MutableInt expectedCount = expectedCounts.remove( value ); + assertNotNull( expectedCount ); + assertEquals( expectedCount.intValue(), client.reference ); + } + assertTrue( expectedCounts.isEmpty() ); + } + } + + @Test + public void shouldCountDistinctValues() throws IOException + { + // given + LuceneIndexWriter writer = index.getIndexWriter(); + int expectedCount = 10_000; + for ( int i = 0; i < expectedCount; i++ ) + { + Value value = Values.of( random.propertyValue() ); + writer.addDocument( documentRepresentingProperties( i, value ) ); + } + index.maybeRefreshBlocking(); + + // when/then + GatheringNodeValueClient client = new GatheringNodeValueClient(); + try ( IndexReader reader = index.getIndexReader() ) + { + reader.distinctValues( client ); + int actualCount = 0; + while ( client.progressor.next() ) + { + actualCount += client.reference; + } + assertEquals( expectedCount, actualCount ); + } + } +}