diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressor.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressor.java
new file mode 100644
index 0000000000000..909d148edc8a8
--- /dev/null
+++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressor.java
@@ -0,0 +1,90 @@
+/*
+ * Copyright (c) 2002-2018 "Neo4j,"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package org.neo4j.kernel.impl.index.schema;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Collection;
+
+import org.neo4j.cursor.RawCursor;
+import org.neo4j.index.internal.gbptree.Hit;
+
+public class NativeDistinctValuesProgressor, VALUE extends NativeSchemaValue> extends NativeIndexProgressor
+{
+ private final SchemaLayout layout;
+ private final KEY prev;
+ private boolean first = true;
+ private long countForCurrentValue;
+ private boolean last;
+
+ NativeDistinctValuesProgressor( RawCursor,IOException> seeker, NodeValueClient client,
+ Collection,IOException>> toRemoveFromOnClose, SchemaLayout layout )
+ {
+ super( seeker, client, toRemoveFromOnClose );
+ this.layout = layout;
+ prev = layout.newKey();
+ }
+
+ @Override
+ public boolean next()
+ {
+ try
+ {
+ while ( seeker.next() )
+ {
+ KEY key = seeker.get().key();
+ try
+ {
+ if ( first )
+ {
+ first = false;
+ countForCurrentValue = 1;
+ }
+ else if ( layout.compareValue( prev, key ) == 0 )
+ {
+ // same as previous
+ countForCurrentValue++;
+ }
+ else
+ {
+ // different from previous
+ boolean accepted = client.acceptNode( countForCurrentValue, extractValues( prev ) );
+ countForCurrentValue = 1;
+ if ( accepted )
+ {
+ return true;
+ }
+ }
+ }
+ finally
+ {
+ layout.copyKey( key, prev );
+ }
+ }
+ boolean finalResult = !first && !last && client.acceptNode( countForCurrentValue, extractValues( prev ) );
+ last = true;
+ return finalResult;
+ }
+ catch ( IOException e )
+ {
+ throw new UncheckedIOException( e );
+ }
+ }
+}
diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java
index fad784aafdf64..43f5fa7f500a7 100644
--- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java
+++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeHitIndexProgressor.java
@@ -25,22 +25,14 @@
import org.neo4j.cursor.RawCursor;
import org.neo4j.index.internal.gbptree.Hit;
-import org.neo4j.storageengine.api.schema.IndexProgressor;
import org.neo4j.values.storable.Value;
-public class NativeHitIndexProgressor, VALUE extends NativeSchemaValue> implements IndexProgressor
+public class NativeHitIndexProgressor, VALUE extends NativeSchemaValue> extends NativeIndexProgressor
{
- private final RawCursor,IOException> seeker;
- private final NodeValueClient client;
- private final Collection,IOException>> toRemoveFromOnClose;
- private boolean closed;
-
NativeHitIndexProgressor( RawCursor,IOException> seeker, NodeValueClient client,
Collection,IOException>> toRemoveFromOnClose )
{
- this.seeker = seeker;
- this.client = client;
- this.toRemoveFromOnClose = toRemoveFromOnClose;
+ super( seeker, client, toRemoveFromOnClose );
}
@Override
@@ -69,27 +61,4 @@ protected boolean acceptValue( Value[] values )
{
return true;
}
-
- Value[] extractValues( KEY key )
- {
- return client.needsValues() ? new Value[]{ key.asValue()} : null;
- }
-
- @Override
- public void close()
- {
- if ( !closed )
- {
- closed = true;
- try
- {
- seeker.close();
- toRemoveFromOnClose.remove( seeker );
- }
- catch ( IOException e )
- {
- throw new UncheckedIOException( e );
- }
- }
- }
}
diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexProgressor.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexProgressor.java
new file mode 100644
index 0000000000000..7142626933bac
--- /dev/null
+++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeIndexProgressor.java
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2002-2018 "Neo4j,"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package org.neo4j.kernel.impl.index.schema;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.Collection;
+
+import org.neo4j.cursor.RawCursor;
+import org.neo4j.index.internal.gbptree.Hit;
+import org.neo4j.storageengine.api.schema.IndexProgressor;
+import org.neo4j.values.storable.Value;
+
+abstract class NativeIndexProgressor, VALUE extends NativeSchemaValue> implements IndexProgressor
+{
+ final RawCursor,IOException> seeker;
+ final NodeValueClient client;
+ private final Collection,IOException>> toRemoveFromOnClose;
+ private boolean closed;
+
+ NativeIndexProgressor( RawCursor,IOException> seeker, NodeValueClient client,
+ Collection,IOException>> toRemoveFromOnClose )
+ {
+ this.seeker = seeker;
+ this.client = client;
+ this.toRemoveFromOnClose = toRemoveFromOnClose;
+ }
+
+ @Override
+ public void close()
+ {
+ if ( !closed )
+ {
+ closed = true;
+ try
+ {
+ seeker.close();
+ toRemoveFromOnClose.remove( seeker );
+ }
+ catch ( IOException e )
+ {
+ throw new UncheckedIOException( e );
+ }
+ }
+ }
+
+ Value[] extractValues( KEY key )
+ {
+ return client.needsValues() ? new Value[]{ key.asValue()} : null;
+ }
+}
diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexReader.java
index 554f5081287e4..ef8c2a56b65c9 100644
--- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexReader.java
+++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/NativeSchemaIndexReader.java
@@ -151,6 +151,26 @@ public void query( IndexProgressor.NodeValueClient cursor, IndexOrder indexOrder
@Override
public abstract boolean hasFullValuePrecision( IndexQuery... predicates );
+ @Override
+ public void distinctValues( IndexProgressor.NodeValueClient client )
+ {
+ KEY lowest = layout.newKey();
+ lowest.initialize( Long.MIN_VALUE );
+ lowest.initValueAsLowest();
+ KEY highest = layout.newKey();
+ highest.initialize( Long.MAX_VALUE );
+ highest.initValueAsHighest();
+ try
+ {
+ RawCursor,IOException> seeker = tree.seek( lowest, highest );
+ client.initialize( descriptor, new NativeDistinctValuesProgressor<>( seeker, client, openSeekers, (SchemaLayout) layout ), new IndexQuery[0] );
+ }
+ catch ( IOException e )
+ {
+ throw new UncheckedIOException( e );
+ }
+ }
+
abstract void validateQuery( IndexOrder indexOrder, IndexQuery[] predicates );
/**
diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java
index 732634fd367c3..1c08ef2feabfb 100644
--- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java
+++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/SpatialIndexReader.java
@@ -133,6 +133,18 @@ public boolean hasFullValuePrecision( IndexQuery... predicates )
return false;
}
+ @Override
+ public void distinctValues( IndexProgressor.NodeValueClient cursor )
+ {
+ loadAll();
+ BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( cursor, descriptor.schema().getPropertyIds() );
+ cursor.initialize( descriptor, multiProgressor, new IndexQuery[0] );
+ for ( NativeSchemaIndexReader,NativeSchemaValue> reader : this )
+ {
+ reader.distinctValues( multiProgressor );
+ }
+ }
+
private boolean validPredicate( IndexQuery predicate )
{
return predicate instanceof IndexQuery.ExactPredicate || predicate instanceof IndexQuery.RangePredicate;
diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java
index 252e1c61843a3..c6b9e28c44b40 100644
--- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java
+++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/TemporalIndexReader.java
@@ -118,6 +118,18 @@ public boolean hasFullValuePrecision( IndexQuery... predicates )
return true;
}
+ @Override
+ public void distinctValues( IndexProgressor.NodeValueClient cursor )
+ {
+ loadAll();
+ BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( cursor, descriptor.schema().getPropertyIds() );
+ cursor.initialize( descriptor, multiProgressor, new IndexQuery[0] );
+ for ( NativeSchemaIndexReader,NativeSchemaValue> reader : this )
+ {
+ reader.distinctValues( multiProgressor );
+ }
+ }
+
private boolean validPredicate( IndexQuery predicate )
{
return predicate instanceof IndexQuery.ExactPredicate || predicate instanceof IndexQuery.RangePredicate;
diff --git a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java
index 47e2df77b2f98..a3ad579849f70 100644
--- a/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java
+++ b/community/kernel/src/main/java/org/neo4j/kernel/impl/index/schema/fusion/FusionIndexReader.java
@@ -100,6 +100,15 @@ public void query( IndexProgressor.NodeValueClient cursor, IndexOrder indexOrder
}
}
+ @Override
+ public void distinctValues( IndexProgressor.NodeValueClient cursor )
+ {
+ BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( cursor,
+ descriptor.schema().getPropertyIds() );
+ cursor.initialize( descriptor, multiProgressor, new IndexQuery[0] );
+ instanceSelector.forAll( reader -> reader.distinctValues( multiProgressor ) );
+ }
+
@Override
public boolean hasFullValuePrecision( IndexQuery... predicates )
{
diff --git a/community/kernel/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java b/community/kernel/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java
index 025f9cb8f37a1..52d734018ec80 100644
--- a/community/kernel/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java
+++ b/community/kernel/src/main/java/org/neo4j/storageengine/api/schema/IndexReader.java
@@ -69,6 +69,19 @@ void query(
*/
boolean hasFullValuePrecision( IndexQuery... predicates );
+ /**
+ * Initializes {@code client} to be able to progress through all distinct values in this index. {@link IndexProgressor.NodeValueClient}
+ * is used because it has a perfect method signature, even if the {@code reference} argument will instead be used
+ * as number of index entries for the specific indexed value.
+ *
+ * {@link IndexProgressor.NodeValueClient#needsValues()} decides whether or not values will be materialized and given to the client.
+ * The use-case for setting this to {@code false} is to have a more efficient counting of distinct values in an index,
+ * regardless of the actual values.
+ *
+ * @param client {@link IndexProgressor.NodeValueClient} to get initialized with this progression.
+ */
+ void distinctValues( IndexProgressor.NodeValueClient client );
+
IndexReader EMPTY = new IndexReader()
{
// Used for checking index correctness
@@ -93,7 +106,7 @@ public PrimitiveLongResourceIterator query( IndexQuery[] predicates )
@Override
public void query( IndexProgressor.NodeValueClient client, IndexOrder indexOrder, IndexQuery... query )
{
- //do nothing
+ // do nothing
}
@Override
@@ -106,5 +119,11 @@ public boolean hasFullValuePrecision( IndexQuery... predicates )
{
return true;
}
+
+ @Override
+ public void distinctValues( IndexProgressor.NodeValueClient client )
+ {
+ // do nothing
+ }
};
}
diff --git a/community/kernel/src/test/java/org/neo4j/kernel/impl/api/index/inmemory/HashBasedIndex.java b/community/kernel/src/test/java/org/neo4j/kernel/impl/api/index/inmemory/HashBasedIndex.java
index 6a33e32e05ec5..97e3311255f95 100644
--- a/community/kernel/src/test/java/org/neo4j/kernel/impl/api/index/inmemory/HashBasedIndex.java
+++ b/community/kernel/src/test/java/org/neo4j/kernel/impl/api/index/inmemory/HashBasedIndex.java
@@ -33,6 +33,7 @@
import org.neo4j.helpers.collection.Iterables;
import org.neo4j.internal.kernel.api.IndexQuery;
import org.neo4j.kernel.api.schema.index.SchemaIndexDescriptor;
+import org.neo4j.storageengine.api.schema.IndexProgressor;
import org.neo4j.storageengine.api.schema.IndexSampler;
import org.neo4j.values.storable.Value;
import org.neo4j.values.storable.ValueGroup;
@@ -269,6 +270,12 @@ public boolean hasFullValuePrecision( IndexQuery... predicates )
return false;
}
+ @Override
+ public void distinctValues( IndexProgressor.NodeValueClient client )
+ {
+ throw new UnsupportedOperationException();
+ }
+
private interface StringFilter
{
boolean test( String s );
diff --git a/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/GatheringNodeValueClient.java b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/GatheringNodeValueClient.java
new file mode 100644
index 0000000000000..9a6987471a486
--- /dev/null
+++ b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/GatheringNodeValueClient.java
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2002-2018 "Neo4j,"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package org.neo4j.kernel.impl.index.schema;
+
+import org.neo4j.internal.kernel.api.IndexQuery;
+import org.neo4j.kernel.api.schema.index.SchemaIndexDescriptor;
+import org.neo4j.storageengine.api.schema.IndexProgressor;
+import org.neo4j.values.storable.Value;
+
+/**
+ * Simple NodeValueClient test utility.
+ */
+public class GatheringNodeValueClient implements IndexProgressor.NodeValueClient
+{
+ public long reference;
+ public Value[] values;
+ public SchemaIndexDescriptor descriptor;
+ public IndexProgressor progressor;
+ public IndexQuery[] query;
+
+ @Override
+ public void initialize( SchemaIndexDescriptor descriptor, IndexProgressor progressor, IndexQuery[] query )
+ {
+ this.descriptor = descriptor;
+ this.progressor = progressor;
+ this.query = query;
+ }
+
+ @Override
+ public boolean acceptNode( long reference, Value... values )
+ {
+ this.reference = reference;
+ this.values = values;
+ return true;
+ }
+
+ @Override
+ public boolean needsValues()
+ {
+ return true;
+ }
+}
diff --git a/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressorTest.java b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressorTest.java
new file mode 100644
index 0000000000000..cf55643d54541
--- /dev/null
+++ b/community/kernel/src/test/java/org/neo4j/kernel/impl/index/schema/NativeDistinctValuesProgressorTest.java
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2002-2018 "Neo4j,"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package org.neo4j.kernel.impl.index.schema;
+
+import org.apache.commons.lang3.mutable.MutableInt;
+import org.junit.Rule;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.Map;
+
+import org.neo4j.cursor.RawCursor;
+import org.neo4j.index.internal.gbptree.Hit;
+import org.neo4j.test.Randoms;
+import org.neo4j.test.rule.RandomRule;
+import org.neo4j.values.storable.Value;
+import org.neo4j.values.storable.Values;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.neo4j.kernel.impl.index.schema.NativeSchemaValue.INSTANCE;
+import static org.neo4j.values.storable.Values.stringValue;
+
+public class NativeDistinctValuesProgressorTest
+{
+ private final StringLayout layout = new StringLayout();
+
+ @Rule
+ public final RandomRule random = new RandomRule();
+
+ @Test
+ public void shouldCountDistinctValues()
+ {
+ // given
+ Value[] strings = generateRandomStrings();
+ DataCursor source = new DataCursor( asHitData( strings ) );
+ GatheringNodeValueClient client = new GatheringNodeValueClient();
+
+ // when
+ NativeDistinctValuesProgressor progressor =
+ new NativeDistinctValuesProgressor<>( source, client, new ArrayList<>(), layout );
+ Map expectedCounts = asDistinctCounts( strings );
+
+ // then
+ int uniqueValues = 0;
+ int nonUniqueValues = 0;
+ while ( progressor.next() )
+ {
+ Value string = client.values[0];
+ MutableInt expectedCount = expectedCounts.remove( string );
+ assertNotNull( expectedCount );
+ assertEquals( expectedCount.intValue(), client.reference );
+
+ if ( expectedCount.intValue() > 1 )
+ {
+ nonUniqueValues++;
+ }
+ else
+ {
+ uniqueValues++;
+ }
+ }
+ assertTrue( expectedCounts.isEmpty() );
+ assertTrue( uniqueValues > 0 );
+ assertTrue( nonUniqueValues > 0 );
+ }
+
+ private Map asDistinctCounts( Value[] strings )
+ {
+ Map map = new HashMap<>();
+ for ( Value string : strings )
+ {
+ map.computeIfAbsent( string, s -> new MutableInt( 0 ) ).increment();
+ }
+ return map;
+ }
+
+ private Value[] generateRandomStrings()
+ {
+ Value[] strings = new Value[1_000];
+ for ( int i = 0; i < strings.length; i++ )
+ {
+ // Potential for a lot of duplicates
+ strings[i] = stringValue( random.randoms().string( 1, 3, Randoms.CS_DIGITS ) );
+ }
+ Arrays.sort( strings, Values.COMPARATOR );
+ return strings;
+ }
+
+ private Collection> asHitData( Value[] strings )
+ {
+ Collection> data = new ArrayList<>( strings.length );
+ for ( int i = 0; i < strings.length; i++ )
+ {
+ StringSchemaKey key = layout.newKey();
+ key.from( i, strings[i] );
+ data.add( new SimpleHit<>( key, INSTANCE ) );
+ }
+ return data;
+ }
+
+ private static class DataCursor implements RawCursor,IOException>
+ {
+ private final Iterator> iterator;
+ private Hit current;
+
+ DataCursor( Collection> data )
+ {
+ this.iterator = data.iterator();
+ }
+
+ @Override
+ public boolean next() throws RuntimeException
+ {
+ if ( !iterator.hasNext() )
+ {
+ return false;
+ }
+ current = iterator.next();
+ return true;
+ }
+
+ @Override
+ public void close() throws RuntimeException
+ {
+ // Nothing to close
+ }
+
+ @Override
+ public Hit get()
+ {
+ return current;
+ }
+ }
+}
diff --git a/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java b/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java
index 2501379d4a10e..c6e39a2cd9c4b 100644
--- a/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java
+++ b/community/kernel/src/test/java/org/neo4j/storageengine/api/schema/DefaultIndexReaderTest.java
@@ -27,7 +27,6 @@
import org.neo4j.collection.primitive.PrimitiveLongResourceIterator;
import org.neo4j.internal.kernel.api.IndexOrder;
import org.neo4j.internal.kernel.api.IndexQuery;
-import org.neo4j.kernel.api.exceptions.index.IndexNotApplicableKernelException;
import org.neo4j.values.storable.Value;
public class DefaultIndexReaderTest
@@ -77,6 +76,11 @@ public boolean hasFullValuePrecision( IndexQuery... predicates )
return false;
}
+ @Override
+ public void distinctValues( IndexProgressor.NodeValueClient client )
+ {
+ }
+
@Override
public void close()
{
diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java
index 909b3bd5c377c..d39ce68dccfe4 100644
--- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java
+++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/ValueEncoding.java
@@ -39,12 +39,12 @@
* Enumeration representing all possible property types with corresponding encodings and query structures for Lucene
* schema indexes.
*/
-enum ValueEncoding
+public enum ValueEncoding
{
Number
{
@Override
- String key()
+ public String key()
{
return "number";
}
@@ -78,7 +78,7 @@ Query encodeQuery( Value value, int propertyNumber )
Array
{
@Override
- String key()
+ public String key()
{
return "array";
}
@@ -111,7 +111,7 @@ Query encodeQuery( Value value, int propertyNumber )
Bool
{
@Override
- String key()
+ public String key()
{
return "bool";
}
@@ -144,7 +144,7 @@ Query encodeQuery( Value value, int propertyNumber )
Spatial
{
@Override
- String key()
+ public String key()
{
return "spatial";
}
@@ -180,7 +180,7 @@ Query encodeQuery( Value value, int propertyNumber )
Temporal
{
@Override
- String key()
+ public String key()
{
return "temporal";
}
@@ -213,7 +213,7 @@ Query encodeQuery( Value value, int propertyNumber )
String
{
@Override
- String key()
+ public String key()
{
return "string";
}
@@ -247,7 +247,7 @@ Query encodeQuery( Value value, int propertyNumber )
private static final ValueEncoding[] AllEncodings = values();
- abstract String key();
+ public abstract String key();
String key( int propertyNumber )
{
diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/LuceneDistinctValuesProgressor.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/LuceneDistinctValuesProgressor.java
new file mode 100644
index 0000000000000..9472c942fb0b5
--- /dev/null
+++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/LuceneDistinctValuesProgressor.java
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2002-2018 "Neo4j,"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package org.neo4j.kernel.api.impl.schema.reader;
+
+import org.apache.lucene.index.TermsEnum;
+import org.apache.lucene.util.BytesRef;
+
+import java.io.IOException;
+import java.io.UncheckedIOException;
+import java.util.function.Function;
+
+import org.neo4j.storageengine.api.schema.IndexProgressor;
+import org.neo4j.values.storable.Value;
+
+class LuceneDistinctValuesProgressor implements IndexProgressor
+{
+ private final TermsEnum terms;
+ private final NodeValueClient client;
+ private final Function valueMaterializer;
+
+ LuceneDistinctValuesProgressor( TermsEnum terms, NodeValueClient client, Function valueMaterializer ) throws IOException
+ {
+ this.terms = terms;
+ this.client = client;
+ this.valueMaterializer = valueMaterializer;
+ }
+
+ @Override
+ public boolean next()
+ {
+ try
+ {
+ while ( (terms.next()) != null )
+ {
+ if ( client.acceptNode( terms.docFreq(), valueMaterializer.apply( terms.term() ) ) )
+ {
+ return true;
+ }
+ }
+ return false;
+ }
+ catch ( IOException e )
+ {
+ throw new UncheckedIOException( e );
+ }
+ }
+
+ @Override
+ public void close()
+ {
+ }
+}
diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java
index 3472e500f64f0..1bd9ffb91fdd6 100644
--- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java
+++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/PartitionedIndexReader.java
@@ -50,7 +50,6 @@
*/
public class PartitionedIndexReader extends AbstractIndexReader
{
-
private final List indexReaders;
public PartitionedIndexReader( List partitionSearchers,
@@ -114,6 +113,14 @@ public boolean hasFullValuePrecision( IndexQuery... predicates )
return false;
}
+ @Override
+ public void distinctValues( IndexProgressor.NodeValueClient client )
+ {
+ BridgingIndexProgressor bridgingIndexProgressor = new BridgingIndexProgressor( client, descriptor.schema().getPropertyIds() );
+ indexReaders.parallelStream().forEach( reader -> reader.distinctValues( bridgingIndexProgressor ) );
+ client.initialize( descriptor, bridgingIndexProgressor, new IndexQuery[0] );
+ }
+
private PrimitiveLongResourceIterator innerQuery( IndexReader reader, IndexQuery[] predicates )
{
try
diff --git a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java
index 3ba0feeccbc54..de987fdc35e6c 100644
--- a/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java
+++ b/community/lucene-index/src/main/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReader.java
@@ -19,15 +19,23 @@
*/
package org.neo4j.kernel.api.impl.schema.reader;
+import org.apache.lucene.index.Fields;
+import org.apache.lucene.index.MultiFields;
+import org.apache.lucene.index.Terms;
+import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TotalHitCountCollector;
+import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.NumericUtils;
import java.io.IOException;
+import java.io.UncheckedIOException;
import java.util.Arrays;
+import java.util.function.Function;
import org.neo4j.collection.primitive.PrimitiveLongResourceIterator;
import org.neo4j.helpers.TaskControl;
@@ -39,14 +47,17 @@
import org.neo4j.kernel.api.impl.index.collector.DocValuesCollector;
import org.neo4j.kernel.api.impl.index.partition.PartitionSearcher;
import org.neo4j.kernel.api.impl.schema.LuceneDocumentStructure;
+import org.neo4j.kernel.api.impl.schema.ValueEncoding;
import org.neo4j.kernel.api.impl.schema.sampler.NonUniqueLuceneIndexSampler;
import org.neo4j.kernel.api.impl.schema.sampler.UniqueLuceneIndexSampler;
import org.neo4j.kernel.api.schema.index.SchemaIndexDescriptor;
import org.neo4j.kernel.impl.api.index.sampling.IndexSamplingConfig;
+import org.neo4j.kernel.impl.index.schema.fusion.BridgingIndexProgressor;
import org.neo4j.storageengine.api.schema.AbstractIndexReader;
import org.neo4j.storageengine.api.schema.IndexProgressor;
import org.neo4j.storageengine.api.schema.IndexSampler;
import org.neo4j.values.storable.Value;
+import org.neo4j.values.storable.Values;
import static java.lang.String.format;
import static org.neo4j.internal.kernel.api.IndexQuery.IndexQueryType.exact;
@@ -186,6 +197,45 @@ public boolean hasFullValuePrecision( IndexQuery... predicates )
return false;
}
+ /**
+ * OBS this implementation can only provide values for properties of type {@link String}.
+ * Other property types will still be counted as distinct, but {@code client} won't receive {@link Value}
+ * instances for those.
+ *
+ * @param client {@link IndexProgressor.NodeValueClient} to get initialized with this progression.
+ */
+ @Override
+ public void distinctValues( IndexProgressor.NodeValueClient client )
+ {
+ try
+ {
+ IndexQuery[] noQueries = new IndexQuery[0];
+ BridgingIndexProgressor multiProgressor = new BridgingIndexProgressor( client, descriptor.schema().getPropertyIds() );
+ Fields fields = MultiFields.getFields( getIndexSearcher().getIndexReader() );
+ for ( ValueEncoding valueEncoding : ValueEncoding.values() )
+ {
+ Terms terms = fields.terms( valueEncoding.key() );
+ if ( terms != null )
+ {
+ Function valueMaterializer = valueEncoding == ValueEncoding.String && client.needsValues()
+ ? term -> Values.stringValue( term.utf8ToString() )
+ : term -> null;
+ TermsEnum termsIterator = terms.iterator();
+ if ( valueEncoding == ValueEncoding.Number )
+ {
+ termsIterator = NumericUtils.filterPrefixCodedLongs( termsIterator );
+ }
+ multiProgressor.initialize( descriptor, new LuceneDistinctValuesProgressor( termsIterator, client, valueMaterializer ), noQueries );
+ }
+ }
+ client.initialize( descriptor, multiProgressor, noQueries );
+ }
+ catch ( IOException e )
+ {
+ throw new UncheckedIOException( e );
+ }
+ }
+
private void assertNotComposite( IndexQuery[] predicates )
{
assert predicates.length == 1 : "composite indexes not yet supported for this operation";
diff --git a/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReaderDistinctValuesTest.java b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReaderDistinctValuesTest.java
new file mode 100644
index 0000000000000..036ebcc6ccd5e
--- /dev/null
+++ b/community/lucene-index/src/test/java/org/neo4j/kernel/api/impl/schema/reader/SimpleIndexReaderDistinctValuesTest.java
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2002-2018 "Neo4j,"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see .
+ */
+package org.neo4j.kernel.api.impl.schema.reader;
+
+import org.apache.commons.lang3.mutable.MutableInt;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Rule;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.neo4j.kernel.api.impl.schema.LuceneSchemaIndexBuilder;
+import org.neo4j.kernel.api.impl.schema.SchemaIndex;
+import org.neo4j.kernel.api.impl.schema.writer.LuceneIndexWriter;
+import org.neo4j.kernel.api.schema.index.SchemaIndexDescriptorFactory;
+import org.neo4j.kernel.configuration.Config;
+import org.neo4j.kernel.impl.index.schema.GatheringNodeValueClient;
+import org.neo4j.storageengine.api.schema.IndexReader;
+import org.neo4j.test.rule.RandomRule;
+import org.neo4j.test.rule.TestDirectory;
+import org.neo4j.test.rule.fs.DefaultFileSystemRule;
+import org.neo4j.values.storable.Value;
+import org.neo4j.values.storable.Values;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.neo4j.kernel.api.impl.schema.LuceneDocumentStructure.documentRepresentingProperties;
+import static org.neo4j.test.Randoms.CS_DIGITS;
+import static org.neo4j.values.storable.Values.stringValue;
+
+public class SimpleIndexReaderDistinctValuesTest
+{
+ @Rule
+ public final RandomRule random = new RandomRule();
+ @Rule
+ public final DefaultFileSystemRule fs = new DefaultFileSystemRule();
+ @Rule
+ public final TestDirectory directory = TestDirectory.testDirectory( fs );
+ private SchemaIndex index;
+
+ @Before
+ public void setup() throws IOException
+ {
+ index = LuceneSchemaIndexBuilder.create( SchemaIndexDescriptorFactory.forLabel( 1, 1 ), Config.defaults() )
+ .withFileSystem( fs )
+ .withIndexRootFolder( directory.directory() )
+ .build();
+ index.create();
+ index.open();
+ }
+
+ @After
+ public void tearDown() throws IOException
+ {
+ index.close();
+ }
+
+ @Test
+ public void shouldGetDistinctStringValues() throws IOException
+ {
+ // given
+ LuceneIndexWriter writer = index.getIndexWriter();
+ Map expectedCounts = new HashMap<>();
+ for ( int i = 0; i < 10_000; i++ )
+ {
+ Value value = stringValue( random.randoms().string( 1, 3, CS_DIGITS ) );
+ writer.addDocument( documentRepresentingProperties( i, value ) );
+ expectedCounts.computeIfAbsent( value, v -> new MutableInt( 0 ) ).increment();
+ }
+ index.maybeRefreshBlocking();
+
+ // when/then
+ GatheringNodeValueClient client = new GatheringNodeValueClient();
+ try ( IndexReader reader = index.getIndexReader() )
+ {
+ reader.distinctValues( client );
+ while ( client.progressor.next() )
+ {
+ Value value = client.values[0];
+ MutableInt expectedCount = expectedCounts.remove( value );
+ assertNotNull( expectedCount );
+ assertEquals( expectedCount.intValue(), client.reference );
+ }
+ assertTrue( expectedCounts.isEmpty() );
+ }
+ }
+
+ @Test
+ public void shouldCountDistinctValues() throws IOException
+ {
+ // given
+ LuceneIndexWriter writer = index.getIndexWriter();
+ int expectedCount = 10_000;
+ for ( int i = 0; i < expectedCount; i++ )
+ {
+ Value value = Values.of( random.propertyValue() );
+ writer.addDocument( documentRepresentingProperties( i, value ) );
+ }
+ index.maybeRefreshBlocking();
+
+ // when/then
+ GatheringNodeValueClient client = new GatheringNodeValueClient();
+ try ( IndexReader reader = index.getIndexReader() )
+ {
+ reader.distinctValues( client );
+ int actualCount = 0;
+ while ( client.progressor.next() )
+ {
+ actualCount += client.reference;
+ }
+ assertEquals( expectedCount, actualCount );
+ }
+ }
+}