From 54de2702286f19f13a59b976199986a960000597 Mon Sep 17 00:00:00 2001
From: Andres Taylor
Date: Fri, 18 May 2018 06:45:09 +0200
Subject: [PATCH] Add a primitive hash-map implementation

Will be used for NodeHashJoins
---
 .../internal/runtime/LongArrayHash.java       |  60 ++++
 .../runtime/LongArrayHashMultiMap.java        | 265 ++++++++++++++++++
 ...ongArraySet.java => LongArrayHashSet.java} |  47 +---
 .../runtime/LongArrayHashMultiMapTest.scala   |  65 +++++
 ...tTest.scala => LongArrayHashSetTest.scala} |   8 +-
 .../pipes/DistinctSlottedPrimitivePipe.scala  |   7 +-
 6 files changed, 406 insertions(+), 46 deletions(-)
 create mode 100644 community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArrayHash.java
 create mode 100644 community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArrayHashMultiMap.java
 rename community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/{LongArraySet.java => LongArrayHashSet.java} (83%)
 create mode 100644 community/cypher/runtime-util/src/test/scala/org/neo4j/cypher/internal/runtime/LongArrayHashMultiMapTest.scala
 rename community/cypher/runtime-util/src/test/scala/org/neo4j/cypher/internal/runtime/{LongArraySetTest.scala => LongArrayHashSetTest.scala} (94%)

diff --git a/community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArrayHash.java b/community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArrayHash.java
new file mode 100644
index 0000000000000..523cc33cd8d55
--- /dev/null
+++ b/community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArrayHash.java
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2002-2018 "Neo4j,"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.cypher.internal.runtime;
+
+public class LongArrayHash
+{
+
+    public static final long NOT_IN_USE = -2;
+    public static final int SLOT_EMPTY = 0;
+    public static final int VALUE_FOUND = 1;
+    public static final int CONTINUE_PROBING = -1;
+
+    public static int hashCode( long[] arr, int from, int numberOfElements )
+    {
+        // This way of producing a hashcode for an array of longs is the
+        // same used by java.util.Arrays.hashCode(long[])
+        int h = 1;
+        for ( int i = from; i < from + numberOfElements; i++ )
+        {
+            long element = arr[i];
+            int elementHash = (int) (element ^ (element >>> 32));
+            h = 31 * h + elementHash;
+        }
+
+        return h;
+    }
+
+    static boolean validValue( long[] arr, int width )
+    {
+        if ( arr.length != width )
+        {
+            throw new AssertionError( "all elements in the set must have the same size" );
+        }
+        for ( long l : arr )
+        {
+            if ( l == -1 || l == -2 )
+            {
+                throw new AssertionError( "magic values -1 and -2 not allowed in keys" );
+            }
+        }
+        return true; // returns a value only so callers can write: assert validValue( ... )
+    }
+}
diff --git a/community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArrayHashMultiMap.java b/community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArrayHashMultiMap.java
new file mode 100644
index 0000000000000..a62131607bc14
--- /dev/null
+++ 
b/community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArrayHashMultiMap.java
@@ -0,0 +1,265 @@
+/*
+ * Copyright (c) 2002-2018 "Neo4j,"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.cypher.internal.runtime;
+
+import java.util.Iterator;
+
+/**
+ * A fast implementation of a multi map with long[] as keys.
+ * Keys are {@code width} longs each, stored flattened in one array; collisions use linear probing.
+ * Multi maps are maps that can store multiple values per key.
+ * @param <VALUE> type of the values stored under each key
+ */
+public class LongArrayHashMultiMap<VALUE>
+{
+    private static final long NOT_IN_USE = -2;
+    private static final int SLOT_EMPTY = 0;
+    private static final int VALUE_FOUND = 1;
+    private static final int CONTINUE_PROBING = -1;
+    private static final double LOAD_FACTOR = 0.75;
+
+    private final int width;
+    private Table table;
+
+    public LongArrayHashMultiMap( int initialCapacity, int width )
+    {
+        assert (initialCapacity & (initialCapacity - 1)) == 0 : "Size must be a power of 2";
+        assert width > 0 : "Number of elements must be larger than 0";
+
+        this.width = width;
+        table = new Table( initialCapacity );
+    }
+
+    public void add( long[] key, VALUE value )
+    {
+        assert LongArrayHash.validValue( key, width );
+        int slotNr = slotFor( key );
+
+        while ( true )
+        {
+            int offset = slotNr * width;
+            if ( table.keys[offset] == NOT_IN_USE )
+            {
+                if ( table.timeToResize() )
+                {
+                    // We know we need to add the key to the map, but there is no space left
+                    resize();
+                    // Need to restart linear probe after resizing
+                    slotNr = slotFor( key );
+                }
+                else
+                {
+                    // We found an empty spot!
+                    table.setFirstValue( slotNr, key, value );
+                    return;
+                }
+            }
+            else
+            {
+                for ( int i = 0; i < width; i++ )
+                {
+                    if ( table.keys[offset + i] != key[i] )
+                    {
+                        // Found a different value in this slot - continue probing
+                        slotNr = (slotNr + 1) & table.tableMask;
+                        break;
+                    }
+                    else if ( i == width - 1 )
+                    {
+                        // We found other matching values
+                        table.addValue( slotNr, value );
+                        return;
+                    }
+                }
+            }
+        }
+    }
+
+    public Iterator<VALUE> get( long[] key )
+    {
+        assert LongArrayHash.validValue( key, width );
+        int slot = slotFor( key );
+
+        int result = table.checkSlot( slot, key );
+        while ( result == CONTINUE_PROBING )
+        {
+            slot = (slot + 1) & table.tableMask; // advance first, so slot still names the checked slot on exit
+            result = table.checkSlot( slot, key );
+        }
+        @SuppressWarnings( "unchecked" ) Node current = (Node) table.values[slot]; // null for an empty slot
+
+        return new Result( current );
+    }
+
+    public boolean isEmpty()
+    {
+        for ( int slot = 0; slot < table.capacity; slot++ )
+        {
+            if ( table.keys[slot * width] != NOT_IN_USE ) // keys are flattened: slot n starts at n * width
+            {
+                return false;
+            }
+        }
+        return true;
+    }
+
+    private void resize()
+    {
+        int oldSize = table.capacity;
+        int oldNumberEntries = table.numberOfEntries;
+        long[] srcKeys = table.keys;
+        Object[] srcValues = table.values;
+        table = new Table( oldSize * 2 );
+        long[] dstKeys = table.keys;
+        table.numberOfEntries = oldNumberEntries;
+
+        for ( int fromSlot = 0; fromSlot < oldSize; fromSlot = fromSlot + 1 )
+        {
+            int fromOffset = fromSlot * width;
+            if ( srcKeys[fromOffset] != NOT_IN_USE )
+            {
+                int toSlot = LongArrayHash.hashCode( srcKeys, fromOffset, width ) & table.tableMask;
+
+                if ( dstKeys[toSlot * width] != NOT_IN_USE )
+                {
+                    // Linear probe until we find an unused slot.
+                    // No need to check for size here - we are already inside of resize()
+                    toSlot = findUnusedSlot( dstKeys, toSlot );
+                }
+                System.arraycopy( srcKeys, fromOffset, dstKeys, toSlot * width, width );
+                table.values[toSlot] = srcValues[fromSlot];
+            }
+        }
+    }
+
+    private int findUnusedSlot( long[] to, int fromSlot )
+    {
+        while ( true )
+        {
+            if ( to[fromSlot * width] == NOT_IN_USE )
+            {
+                return fromSlot;
+            }
+            fromSlot = (fromSlot + 1) & table.tableMask;
+        }
+    }
+
+
+    private int slotFor( long[] value )
+    {
+        return LongArrayHash.hashCode( value, 0, width ) & table.tableMask;
+    }
+
+    class Node
+    {
+        final VALUE value;
+        final Node next;
+
+        public Node( VALUE value, Node next )
+        {
+            this.value = value;
+            this.next = next;
+        }
+    }
+
+    class Result extends org.neo4j.helpers.collection.PrefetchingIterator<VALUE>
+    {
+        private Node current;
+
+        public Result( Node first )
+        {
+            current = first;
+        }
+
+        @Override
+        protected VALUE fetchNextOrNull()
+        {
+            if ( current == null )
+            {
+                return null;
+            }
+            VALUE value = current.value;
+            current = current.next;
+            return value;
+        }
+    }
+
+    class Table
+    {
+        private final int capacity;
+        private final long[] keys;
+        private final Object[] values;
+        int numberOfEntries;
+        private int resizeLimit;
+
+        int tableMask;
+
+        Table( int capacity )
+        {
+            this.capacity = capacity;
+            resizeLimit = (int) (capacity * LOAD_FACTOR);
+            tableMask = Integer.highestOneBit( capacity ) - 1;
+            keys = new long[capacity * width];
+            java.util.Arrays.fill( keys, NOT_IN_USE );
+            values = new Object[capacity];
+        }
+
+        boolean timeToResize()
+        {
+            return numberOfEntries == resizeLimit;
+        }
+
+        // This code is duplicated in LongArrayHashSet. We should measure if it's OK to extract into LongArrayHash
+        int checkSlot( int slot, long[] value )
+        {
+            assert value.length == width;
+
+            int startOffset = slot * width;
+            if ( keys[startOffset] == NOT_IN_USE )
+            {
+                return SLOT_EMPTY;
+            }
+
+            for ( int i = 0; i < width; i++ )
+            {
+                if ( keys[startOffset + i] != value[i] )
+                {
+                    return CONTINUE_PROBING;
+                }
+            }
+
+            return VALUE_FOUND;
+        }
+
+        void setFirstValue( int slot, long[] key, VALUE value )
+        {
+            int offset = slot * width;
+            System.arraycopy( key, 0, keys, offset, width );
+            values[slot] = new Node( value, null );
+            numberOfEntries++;
+        }
+
+        void addValue( int slot, VALUE value )
+        {
+            @SuppressWarnings( "unchecked" ) Node current = (Node) values[slot];
+            values[slot] = new Node( value, current ); // prepend: get() yields the newest value first
+        }
+    }
+}
diff --git a/community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArraySet.java b/community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArrayHashSet.java
similarity index 83%
rename from community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArraySet.java
rename to community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArrayHashSet.java
index d562bd1436be1..3df123e2e5073 100644
--- a/community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArraySet.java
+++ b/community/cypher/runtime-util/src/main/java/org/neo4j/cypher/internal/runtime/LongArrayHashSet.java
@@ -31,10 +31,9 @@
  * The word "offset" here means the index into an array,
  * and slot is a number that multiplied by the width of the values will return the offset.
  */
-public class LongArraySet
+public class LongArrayHashSet
 {
     private static final long NOT_IN_USE = -2;
-
     private static final int SLOT_EMPTY = 0;
     private static final int VALUE_FOUND = 1;
     private static final int CONTINUE_PROBING = -1;
@@ -43,7 +42,7 @@ public class LongArraySet
     private Table table;
     private final int width;
 
-    public LongArraySet( int initialCapacity, int width )
+    public LongArrayHashSet( int initialCapacity, int width )
     {
         assert (initialCapacity & (initialCapacity - 1)) == 0 : "Size must be a power of 2";
         assert width > 0 : "Number of elements must be larger than 0";
@@ -60,7 +59,7 @@ public LongArraySet( int initialCapacity, int width )
      */
     public boolean add( long[] value )
     {
-        assert validValue( value );
+        assert LongArrayHash.validValue( value, width );
         int slotNr = slotFor( value );
         while ( true )
         {
@@ -106,7 +105,7 @@ else if ( i == width - 1 )
      */
     public boolean contains( long[] value )
     {
-        assert validValue( value );
+        assert LongArrayHash.validValue( value, width );
         int slot = slotFor( value );
 
         int result;
@@ -119,40 +118,6 @@ public boolean contains( long[] value )
         return result == VALUE_FOUND;
     }
 
-    /*
-    Only called from assert
-     */
-    private boolean validValue( long[] arr )
-    {
-        if ( arr.length != width )
-        {
-            throw new AssertionError( "all elements in the set must have the same size" );
-        }
-        for ( long l : arr )
-        {
-            if ( l == -1 || l == -2 )
-            {
-                throw new AssertionError( "magic values -1 and -2 not allowed in set" );
-            }
-        }
-        return true;
-    }
-
-    private int hashCode( long[] arr, int from, int numberOfElements )
-    {
-        // This way of producing a hashcode for an array of longs is the
-        // same used by java.util.Arrays.hashCode(long[])
-        int h = 1;
-        for ( int i = from; i < from + numberOfElements; i++ )
-        {
-            long element = arr[i];
-            int elementHash = (int) (element ^ (element >>> 32));
-            h = 31 * h + elementHash;
-        }
-
-        return h;
-    }
-
     private void resize()
     {
         int oldSize = table.capacity;
@@ -166,7 +131,7 @@ private void 
resize()
         {
             if ( srcArray[fromOffset] != NOT_IN_USE )
             {
-                int toSlot = hashCode( srcArray, fromOffset, width ) & table.tableMask;
+                int toSlot = LongArrayHash.hashCode( srcArray, fromOffset, width ) & table.tableMask;
 
                 if ( dstArray[toSlot * width] != NOT_IN_USE )
                 {
@@ -193,7 +158,7 @@ private int findUnusedSlot( long[] to, int fromSlot )
 
     private int slotFor( long[] value )
     {
-        return hashCode( value, 0, width ) & table.tableMask;
+        return LongArrayHash.hashCode( value, 0, width ) & table.tableMask;
     }
 
     class Table
diff --git a/community/cypher/runtime-util/src/test/scala/org/neo4j/cypher/internal/runtime/LongArrayHashMultiMapTest.scala b/community/cypher/runtime-util/src/test/scala/org/neo4j/cypher/internal/runtime/LongArrayHashMultiMapTest.scala
new file mode 100644
index 0000000000000..f7c8b42b3fb7a
--- /dev/null
+++ b/community/cypher/runtime-util/src/test/scala/org/neo4j/cypher/internal/runtime/LongArrayHashMultiMapTest.scala
@@ -0,0 +1,65 @@
+/*
+ * Copyright (c) 2002-2018 "Neo4j,"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j.
+ *
+ * Neo4j is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+package org.neo4j.cypher.internal.runtime
+
+import org.scalatest.{FunSuite, Matchers}
+
+import scala.collection.JavaConverters._
+
+class LongArrayHashMultiMapTest extends FunSuite with Matchers {
+  test("basic") {
+    val map = new LongArrayHashMultiMap[String](32, 3)
+    map.add(Array(1L, 2L, 3L), "hello")
+    map.add(Array(1L, 2L, 3L), "world")
+    val iterator = map.get(Array(1L, 2L, 3L))
+
+    iterator.asScala.toList should equal(List("world", "hello")) // newest first: add() prepends to the per-key chain
+
+    map.get(Array(6L, 6L, 6L)).hasNext should equal(false)
+    map.isEmpty should equal(false)
+  }
+
+  test("isEmpty") {
+    val map = new LongArrayHashMultiMap(32, 11)
+    map.isEmpty should equal(true)
+  }
+
+  test("fill and resize") {
+    val map = new LongArrayHashMultiMap[String](8, 3) // resize limit is 6 (8 * 0.75), so the 7th add grows the table
+    map.add(Array(0L, 8L, 1L), "hello")
+    map.add(Array(0L, 7L, 2L), "is")
+    map.add(Array(0L, 6L, 3L), "it")
+    map.add(Array(0L, 5L, 4L), "me")
+    map.add(Array(0L, 4L, 5L), "you")
+    map.add(Array(0L, 3L, 6L), "are")
+    map.add(Array(0L, 2L, 7L), "looking")
+    map.add(Array(0L, 1L, 8L), "for")
+
+    map.get(Array(0L, 7L, 2L)).asScala.toList should equal(List("is"))
+    map.get(Array(0L, 6L, 3L)).asScala.toList should equal(List("it"))
+    map.get(Array(0L, 5L, 4L)).asScala.toList should equal(List("me"))
+    map.get(Array(0L, 4L, 5L)).asScala.toList should equal(List("you"))
+    map.get(Array(0L, 3L, 6L)).asScala.toList should equal(List("are"))
+    map.get(Array(0L, 2L, 7L)).asScala.toList should equal(List("looking"))
+    map.get(Array(0L, 1L, 8L)).asScala.toList should equal(List("for"))
+    map.isEmpty should equal(false)
+  }
+
+}
diff --git a/community/cypher/runtime-util/src/test/scala/org/neo4j/cypher/internal/runtime/LongArraySetTest.scala b/community/cypher/runtime-util/src/test/scala/org/neo4j/cypher/internal/runtime/LongArrayHashSetTest.scala
similarity index 94%
rename from community/cypher/runtime-util/src/test/scala/org/neo4j/cypher/internal/runtime/LongArraySetTest.scala
rename to community/cypher/runtime-util/src/test/scala/org/neo4j/cypher/internal/runtime/LongArrayHashSetTest.scala
index 732321e3466d4..4b39b94d97c0a 100644
--- a/community/cypher/runtime-util/src/test/scala/org/neo4j/cypher/internal/runtime/LongArraySetTest.scala
+++ b/community/cypher/runtime-util/src/test/scala/org/neo4j/cypher/internal/runtime/LongArrayHashSetTest.scala
@@ -26,7 +26,7 @@ import org.scalatest.{FunSuite, Matchers}
 import scala.collection.mutable
 import scala.util.Random
 
-class LongArraySetTest extends FunSuite with Matchers {
+class LongArrayHashSetTest extends FunSuite with Matchers {
 
   val r = new Random()
 
@@ -34,7 +34,7 @@ class LongArraySetTest extends FunSuite with Matchers {
     test(s"test #$i") {
       val width = r.nextInt(10) + 2
       val size = r.nextInt(10000)
-      val tested = new LongArraySet(16, width)
+      val tested = new LongArrayHashSet(16, width)
       val validator = new mutable.HashSet[Array[Long]]()
       (0 to size) foreach { _ =>
         val tuple = new Array[Long](width)
@@ -55,7 +55,7 @@ class LongArraySetTest extends FunSuite with Matchers {
         val b = validator.contains(tuple)
 
         if(a != b)
-          fail(s"Value: ${util.Arrays.toString(tuple)} LongArraySet $a mutable.HashSet")
+          fail(s"Value: ${util.Arrays.toString(tuple)} LongArrayHashSet $a mutable.HashSet")
       }
     }
   }
@@ -69,7 +69,7 @@ class LongArraySetTest extends FunSuite with Matchers {
   }
 
   test("manual test to help with debugging") {
-    val set = new LongArraySet(8, 3)
+    val set = new LongArrayHashSet(8, 3)
     set.add(Array(1, 2, 3))
     set.add(Array(2, 3, 4))
     set.add(Array(3, 6, 7))
diff --git a/enterprise/cypher/slotted-runtime/src/main/scala/org/neo4j/cypher/internal/runtime/slotted/pipes/DistinctSlottedPrimitivePipe.scala b/enterprise/cypher/slotted-runtime/src/main/scala/org/neo4j/cypher/internal/runtime/slotted/pipes/DistinctSlottedPrimitivePipe.scala
index 97a74ee5b8cf5..b83902039697f 100644
--- a/enterprise/cypher/slotted-runtime/src/main/scala/org/neo4j/cypher/internal/runtime/slotted/pipes/DistinctSlottedPrimitivePipe.scala
+++ 
b/enterprise/cypher/slotted-runtime/src/main/scala/org/neo4j/cypher/internal/runtime/slotted/pipes/DistinctSlottedPrimitivePipe.scala
@@ -25,8 +25,8 @@ import org.neo4j.cypher.internal.runtime.interpreted.commands.expressions.Expres
 import org.neo4j.cypher.internal.runtime.interpreted.pipes.{Pipe, PipeWithSource, QueryState}
 import org.neo4j.cypher.internal.runtime.slotted.SlottedExecutionContext
 import org.neo4j.cypher.internal.runtime.slotted.helpers.SlottedPipeBuilderUtils
-import org.neo4j.cypher.internal.runtime.{LongArraySet, PrefetchingIterator}
+import org.neo4j.cypher.internal.runtime.{LongArrayHashSet, PrefetchingIterator}
 import org.opencypher.v9_0.util.attribution.Id
 
 import scala.collection.immutable
 
@@ -55,7 +55,7 @@ case class DistinctSlottedPrimitivePipe(source: Pipe,
 
   protected def internalCreateResults(input: Iterator[ExecutionContext], state: QueryState): Iterator[ExecutionContext] = {
     new PrefetchingIterator[ExecutionContext] {
-      private val seen = new LongArraySet(32, projections.size)
+      private val seen = new LongArrayHashSet(32, projections.size)
 
       override def produceNext(): Option[ExecutionContext] = {
         while (input.nonEmpty) {