Improve performance of Node-Index-Join planning for many-pattern queries

neo4j · Sep 18, 2018 · 87da4e7 · 87da4e7
1 parent 093a6db
commit 87da4e7
Show file tree

Hide file tree

Showing 3 changed files with 118 additions and 30 deletions.
diff --git a/...o4j/cypher/internal/compiler/v3_5/planner/logical/idp/cartesianProductsOrValueJoins.scala b/...o4j/cypher/internal/compiler/v3_5/planner/logical/idp/cartesianProductsOrValueJoins.scala
@@ -66,7 +66,8 @@ case object cartesianProductsOrValueJoins extends JoinDisconnectedQueryGraphComp
     So, when we have too many plans to combine, we fall back to the naive way of just building a left deep tree with
     all query parts cross joined together.
      */
-    val joins = produceHashJoins(plans, qg, context, kit, singleComponentPlanner) ++
+    val joins =
+      produceHashJoins(plans, qg, context, kit, singleComponentPlanner) ++
       produceNIJVariations(plans, qg, requiredOrder, context, kit, singleComponentPlanner)
 
     if (joins.nonEmpty) {
@@ -111,23 +112,45 @@ case object cartesianProductsOrValueJoins extends JoinDisconnectedQueryGraphComp
     }).toMap
   }
 
+  // Developer's note: This method has been re-implemented in a very low-level imperative style, because
+  // this code path caused a big SOAK regression for queries with 50-60 plans. The current implementation is
+  // about 100x faster than the old one, so please keep one eye on performance when changing functionality here.
   private def produceNIJVariations(plans: Set[PlannedComponent],
                                    qg: QueryGraph,
                                    requiredOrder: RequiredOrder,
                                    context: LogicalPlanningContext,
                                    kit: QueryPlannerKit,
-                                   singleComponentPlanner: SingleComponentPlannerTrait): Map[PlannedComponent, (PlannedComponent, PlannedComponent)] = {
-    (for {
-      t1@PlannedComponent(qgA, planA) <- plans
-      t2@PlannedComponent(qgB, planB) <- plans if planA != planB
-      predicate <- this.predicatesDependendingOnBothSides(qg.selections.flatPredicates, qgA, qgB)
-    } yield {
-      val nestedIndexJoinAB = planNIJ(planA, planB, qgA, qgB, qg, requiredOrder, predicate, context, kit, singleComponentPlanner)
-      val nestedIndexJoinBA = planNIJ(planB, planA, qgB, qgA, qg, requiredOrder, predicate, context, kit, singleComponentPlanner)
-
-      nestedIndexJoinAB.map(x => (x, t1 -> t2)) ++ nestedIndexJoinBA.map(x => (x, t1 -> t2))
+                                   singleComponentPlanner: SingleComponentPlannerTrait):
+  Map[PlannedComponent, (PlannedComponent, PlannedComponent)] = {
+    val predicatesWithDependencies = qg.selections.flatPredicates.toArray.map(pred => (pred, pred.dependencies.map(_.name).toArray))
+    val planArray = plans.toArray
+    val allCoveredIds = planArray.map(_.queryGraph.allCoveredIds)
+
+    val result = Map.newBuilder[PlannedComponent, (PlannedComponent, PlannedComponent)]
+
+    var a = 0
+    while (a < planArray.length) {
+      var b = a + 1
+      while (b < planArray.length) {
+
+        val planA = planArray(a).plan
+        val planB = planArray(b).plan
+        val qgA = planArray(a).queryGraph
+        val qgB = planArray(b).queryGraph
+
+        for (predicate <- this.predicatesDependendingOnBothSides(predicatesWithDependencies, allCoveredIds(a), allCoveredIds(b))) {
+          val nestedIndexJoinAB = planNIJ(planA, planB, qgA, qgB, qg, requiredOrder, predicate, context, kit, singleComponentPlanner)
+          val nestedIndexJoinBA = planNIJ(planB, planA, qgB, qgA, qg, requiredOrder, predicate, context, kit, singleComponentPlanner)
+
+          nestedIndexJoinAB.foreach(x => result += ((x, planArray(a) -> planArray(b))))
+          nestedIndexJoinBA.foreach(x => result += ((x, planArray(a) -> planArray(b))))
+        }
+        b += 1
+      }
+      a += 1
+    }
 
-    }).flatten.toMap
+    result.result()
   }
 
   private def produceHashJoins(plans: Set[PlannedComponent],
@@ -199,16 +222,26 @@ case object cartesianProductsOrValueJoins extends JoinDisconnectedQueryGraphComp
         r.dependencies != l.dependencies => e
   }.toSet
 
-  def predicatesDependendingOnBothSides(flatPredicates: Seq[Expression], lhs: QueryGraph, rhs: QueryGraph): Seq[Expression] =
-    flatPredicates.filter { pred =>
-      val deps = pred.dependencies.map(_.name)
-      val idsFromLeft = lhs.allCoveredIds
-      val idsFromRight = rhs.allCoveredIds
-      val unfullfilledDepsLhs = deps -- idsFromLeft
-      val unfullfilledDepsRhs = deps -- idsFromRight
-
-      unfullfilledDepsLhs.nonEmpty && // The left plan is not enough
-        unfullfilledDepsRhs.nonEmpty && // Neither is the right one
-        (unfullfilledDepsLhs -- idsFromRight).isEmpty // But together we're good
-    }
+  // Imperative implementation style for performance. See produceNIJVariations.
+  def predicatesDependendingOnBothSides(predicateDependencies: Array[(Expression, Array[String])], idsFromLeft: Set[String], idsFromRight: Set[String]): Seq[Expression] =
+    predicateDependencies.filter {
+      case (_, deps) =>
+        var i = 0
+        var unfulfilledLhsDep = false
+        var unfulfilledRhsDep = false
+        var forAllLhsOrRhs = true
+
+        while (i < deps.length) {
+          val inLhs = idsFromLeft(deps(i))
+          val inRhs = idsFromRight(deps(i))
+          unfulfilledLhsDep = unfulfilledLhsDep || !inLhs
+          unfulfilledRhsDep = unfulfilledRhsDep || !inRhs
+          forAllLhsOrRhs = forAllLhsOrRhs && (inLhs || inRhs)
+          i += 1
+        }
+
+        unfulfilledLhsDep && // The left plan is not enough
+          unfulfilledRhsDep && // Neither is the right one
+          forAllLhsOrRhs // But together we're good
+    }.map(_._1)
 }
diff --git a/...cypher/internal/compiler/v3_5/planner/logical/idp/CartesianProductsOrValueJoinsTest.scala b/...cypher/internal/compiler/v3_5/planner/logical/idp/CartesianProductsOrValueJoinsTest.scala
@@ -201,17 +201,17 @@ class CartesianProductsOrValueJoinsTest
     val nProp = prop("n", "prop")
     val xProp = prop("x", "prop")
 
-    val predicate1 = Contains(nProp, xProp)(pos)
-    val predicate2 = propEquality("n", "prop", 42)
+    val predicate1 = Contains(nProp, xProp)(pos) -> Array("n", "x")
+    val predicate2 = propEquality("n", "prop", 42) -> Array("n")
 
-    val qg1 = QueryGraph.empty.withPatternNodes(Set("n"))
-    val qg2 = QueryGraph.empty.withPatternNodes(Set("x"))
+    val idsFromLeft = Set("n")
+    val idsFromRight = Set("x")
 
     // when
-    val result = cartesianProductsOrValueJoins.predicatesDependendingOnBothSides(Seq(predicate1, predicate2), qg1, qg2)
+    val result = cartesianProductsOrValueJoins.predicatesDependendingOnBothSides(Array(predicate1, predicate2), idsFromLeft, idsFromRight)
 
     // then
-    result should be(List(predicate1))
+    result should be(List(predicate1._1))
   }
 
   private def testThis(graph: QueryGraph, input: (PlanningAttributes) => Set[PlannedComponent], assertion: LogicalPlan => Unit): Unit = {

diff --git a/...src/test/scala/org/neo4j/internal/cypher/acceptance/PlannerRobustnessAcceptanceTest.scala b/...src/test/scala/org/neo4j/internal/cypher/acceptance/PlannerRobustnessAcceptanceTest.scala
@@ -0,0 +1,55 @@
+/*
+ * Copyright (c) 2002-2018 "Neo4j,"
+ * Neo4j Sweden AB [http://neo4j.com]
+ *
+ * This file is part of Neo4j Enterprise Edition. The included source
+ * code can be redistributed and/or modified under the terms of the
+ * GNU AFFERO GENERAL PUBLIC LICENSE Version 3
+ * (http://www.fsf.org/licensing/licenses/agpl-3.0.html) with the
+ * Commons Clause, as found in the associated LICENSE.txt file.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Affero General Public License for more details.
+ *
+ * Neo4j object code can be licensed independently from the source
+ * under separate terms from the AGPL. Inquiries can be directed to:
+ * licensing@neo4j.com
+ *
+ * More information is also available at:
+ * https://neo4j.com/licensing/
+ */
+package org.neo4j.internal.cypher.acceptance
+
+import org.neo4j.cypher.ExecutionEngineFunSuite
+
+class PlannerRobustnessAcceptanceTest extends ExecutionEngineFunSuite with CypherComparisonSupport {
+
+  private val t0 = System.nanoTime() / 1000
+
+  test("should plan query of 100 patterns in reasonable time") {
+    val query =
+      "MATCH " + (1 to 100).map(i => s"(user$i:User {userId:$i})").mkString(", ") +
+      "RETURN count(*)"
+
+    graph.execute("""FOREACH (n IN range(1, 100) | CREATE (:User {userId: n}))""")
+    graph.createIndex("User", "userId")
+
+    val t1 = System.nanoTime() / 1000
+    val result = graph.execute(query)
+    while (result.hasNext) result.next()
+    val t2 = System.nanoTime() / 1000
+
+    val setupTime = t1 - t0
+    val queryTime = t2 - t1
+
+    if (queryTime > setupTime) {
+      fail(
+        """Query time for 100-pattern query is too long (bigger that time to start entire db and build index).
+          |  Setup time: %10d us
+          |  Query time: %10d us
+        """.stripMargin.format(setupTime, queryTime))
+    }
+  }
+}