Skip to content

Commit

Permalink
Improve performance of Node-Index-Join planning for many-pattern queries
Browse files Browse the repository at this point in the history
  • Loading branch information
fickludd committed Sep 18, 2018
1 parent 093a6db commit 87da4e7
Show file tree
Hide file tree
Showing 3 changed files with 118 additions and 30 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,8 @@ case object cartesianProductsOrValueJoins extends JoinDisconnectedQueryGraphComp
So, when we have too many plans to combine, we fall back to the naive way of just building a left deep tree with
all query parts cross joined together.
*/
val joins = produceHashJoins(plans, qg, context, kit, singleComponentPlanner) ++
val joins =
produceHashJoins(plans, qg, context, kit, singleComponentPlanner) ++
produceNIJVariations(plans, qg, requiredOrder, context, kit, singleComponentPlanner)

if (joins.nonEmpty) {
Expand Down Expand Up @@ -111,23 +112,45 @@ case object cartesianProductsOrValueJoins extends JoinDisconnectedQueryGraphComp
}).toMap
}

// Developer's note: This method has been re-implemented in a very low-level imperative style because
// this code path caused a big SOAK regression for queries with 50-60 plans. The current implementation is
// about 100x faster than the old one, so please keep one eye on performance when changing functionality here.
/**
 * Builds NIJ candidates (see planNIJ) for pairs of already planned components.
 *
 * For a pair of components and each predicate reported by predicatesDependendingOnBothSides,
 * planNIJ is invoked in both directions (A-B and B-A). Every plan it yields becomes a key of the
 * returned map, and the value is the pair of components the plan was built from.
 *
 * NOTE(review): this span carries two bodies -- a for-comprehension ending in `.flatten.toMap`
 * and a while-loop version ending in `result.result()`. Confirm which one is live before editing.
 *
 * @param plans                  the planned components to pair up
 * @param qg                     query graph whose `selections.flatPredicates` are inspected
 * @param requiredOrder          passed through to planNIJ
 * @param context                passed through to planNIJ
 * @param kit                    passed through to planNIJ
 * @param singleComponentPlanner passed through to planNIJ
 * @return map from each NIJ candidate to the (first, second) components it joins
 */
private def produceNIJVariations(plans: Set[PlannedComponent],
qg: QueryGraph,
requiredOrder: RequiredOrder,
context: LogicalPlanningContext,
kit: QueryPlannerKit,
singleComponentPlanner: SingleComponentPlannerTrait): Map[PlannedComponent, (PlannedComponent, PlannedComponent)] = {
// For-comprehension variant: visits every ordered pair of components whose plans differ.
(for {
t1@PlannedComponent(qgA, planA) <- plans
t2@PlannedComponent(qgB, planB) <- plans if planA != planB
predicate <- this.predicatesDependendingOnBothSides(qg.selections.flatPredicates, qgA, qgB)
} yield {
val nestedIndexJoinAB = planNIJ(planA, planB, qgA, qgB, qg, requiredOrder, predicate, context, kit, singleComponentPlanner)
val nestedIndexJoinBA = planNIJ(planB, planA, qgB, qgA, qg, requiredOrder, predicate, context, kit, singleComponentPlanner)

// Results from both directions are tagged with the same (t1, t2) pair.
nestedIndexJoinAB.map(x => (x, t1 -> t2)) ++ nestedIndexJoinBA.map(x => (x, t1 -> t2))
singleComponentPlanner: SingleComponentPlannerTrait):
Map[PlannedComponent, (PlannedComponent, PlannedComponent)] = {
// Resolve every predicate's dependency names once, up front.
val predicatesWithDependencies = qg.selections.flatPredicates.toArray.map(pred => (pred, pred.dependencies.map(_.name).toArray))
// Arrays give index-based access for the while loops below.
val planArray = plans.toArray
// allCoveredIds(i) holds the covered ids of planArray(i).
val allCoveredIds = planArray.map(_.queryGraph.allCoveredIds)

val result = Map.newBuilder[PlannedComponent, (PlannedComponent, PlannedComponent)]

// Visit each unordered pair once (b always starts after a); both join directions are planned inside.
var a = 0
while (a < planArray.length) {
var b = a + 1
while (b < planArray.length) {

val planA = planArray(a).plan
val planB = planArray(b).plan
val qgA = planArray(a).queryGraph
val qgB = planArray(b).queryGraph

for (predicate <- this.predicatesDependendingOnBothSides(predicatesWithDependencies, allCoveredIds(a), allCoveredIds(b))) {
val nestedIndexJoinAB = planNIJ(planA, planB, qgA, qgB, qg, requiredOrder, predicate, context, kit, singleComponentPlanner)
val nestedIndexJoinBA = planNIJ(planB, planA, qgB, qgA, qg, requiredOrder, predicate, context, kit, singleComponentPlanner)

// Both directions are registered under the same (planArray(a), planArray(b)) pair.
nestedIndexJoinAB.foreach(x => result += ((x, planArray(a) -> planArray(b))))
nestedIndexJoinBA.foreach(x => result += ((x, planArray(a) -> planArray(b))))
}
b += 1
}
a += 1
}

}).flatten.toMap
result.result()
}

private def produceHashJoins(plans: Set[PlannedComponent],
Expand Down Expand Up @@ -199,16 +222,26 @@ case object cartesianProductsOrValueJoins extends JoinDisconnectedQueryGraphComp
r.dependencies != l.dependencies => e
}.toSet

/**
 * Selects the predicates that neither side can satisfy alone but both sides satisfy together.
 *
 * A predicate is kept when some of its dependencies are missing from `lhs`, some are missing
 * from `rhs`, and every dependency missing from `lhs` is covered by `rhs`.
 *
 * @param flatPredicates candidate predicates, returned in their original order
 * @param lhs            query graph of the left-hand side
 * @param rhs            query graph of the right-hand side
 * @return the predicates from `flatPredicates` that depend on both sides
 */
def predicatesDependendingOnBothSides(flatPredicates: Seq[Expression], lhs: QueryGraph, rhs: QueryGraph): Seq[Expression] = {
  // These do not depend on the predicate under test, so compute them once
  // rather than once per predicate inside the filter.
  val idsFromLeft = lhs.allCoveredIds
  val idsFromRight = rhs.allCoveredIds

  flatPredicates.filter { pred =>
    val deps = pred.dependencies.map(_.name)
    val unfulfilledDepsLhs = deps -- idsFromLeft
    val unfulfilledDepsRhs = deps -- idsFromRight

    unfulfilledDepsLhs.nonEmpty && // The left plan is not enough
      unfulfilledDepsRhs.nonEmpty && // Neither is the right one
      (unfulfilledDepsLhs -- idsFromRight).isEmpty // But together we're good
  }
}
/**
 * Selects the predicates that need ids from both sides of a join.
 *
 * Written imperatively on purpose, for performance; see produceNIJVariations.
 *
 * @param predicateDependencies each predicate paired with the names it depends on
 * @param idsFromLeft           ids covered by the left-hand side
 * @param idsFromRight          ids covered by the right-hand side
 * @return the predicates, in input order, whose dependencies are not all covered by the left side,
 *         not all covered by the right side, but each covered by at least one of the two
 */
def predicatesDependendingOnBothSides(predicateDependencies: Array[(Expression, Array[String])], idsFromLeft: Set[String], idsFromRight: Set[String]): Seq[Expression] = {
  // Single pass over one predicate's dependency names, tracking the three conditions together.
  def needsBothSides(names: Array[String]): Boolean = {
    var missingOnLeft = false
    var missingOnRight = false
    var coveredJointly = true

    var idx = 0
    while (idx < names.length) {
      val name = names(idx)
      val onLeft = idsFromLeft(name)
      val onRight = idsFromRight(name)
      if (!onLeft) missingOnLeft = true
      if (!onRight) missingOnRight = true
      if (!onLeft && !onRight) coveredJointly = false
      idx += 1
    }

    missingOnLeft && // The left plan is not enough
      missingOnRight && // Neither is the right one
      coveredJointly // But together we're good
  }

  predicateDependencies.collect {
    case (predicate, names) if needsBothSides(names) => predicate
  }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -201,17 +201,17 @@ class CartesianProductsOrValueJoinsTest
val nProp = prop("n", "prop")
val xProp = prop("x", "prop")

val predicate1 = Contains(nProp, xProp)(pos)
val predicate2 = propEquality("n", "prop", 42)
val predicate1 = Contains(nProp, xProp)(pos) -> Array("n", "x")
val predicate2 = propEquality("n", "prop", 42) -> Array("n")

val qg1 = QueryGraph.empty.withPatternNodes(Set("n"))
val qg2 = QueryGraph.empty.withPatternNodes(Set("x"))
val idsFromLeft = Set("n")
val idsFromRight = Set("x")

// when
val result = cartesianProductsOrValueJoins.predicatesDependendingOnBothSides(Seq(predicate1, predicate2), qg1, qg2)
val result = cartesianProductsOrValueJoins.predicatesDependendingOnBothSides(Array(predicate1, predicate2), idsFromLeft, idsFromRight)

// then
result should be(List(predicate1))
result should be(List(predicate1._1))
}

private def testThis(graph: QueryGraph, input: (PlanningAttributes) => Set[PlannedComponent], assertion: LogicalPlan => Unit): Unit = {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
/*
* Copyright (c) 2002-2018 "Neo4j,"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j Enterprise Edition. The included source
* code can be redistributed and/or modified under the terms of the
* GNU AFFERO GENERAL PUBLIC LICENSE Version 3
* (http://www.fsf.org/licensing/licenses/agpl-3.0.html) with the
* Commons Clause, as found in the associated LICENSE.txt file.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* Neo4j object code can be licensed independently from the source
* under separate terms from the AGPL. Inquiries can be directed to:
* licensing@neo4j.com
*
* More information is also available at:
* https://neo4j.com/licensing/
*/
package org.neo4j.internal.cypher.acceptance

import org.neo4j.cypher.ExecutionEngineFunSuite

/**
 * Guards against planning-time blow-ups for queries with many disconnected patterns.
 *
 * The check is relative rather than absolute: running the 100-pattern query must not take
 * longer than everything that happened between construction of this suite and the point where
 * the test data and index have been created.
 */
class PlannerRobustnessAcceptanceTest extends ExecutionEngineFunSuite with CypherComparisonSupport {

  // Microsecond timestamp taken when the suite instance is constructed; start of the "setup" window.
  private val t0 = System.nanoTime() / 1000

  test("should plan query of 100 patterns in reasonable time") {
    // 100 comma-separated node patterns, each of which can be solved by an index lookup on :User(userId).
    val patterns = (1 to 100).map(i => s"(user$i:User {userId:$i})").mkString(", ")
    // Keep explicit whitespace between the last pattern and RETURN.
    val query = s"MATCH $patterns RETURN count(*)"

    graph.execute("""FOREACH (n IN range(1, 100) | CREATE (:User {userId: n}))""")
    graph.createIndex("User", "userId")

    val t1 = System.nanoTime() / 1000
    val result = graph.execute(query)
    // Drain the result so the measured time covers the whole query, not just obtaining the result.
    while (result.hasNext) result.next()
    val t2 = System.nanoTime() / 1000

    val setupTime = t1 - t0
    val queryTime = t2 - t1

    if (queryTime > setupTime) {
      fail(
        """Query time for 100-pattern query is too long (bigger than the time to start entire db and build index).
          | Setup time: %10d us
          | Query time: %10d us
          |""".stripMargin.format(setupTime, queryTime))
    }
  }
}

0 comments on commit 87da4e7

Please sign in to comment.