Skip to content

Commit

Permalink
Merge pull request #5414 from pontusmelke/2.2-selectivity-many-labels
Browse files Browse the repository at this point in the history
Properly handle selectivities when many labels
  • Loading branch information
systay committed Sep 24, 2015
2 parents e27b12f + dafa279 commit 53ac064
Show file tree
Hide file tree
Showing 22 changed files with 110 additions and 70 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -27,10 +27,10 @@ case object HardcodedGraphStatistics extends HardcodedGraphStatisticsValues

class HardcodedGraphStatisticsValues extends GraphStatistics {
val NODES_CARDINALITY = Cardinality(10000)
val NODES_WITH_LABEL_SELECTIVITY = Selectivity(0.2)
val NODES_WITH_LABEL_SELECTIVITY = Selectivity.of(0.2).get
val NODES_WITH_LABEL_CARDINALITY = NODES_CARDINALITY * NODES_WITH_LABEL_SELECTIVITY
val RELATIONSHIPS_CARDINALITY = Cardinality(50000)
val INDEX_SELECTIVITY = Selectivity(.02)
val INDEX_SELECTIVITY = Selectivity.of(.02).get

def indexSelectivity(label: LabelId, property: PropertyKeyId): Option[Selectivity] =
Some(INDEX_SELECTIVITY)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ case class Cardinality(amount: Double) extends Ordered[Cardinality] {
def *(that: Selectivity): Cardinality = amount * that.factor
def +(that: Cardinality): Cardinality = amount + that.amount
def *(that: Cardinality): Cardinality = amount * that.amount
def /(that: Cardinality): Option[Selectivity] = if (that.amount == 0) None else Some(amount / that.amount)
def /(that: Cardinality): Option[Selectivity] = if (that.amount == 0) None else Selectivity.of(amount / that.amount)
def *(that: CostPerRow): Cost = amount * that.cost
def *(that: Cost): Cost = amount * that.gummyBears
def ^(a: Int): Cardinality = Math.pow(amount, a)
Expand Down Expand Up @@ -153,15 +153,14 @@ object Multiplier {
Multiplier(Math.max(l.coefficient, r.coefficient))
}

case class Selectivity(factor: Double) extends Ordered[Selectivity] {
def -(other: Selectivity): Selectivity = factor - other.factor
def *(other: Selectivity): Selectivity = other.factor * factor
def *(other: Multiplier): Selectivity = factor * other.coefficient
def ^(a: Int): Selectivity = Math.pow(factor, a)
case class Selectivity private(factor: Double) extends Ordered[Selectivity] {
assert(factor >= 0 && factor <= 1.0)
def *(other: Selectivity): Selectivity = Selectivity(other.factor * factor)
def ^(a: Int): Selectivity = Selectivity(Math.pow(factor, a))
def negate: Selectivity = {
val f = 1.0 - factor
if (factor == 0 || f < 1)
f
Selectivity(f)
else
Selectivity.CLOSEST_TO_ONE
}
Expand All @@ -170,13 +169,13 @@ case class Selectivity(factor: Double) extends Ordered[Selectivity] {
}

object Selectivity {
def of(value: Double): Option[Selectivity] = if (value.isInfinite || value.isNaN) None else Some(value)

def of(value: Double): Option[Selectivity] = if (value.isInfinite || value.isNaN || value < 0.0 || value > 1.0) None else Some(Selectivity(value))

val ZERO = Selectivity(0.0d)
val ONE = Selectivity(1.0d)
val CLOSEST_TO_ONE = Selectivity(1 - 5.56e-17) // we can get closer, but this is close enough

implicit def lift(amount: Double): Selectivity = Selectivity(amount)

implicit def turnSeqIntoSingleSelectivity(p: Seq[Selectivity]): Selectivity =
p.reduceOption(_ * _).getOrElse(Selectivity(1))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ class StatisticsBackedCardinalityModel(queryGraphCardinalityModel: QueryGraphCar

// Distinct
case projection: AggregatingQueryProjection if projection.aggregationExpressions.isEmpty =>
in * Selectivity(0.95)
in * Selectivity.of(0.95).get

// Aggregates
case _: AggregatingQueryProjection =>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ case class ExpressionSelectivityCalculator(stats: GraphStatistics, combiner: Sel

// WHERE false
case False() =>
Selectivity(0)
Selectivity.of(0).get

// WHERE x.prop IN [...]
case In(Property(Identifier(name), propertyKey), Collection(expressions)) =>
Expand Down Expand Up @@ -104,15 +104,13 @@ case class ExpressionSelectivityCalculator(stats: GraphStatistics, combiner: Sel
stats.indexSelectivity(labelId, propertyKeyId)

case _ =>
Some(Selectivity(0))
Some(Selectivity.ZERO)
}
}

val expandedSelectivities = Stream.from(0).take(sizeHint).flatMap(_ => indexSelectivities)
val itemSelectivity = combiner.orTogetherSelectivities(indexSelectivities).getOrElse(DEFAULT_EQUALITY_SELECTIVITY)
val selectivity = combiner.orTogetherSelectivities(1.to(sizeHint).map(_ => itemSelectivity)).getOrElse(DEFAULT_EQUALITY_SELECTIVITY)

val selectivity: Option[Selectivity] = combiner.orTogetherSelectivities(expandedSelectivities)

selectivity.
getOrElse(DEFAULT_EQUALITY_SELECTIVITY * Multiplier(sizeHint)) // If no index exist, use default equality selectivity
selectivity
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ case object IndependenceCombiner extends SelectivityCombiner {
}

private def fromBigDecimal(bigDecimal: math.BigDecimal): Selectivity = {
Selectivity(bigDecimal.doubleValue())
Selectivity.of(bigDecimal.doubleValue()).get
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,6 @@ case class AssumeIndependenceQueryGraphCardinalityModel(stats: GraphStatistics,

val selectivity = combiner.andTogetherSelectivities(expressionSelectivities ++ patternSelectivities.flatten)

(selectivity.getOrElse(Selectivity(1)), numberOfZeroZeroRels)
(selectivity.getOrElse(Selectivity.ONE), numberOfZeroZeroRels)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,7 @@ case class PatternSelectivityCalculator(stats: GraphStatistics, combiner: Select
def apply(pattern: PatternRelationship, labels: Map[IdName, Set[LabelName]])
(implicit semanticTable: SemanticTable, selections: Selections): Selectivity = {
val allNodes = stats.nodesWithLabelCardinality(None)
val lhs = pattern.nodes._1
val rhs = pattern.nodes._2
val (lhs, rhs) = pattern.nodes
val labelsOnLhs: Seq[TokenSpec[LabelId]] = mapToLabelTokenSpecs(selections.labelsOnNode(lhs) ++ labels.getOrElse(lhs, Set.empty))
val labelsOnRhs: Seq[TokenSpec[LabelId]] = mapToLabelTokenSpecs(selections.labelsOnNode(rhs) ++ labels.getOrElse(rhs, Set.empty))

Expand All @@ -53,7 +52,7 @@ case class PatternSelectivityCalculator(stats: GraphStatistics, combiner: Select

// If either side of our pattern is empty, it's all empty
if (lhsCardinality == Cardinality.EMPTY || lhsCardinality == Cardinality.EMPTY)
Selectivity(1)
Selectivity.ONE
else {
val types: Seq[TokenSpec[RelTypeId]] = mapToRelTokenSpecs(pattern.types.toSet)

Expand Down Expand Up @@ -133,12 +132,12 @@ case class PatternSelectivityCalculator(stats: GraphStatistics, combiner: Select

private def calculateLabelSelectivity(specs: Seq[TokenSpec[LabelId]]): Selectivity = {
val selectivities = specs map {
case SpecifiedButUnknown() => Selectivity(0)
case SpecifiedButUnknown() => Selectivity.ONE
case spec: TokenSpec[LabelId] =>
stats.nodesWithLabelCardinality(spec.id) / stats.nodesWithLabelCardinality(None) getOrElse Selectivity.ZERO
}

combiner.andTogetherSelectivities(selectivities).getOrElse(Selectivity(1))
combiner.andTogetherSelectivities(selectivities).getOrElse(Selectivity.ONE)
}

// These two methods should be one, but I failed to conjure up the proper Scala type magic to make it work
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,13 @@ import org.neo4j.cypher.internal.compiler.v2_2.planner.logical.{Cardinality, Sel
import org.neo4j.cypher.internal.compiler.v2_2.{LabelId, PropertyKeyId, RelTypeId}

object GraphStatistics {
val DEFAULT_RANGE_SELECTIVITY = Selectivity(0.3)
val DEFAULT_PREDICATE_SELECTIVITY = Selectivity(0.75)
val DEFAULT_EQUALITY_SELECTIVITY = Selectivity(0.1)
val DEFAULT_RANGE_SELECTIVITY = Selectivity.of(0.3).get
val DEFAULT_PREDICATE_SELECTIVITY = Selectivity.of(0.75).get
val DEFAULT_EQUALITY_SELECTIVITY = Selectivity.of(0.1).get
val DEFAULT_NUMBER_OF_ID_LOOKUPS = Cardinality(25)
val DEFAULT_NUMBER_OF_INDEX_LOOKUPS = Cardinality(25)
val DEFAULT_LIMIT_CARDINALITY = Cardinality(75)
val DEFAULT_REL_UNIQUENESS_SELECTIVITY = Selectivity(1.0 - 1 / 100 /*rel-cardinality*/)
val DEFAULT_REL_UNIQUENESS_SELECTIVITY = Selectivity.of(1.0 - 1.0 / 100 /*rel-cardinality*/).get
}

trait GraphStatistics {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,9 @@ class QueriedGraphStatistics(graph: GraphDatabaseService, queryContext: QueryCon
}

if (values.isEmpty)
Some(Selectivity(0)) // Avoids division by zero
Some(Selectivity.ZERO) // Avoids division by zero
else
Some(Selectivity(1.0 / values.size))
Selectivity.of(1.0 / values.size)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,6 @@ class DbStructureGraphStatistics(lookup: DbStructureLookup) extends GraphStatist
*/
override def indexSelectivity( label: LabelId, property: PropertyKeyId ): Option[Selectivity] = {
val result = lookup.indexSelectivity( label.id, property.id )
if (result.isNaN) None else Some(Selectivity(result))
Selectivity.of(result)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ import org.neo4j.cypher.internal.commons.CypherFunSuite
class MetricsTest extends CypherFunSuite {

test("negating a selectivity behaves as expected") {
Selectivity(.1).negate should not equal Selectivity.ONE
Selectivity(5.6e-17).negate should not equal Selectivity.ONE
Selectivity(1e-300).negate should not equal Selectivity.ONE
Selectivity.of(.1).get.negate should not equal Selectivity.ONE
Selectivity.of(5.6e-17).get.negate should not equal Selectivity.ONE
Selectivity.of(1e-300).get.negate should not equal Selectivity.ONE

Selectivity.ZERO.negate should equal(Selectivity.ONE)
Selectivity(0).negate should equal(Selectivity.ONE)
Selectivity.ZERO.negate should equal(Selectivity.ONE)

Selectivity.ONE.negate should equal(Selectivity.ZERO)
Selectivity(1).negate should equal(Selectivity.ZERO)
Selectivity.ONE.negate should equal(Selectivity.ZERO)

Selectivity.CLOSEST_TO_ONE.negate should not equal Selectivity.ONE
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class StatisticsBackedCardinalityModelTest extends CypherFunSuite with LogicalPl

test("aggregations should never increase cardinality") {
givenPattern("MATCH (a:Person)-[:REL]->() WITH a, count(*) as c MATCH (a)-[:REL]->()").
withGraphNodes(1).
withGraphNodes(allNodes).
withLabel('Person -> .1).
withRelationshipCardinality('Person -> 'REL -> 'Person -> .5).
shouldHavePlannerQueryCardinality(produceCardinalityModel)(2.5)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,9 @@ trait CardinalityTestHelper extends QueryGraphProducer with CardinalityCustomMat

def combiner: SelectivityCombiner = IndependenceCombiner

def not(number: Double) = Selectivity(number).negate.factor
def and(numbers: Double*) = combiner.andTogetherSelectivities(numbers.map(Selectivity.apply)).get.factor
def or(numbers: Double*) = combiner.orTogetherSelectivities(numbers.map(Selectivity.apply)).get.factor
def not(number: Double) = Selectivity.of(number).getOrElse(Selectivity.ONE).negate.factor
def and(numbers: Double*) = combiner.andTogetherSelectivities(numbers.map(Selectivity.of(_).getOrElse(Selectivity.ONE))).get.factor
def or(numbers: Double*) = combiner.orTogetherSelectivities(numbers.map(Selectivity.of(_).getOrElse(Selectivity.ONE))).get.factor

def degree(above: Double, below: Double) = above / below

Expand Down Expand Up @@ -139,9 +139,9 @@ trait CardinalityTestHelper extends QueryGraphProducer with CardinalityCustomMat
(labelName, propertyName) match {
case (Some(lName), Some(pName)) =>
val selectivity = knownIndexSelectivity.get((lName, pName))
selectivity.map(Selectivity.apply)
selectivity.map(Selectivity.of(_).getOrElse(Selectivity.ONE))

case _ => Some(Selectivity(0))
case _ => Some(Selectivity.ZERO)
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ class ExpressionSelectivityCalculatorTest extends CypherFunSuite with AstConstru

val stats = mock[GraphStatistics]
Mockito.when(stats.nodesWithLabelCardinality(None)).thenReturn(1000.0)
Mockito.when(stats.indexSelectivity(LabelId(0), PropertyKeyId(0))).thenReturn(Some(Selectivity(0.1d)))
Mockito.when(stats.indexSelectivity(LabelId(0), PropertyKeyId(0))).thenReturn(Selectivity.of(0.1d))

val calculator = ExpressionSelectivityCalculator(stats, IndependenceCombiner)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,8 @@ class NodeCardinalityEstimatorTest extends CypherFunSuite with LogicalPlanningTe
val hasAnimalOnA: Expression = HasLabels(ident("a"), Seq(LabelName("Animal")_))_
val hasPersonOnB: Expression = HasLabels(ident("b"), Seq(LabelName("Person")_))_

val personSelectivity = Selectivity(0.5)
val animalSelectivity = Selectivity(0.1)
val personSelectivity = Selectivity.of(0.5).get
val animalSelectivity = Selectivity.of(0.1).get

test("should estimate node labels") {
when( selectivityEstimator.apply(hasPersonOnA ) ).thenReturn( personSelectivity )
Expand Down Expand Up @@ -106,8 +106,8 @@ class NodeCardinalityEstimatorTest extends CypherFunSuite with LogicalPlanningTe
val pred1 = mock[Expression]
val pred2 = mock[Expression]

when( selectivityEstimator.apply( pred1 ) ).thenReturn( Selectivity( 0.5d ) )
when( selectivityEstimator.apply( pred2 ) ).thenReturn( Selectivity( 0.25d ) )
when( selectivityEstimator.apply( pred1 ) ).thenReturn( Selectivity.of( 0.5d ).get )
when( selectivityEstimator.apply( pred2 ) ).thenReturn( Selectivity.of( 0.25d ).get )

val qg = QueryGraph
.empty
Expand All @@ -122,7 +122,7 @@ class NodeCardinalityEstimatorTest extends CypherFunSuite with LogicalPlanningTe
val (estimates, used) = estimator(qg)

estimates should equal(Map(
IdName("a") -> allNodes * Selectivity( 0.5d ),
IdName("a") -> allNodes * Selectivity.of( 0.5d ).get,
IdName("b") -> allNodes
))

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,27 +25,27 @@ import org.neo4j.cypher.internal.compiler.v2_2.planner.logical.Selectivity
class SelectivityCombinerTest extends CypherFunSuite {

test("should not lose precision for intermediate numbers") {
val selectivities = Seq(Selectivity(1e-10), Selectivity(2e-10))
val selectivities = Seq(Selectivity.of(1e-10).get, Selectivity.of(2e-10).get)

IndependenceCombiner.orTogetherSelectivities(selectivities).get should not equal Selectivity(0)
IndependenceCombiner.orTogetherSelectivities(selectivities).get should not equal Selectivity.ZERO
}

test("should not lose precision for small numbers") {
val selectivities = Seq(Selectivity(1e-100), Selectivity(2e-100), Selectivity(1e-300))
val selectivities = Seq(Selectivity.of(1e-100).get, Selectivity.of(2e-100).get, Selectivity.of(1e-300).get)

IndependenceCombiner.orTogetherSelectivities(selectivities).get should not equal Selectivity(0)
IndependenceCombiner.orTogetherSelectivities(selectivities).get should not equal Selectivity.ZERO
}

test("ANDing together works as expected") {
val selectivities = Seq(Selectivity(.1), Selectivity(.2), Selectivity.ONE)
val selectivities = Seq(Selectivity.of(.1).get, Selectivity.of(.2).get, Selectivity.ONE)

IndependenceCombiner.andTogetherSelectivities(selectivities).get should equal(Selectivity(0.02))
IndependenceCombiner.andTogetherSelectivities(selectivities).get should equal(Selectivity.of(0.02).get)
}

test("ORing together works as expected") {
val selectivities = Seq(Selectivity(.1), Selectivity(.2))
val selectivities = Seq(Selectivity.of(.1).get, Selectivity.of(.2).get)

IndependenceCombiner.orTogetherSelectivities(selectivities).get should equal(Selectivity(0.28))
IndependenceCombiner.orTogetherSelectivities(selectivities).get should equal(Selectivity.of(0.28).get)
}

}
Original file line number Diff line number Diff line change
Expand Up @@ -121,8 +121,8 @@ class AssumeIndependenceQueryGraphCardinalityModelTest extends RandomizedCardina
"MATCH (a:A:B)-->()"
-> {
val maxRelCount = N * N * Asel * Bsel
val A_relSelectivity = A_STAR_STAR / maxRelCount
val B_relSelectivity = B_STAR_STAR / maxRelCount
val A_relSelectivity = math.min(A_STAR_STAR / maxRelCount, 1.0)
val B_relSelectivity = math.min(B_STAR_STAR / maxRelCount, 1.0)
val relSelectivity = A_relSelectivity * B_relSelectivity
A * B * relSelectivity
},
Expand All @@ -132,7 +132,7 @@ class AssumeIndependenceQueryGraphCardinalityModelTest extends RandomizedCardina
val patternNodeCrossProduct = N * N
val labelSelectivity = Asel * Bsel
val maxRelCount = patternNodeCrossProduct * labelSelectivity
val relSelectivity = (A_T1_B + A_T2_B) / maxRelCount - (A_T1_B / maxRelCount) * (A_T2_B / maxRelCount)
val relSelectivity = or(A_T1_B / maxRelCount, A_T2_B / maxRelCount)
patternNodeCrossProduct * labelSelectivity * relSelectivity
},

Expand All @@ -153,8 +153,8 @@ class AssumeIndependenceQueryGraphCardinalityModelTest extends RandomizedCardina
val patternNodeCrossProduct = N * N
val labelSelectivity = Asel * Dsel
val maxRelCount = patternNodeCrossProduct * labelSelectivity
val relSelectivityT1 = (A_T1_STAR / maxRelCount) * (D_T1_STAR / maxRelCount)
val relSelectivityT2 = (A_T2_STAR / maxRelCount) * (D_T2_STAR / maxRelCount)
val relSelectivityT1 = and(A_T1_STAR / maxRelCount, D_T1_STAR / maxRelCount)
val relSelectivityT2 = and(A_T2_STAR / maxRelCount, D_T2_STAR / maxRelCount)
val relSelectivity = or(relSelectivityT1, relSelectivityT2)
patternNodeCrossProduct * labelSelectivity * relSelectivity
},
Expand All @@ -168,8 +168,8 @@ class AssumeIndependenceQueryGraphCardinalityModelTest extends RandomizedCardina
val patternNodeCrossProduct = N * N
val labelSelectivity = Asel * Bsel * Csel * Dsel
val maxRelCount = patternNodeCrossProduct * labelSelectivity
val relSelT1 = (A_T1_C / maxRelCount) * (A_T1_D / maxRelCount) * (B_T1_C / maxRelCount) * (B_T1_D / maxRelCount)
val relSelT2 = (A_T2_C / maxRelCount) * (A_T2_D / maxRelCount) * (B_T2_C / maxRelCount) * (B_T2_D / maxRelCount)
val relSelT1 = and(A_T1_C / maxRelCount, A_T1_D / maxRelCount, B_T1_C / maxRelCount, B_T1_D / maxRelCount)
val relSelT2 = and(A_T2_C / maxRelCount, A_T2_D / maxRelCount, B_T2_C / maxRelCount, B_T2_D / maxRelCount)
val relSelectivity = or(relSelT1, relSelT2)
patternNodeCrossProduct * labelSelectivity * relSelectivity
},
Expand Down

0 comments on commit 53ac064

Please sign in to comment.