Fix sorting in Prediction type for multiclass classification and add stronger tests #213

Merged: 13 commits, Feb 6, 2019
@@ -32,6 +32,7 @@ package com.salesforce.op.evaluators

import com.salesforce.op.features.types._
import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext}
import com.salesforce.op.testkit.{RandomIntegral, RandomReal, RandomText, RandomVector}
import org.apache.spark.ml.linalg.Vectors
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
@@ -129,6 +130,133 @@ class OpMultiClassificationEvaluatorTest extends FlatSpec with TestSparkContext
)
}

it should "work on randomly generated probabilities" in {
val numClasses = 100
val numRows = 10000

val vectors = RandomVector.dense(RandomReal.uniform[Real](0.0, 1.0), numClasses).limit(numRows)
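// Normalize each raw score vector into a probability distribution via softmax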
val probVectors = vectors.map(v => {
val expArray = v.value.toArray.map(math.exp)
val denom = expArray.sum
expArray.map(x => x/denom).toOPVector
})
val predictions = vectors.zip(probVectors).map{ case (raw, prob) =>
Prediction(prediction = prob.v.argmax.toDouble, rawPrediction = raw.v.toArray, probability = prob.v.toArray)
}

val labels = RandomIntegral.integrals(from = 0, to = numClasses).limit(numRows)
.map(x => x.value.get.toDouble.toRealNN)

val generatedData: Seq[(RealNN, Prediction)] = labels.zip(predictions)
val (rawDF, rawLabel, rawPred) = TestFeatureBuilder(generatedData)

val topNs = Array(1, 3)
val evaluatorMulti = new OpMultiClassificationEvaluator()
.setLabelCol(rawLabel)
.setPredictionCol(rawPred)
.setTopNs(topNs)
val metricsMulti = evaluatorMulti.evaluateAll(rawDF)

// The accuracy at threshold zero should be the same as what Spark calculates (error = 1 - accuracy)
val accuracyAtZero = (metricsMulti.ThresholdMetrics.correctCounts(1).head * 1.0)/numRows
accuracyAtZero + metricsMulti.Error shouldBe 1.0

// Each row should correspond to either a correct, incorrect, or no prediction
metricsMulti.ThresholdMetrics.correctCounts(1)
.zip(metricsMulti.ThresholdMetrics.incorrectCounts(1))
.zip(metricsMulti.ThresholdMetrics.noPredictionCounts(1)).foreach{
case ((c, i), n) => c + i + n shouldBe numRows
}

// The no prediction count should always be 0 when the threshold is 0
metricsMulti.ThresholdMetrics.noPredictionCounts.foreach{
case (_, v) => v.head shouldBe 0L
}
}

it should "work on probability vectors where there are many ties (low unique score cardinality)" in {
val numClasses = 200
val numRows = 1000
val correctProb = 0.3

// Make the score one large number for a random class, and equal, small probabilities for all the others
val vectors = Seq.fill[OPVector](numRows){
val truePredIndex = math.floor(math.random * numClasses).toInt
val myVector = Array.fill(numClasses)(1e-10)
myVector.update(truePredIndex, 4.0)
myVector.toOPVector
}

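// Same softmax normalization: the spiked class stays the clear argmax while all other classes tie at an identical small probability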
val probVectors = vectors.map(v => {
val expArray = v.value.toArray.map(math.exp)
val denom = expArray.sum
expArray.map(x => x/denom).toOPVector
})
val predictions = vectors.zip(probVectors).map{ case (raw, prob) =>
Prediction(prediction = prob.v.argmax, rawPrediction = raw.v.toArray, probability = prob.v.toArray)
}
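// Labels match the prediction roughly correctProb of the time (class 1.0 otherwise), so accuracy at threshold zero should land near 0.3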
val labels = predictions.map(x => if (math.random < correctProb) x.prediction.toRealNN else RealNN(1.0))

val generatedData: Seq[(RealNN, Prediction)] = labels.zip(predictions)
val (rawDF, rawLabel, rawPred) = TestFeatureBuilder(generatedData)

val topNs = Array(1, 3, 5, 10)
val evaluatorMulti = new OpMultiClassificationEvaluator()
.setLabelCol(rawLabel)
.setPredictionCol(rawPred)
.setTopNs(topNs)
val metricsMulti = evaluatorMulti.evaluateAll(rawDF)

// The accuracy at threshold zero should be the same as what Spark calculates (error = 1 - accuracy)
val accuracyAtZero = (metricsMulti.ThresholdMetrics.correctCounts(1).head * 1.0)/numRows
accuracyAtZero + metricsMulti.Error shouldBe 1.0

// Each row should correspond to either a correct, incorrect, or no prediction
metricsMulti.ThresholdMetrics.correctCounts(1)
.zip(metricsMulti.ThresholdMetrics.incorrectCounts(1))
.zip(metricsMulti.ThresholdMetrics.noPredictionCounts(1)).foreach{
case ((c, i), n) => c + i + n shouldBe numRows
}

// The no prediction count should always be 0 when the threshold is 0
metricsMulti.ThresholdMetrics.noPredictionCounts.foreach{
case (_, v) => v.head shouldBe 0L
}
}

// TODO: Add a way to robustly handle scores that tie (either many-way or complete ties)
ignore should "work on probability vectors where there are ties in probabilities" in {
val numClasses = 5
val numRows = 10

val vectors = Seq.fill[OPVector](numRows)(Array.fill(numClasses)(4.2).toOPVector)
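// Identical raw scores in every slot: softmax yields a perfectly uniform probability vector, a complete tie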
val probVectors = vectors.map(v => {
val expArray = v.value.toArray.map(math.exp)
val denom = expArray.sum
expArray.map(x => x/denom).toOPVector
})
val predictions = vectors.zip(probVectors).map{ case (raw, prob) =>
Prediction(prediction = math.floor(math.random * numClasses),
rawPrediction = raw.v.toArray, probability = prob.v.toArray)
}
val labels = Seq.fill[RealNN](numRows)(RealNN(1.0))

val generatedData: Seq[(RealNN, Prediction)] = labels.zip(predictions)
val (rawDF, rawLabel, rawPred) = TestFeatureBuilder(generatedData)

val topNs = Array(1, 3)
val evaluatorMulti = new OpMultiClassificationEvaluator()
.setLabelCol(rawLabel)
.setPredictionCol(rawPred)
.setTopNs(topNs)
val metricsMulti = evaluatorMulti.evaluateAll(rawDF)

val accuracyAtZero = (metricsMulti.ThresholdMetrics.correctCounts(1).head * 1.0)/numRows
assert(accuracyAtZero + metricsMulti.Error == 1.0)
}


it should "not allow topNs to be negative or 0" in {
intercept[java.lang.IllegalArgumentException](new OpMultiClassificationEvaluator().setTopNs(Array(0, 1, 3)))
intercept[java.lang.IllegalArgumentException](new OpMultiClassificationEvaluator().setTopNs(Array(1, -4, 3)))
@@ -317,7 +317,10 @@ class Prediction private[op](value: Map[String, Double]) extends RealMap(value)
s"value map must only contain valid keys: '$PredictionName' or " +
s"starting with '$RawPredictionName' or '$ProbabilityName'"
)
private def keysStartsWith(name: String): Array[String] = value.keys.filter(_.startsWith(name)).toArray.sorted

// Sort the keys by their final index (the integer appended after an underscore in apply):
// a plain lexicographic sort puts e.g. "rawPrediction_10" before "rawPrediction_2"
private def keysStartsWith(name: String): Array[String] = value.keys.filter(_.startsWith(name)).toArray
.sortBy(_.split('_').last.toInt)
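For reference, a minimal sketch of the misordering this fixes (key names here are illustrative; the real keys are built in the apply function):

val keys = Array("prob_0", "prob_1", "prob_2", "prob_10", "prob_11")
keys.sorted // Array(prob_0, prob_1, prob_10, prob_11, prob_2) -- lexicographic, wrong past index 9
keys.sortBy(_.split('_').last.toInt) // Array(prob_0, prob_1, prob_2, prob_10, prob_11) -- correct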
Contributor:

Depending on how wide the keys are, it would be more efficient to scan each key backwards for '_' with lastIndexOf and take a substring. It would also produce less transient garbage: a single string instead of an array.
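A sketch of that alternative (illustrative only, not what was merged):

private def keysStartsWith(name: String): Array[String] =
  value.keys.filter(_.startsWith(name)).toArray
    .sortBy(k => k.substring(k.lastIndexOf('_') + 1).toInt)
// Taking the substring after the last '_' allocates one String per key instead of an Array[String]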

Collaborator:
Too late for the party :)


/**
* Prediction value
@@ -182,7 +182,11 @@ class FeatureTypeValueTest extends PropSpec with PropertyChecks with TestCommon
val pred = Prediction.Keys.PredictionName -> a.head
val expected = (rawPred ++ prob).toMap + pred
checkVals(Prediction(a.head, v, v), expected)
checkVals(Prediction(a.head, a, a), expected)
val fullPred = Prediction(a.head, a, a)
checkVals(fullPred, expected)
fullPred.prediction shouldBe a.head
fullPred.probability shouldBe a
fullPred.rawPrediction shouldBe a
}

}
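For context, a sketch of the value map backing a three-class Prediction (assuming the key strings "prediction", "rawPrediction_i" and "probability_i" built from Prediction.Keys):

Prediction(1.0, Array(0.1, 0.7, 0.2), Array(0.2, 0.5, 0.3)).value
// Map("prediction" -> 1.0,
//     "rawPrediction_0" -> 0.1, "rawPrediction_1" -> 0.7, "rawPrediction_2" -> 0.2,
//     "probability_0" -> 0.2, "probability_1" -> 0.5, "probability_2" -> 0.3)
// Reading the arrays back relies on keysStartsWith returning these keys in index order.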
@@ -69,10 +69,14 @@ class PredictionTest extends FlatSpec with TestCommon
it should "return raw prediction" in {
Prediction(2.0).rawPrediction shouldBe Array()
Prediction(1.0, Array(1.0, 2.0), Array.empty[Double]).rawPrediction shouldBe Array(1.0, 2.0)
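// 199 elements exercise the numeric key sort: lexicographic order would put rawPrediction_10 before rawPrediction_2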
Prediction(1.0, (1 until 200).map(_.toDouble).toArray, Array.empty[Double]).rawPrediction shouldBe
(1 until 200).map(_.toDouble).toArray
}
it should "return probability" in {
Prediction(3.0).probability shouldBe Array()
Prediction(1.0, Array.empty[Double], Array(1.0, 2.0)).probability shouldBe Array(1.0, 2.0)
Prediction(1.0, Array.empty[Double], (1 until 200).map(_.toDouble).toArray).probability shouldBe
(1 until 200).map(_.toDouble).toArray
}
it should "return score" in {
Prediction(4.0).score shouldBe Array(4.0)