Fix sorting in Prediction type for multiclass classification and add stronger tests #213

Merged: 13 commits, Feb 6, 2019
@@ -32,6 +32,7 @@ package com.salesforce.op.evaluators

import com.salesforce.op.features.types._
import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext}
import com.salesforce.op.testkit.{RandomIntegral, RandomReal, RandomText, RandomVector}
import org.apache.spark.ml.linalg.Vectors
import org.junit.runner.RunWith
import org.scalatest.FlatSpec
@@ -129,6 +130,133 @@ class OpMultiClassificationEvaluatorTest extends FlatSpec with TestSparkContext
)
}

it should "work on randomly generated probabilities" in {
val numClasses = 100
val numRows = 10000

val vectors = RandomVector.dense(RandomReal.uniform[Real](0.0, 1.0), numClasses).limit(numRows)
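// Normalize each raw score vector into a probability distribution via softmax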
val probVectors = vectors.map(v => {
val expArray = v.value.toArray.map(math.exp)
val denom = expArray.sum
expArray.map(x => x/denom).toOPVector
})
val predictions = vectors.zip(probVectors).map{ case (raw, prob) =>
Prediction(prediction = prob.v.argmax.toDouble, rawPrediction = raw.v.toArray, probability = prob.v.toArray)
}

val labels = RandomIntegral.integrals(from = 0, to = numClasses).limit(numRows)
.map(x => x.value.get.toDouble.toRealNN)

val generatedData: Seq[(RealNN, Prediction)] = labels.zip(predictions)
val (rawDF, rawLabel, rawPred) = TestFeatureBuilder(generatedData)

val topNs = Array(1, 3)
val evaluatorMulti = new OpMultiClassificationEvaluator()
.setLabelCol(rawLabel)
.setPredictionCol(rawPred)
.setTopNs(topNs)
val metricsMulti = evaluatorMulti.evaluateAll(rawDF)

// The accuracy at threshold zero should be the same as what Spark calculates (error = 1 - accuracy)
val accuracyAtZero = (metricsMulti.ThresholdMetrics.correctCounts(1).head * 1.0)/numRows
accuracyAtZero + metricsMulti.Error shouldBe 1.0

// Each row should correspond to either a correct, incorrect, or no prediction
metricsMulti.ThresholdMetrics.correctCounts(1)
.zip(metricsMulti.ThresholdMetrics.incorrectCounts(1))
.zip(metricsMulti.ThresholdMetrics.noPredictionCounts(1)).foreach{
case ((c, i), n) => c + i + n shouldBe numRows
}

// The no prediction count should always be 0 when the threshold is 0
metricsMulti.ThresholdMetrics.noPredictionCounts.foreach{
case (_, v) => v.head shouldBe 0L
}
}

it should "work on probability vectors where there are many ties (low unique score cardinality)" in {
val numClasses = 200
val numRows = 1000
val correctProb = 0.3

// Make the score one large number for a random class, and equal, small probabilities for all the others
val vectors = Seq.fill[OPVector](numRows){
val truePredIndex = math.floor(math.random * numClasses).toInt
val myVector = Array.fill(numClasses)(1e-10)
myVector.update(truePredIndex, 4.0)
myVector.toOPVector
}

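// Same softmax normalization: the spiked class stays the clear argmax while all other classes tie at an identical small probability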
val probVectors = vectors.map(v => {
val expArray = v.value.toArray.map(math.exp)
val denom = expArray.sum
expArray.map(x => x/denom).toOPVector
})
val predictions = vectors.zip(probVectors).map{ case (raw, prob) =>
Prediction(prediction = prob.v.argmax, rawPrediction = raw.v.toArray, probability = prob.v.toArray)
}
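// Labels match the prediction roughly correctProb of the time (class 1.0 otherwise), so accuracy at threshold zero should land near 0.3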
val labels = predictions.map(x => if (math.random < correctProb) x.prediction.toRealNN else RealNN(1.0))

val generatedData: Seq[(RealNN, Prediction)] = labels.zip(predictions)
val (rawDF, rawLabel, rawPred) = TestFeatureBuilder(generatedData)

val topNs = Array(1, 3, 5, 10)
val evaluatorMulti = new OpMultiClassificationEvaluator()
.setLabelCol(rawLabel)
.setPredictionCol(rawPred)
.setTopNs(topNs)
val metricsMulti = evaluatorMulti.evaluateAll(rawDF)

// The accuracy at threshold zero should be the same as what Spark calculates (error = 1 - accuracy)
val accuracyAtZero = (metricsMulti.ThresholdMetrics.correctCounts(1).head * 1.0)/numRows
accuracyAtZero + metricsMulti.Error shouldBe 1.0

// Each row should correspond to either a correct, incorrect, or no prediction
metricsMulti.ThresholdMetrics.correctCounts(1)
.zip(metricsMulti.ThresholdMetrics.incorrectCounts(1))
.zip(metricsMulti.ThresholdMetrics.noPredictionCounts(1)).foreach{
case ((c, i), n) => c + i + n shouldBe numRows
}

// The no prediction count should always be 0 when the threshold is 0
metricsMulti.ThresholdMetrics.noPredictionCounts.foreach{
case (_, v) => v.head shouldBe 0L
}
}

// TODO: Add a way to robustly handle scores that tie (either many-way or complete ties)
ignore should "work on probability vectors where there are ties in probabilities" in {
val numClasses = 5
val numRows = 10

val vectors = Seq.fill[OPVector](numRows)(Array.fill(numClasses)(4.2).toOPVector)
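// Identical raw scores in every slot: softmax yields a perfectly uniform probability vector, a complete tie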
val probVectors = vectors.map(v => {
val expArray = v.value.toArray.map(math.exp)
val denom = expArray.sum
expArray.map(x => x/denom).toOPVector
})
val predictions = vectors.zip(probVectors).map{ case (raw, prob) =>
Prediction(prediction = math.floor(math.random * numClasses),
rawPrediction = raw.v.toArray, probability = prob.v.toArray)
}
val labels = Seq.fill[RealNN](numRows)(RealNN(1.0))

val generatedData: Seq[(RealNN, Prediction)] = labels.zip(predictions)
val (rawDF, rawLabel, rawPred) = TestFeatureBuilder(generatedData)

val topNs = Array(1, 3)
val evaluatorMulti = new OpMultiClassificationEvaluator()
.setLabelCol(rawLabel)
.setPredictionCol(rawPred)
.setTopNs(topNs)
val metricsMulti = evaluatorMulti.evaluateAll(rawDF)

val accuracyAtZero = (metricsMulti.ThresholdMetrics.correctCounts(1).head * 1.0)/numRows
assert(accuracyAtZero + metricsMulti.Error == 1.0)
}


it should "not allow topNs to be negative or 0" in {
intercept[java.lang.IllegalArgumentException](new OpMultiClassificationEvaluator().setTopNs(Array(0, 1, 3)))
intercept[java.lang.IllegalArgumentException](new OpMultiClassificationEvaluator().setTopNs(Array(1, -4, 3)))
@@ -317,7 +317,10 @@ class Prediction private[op](value: Map[String, Double]) extends RealMap(value)
s"value map must only contain valid keys: '$PredictionName' or " +
s"starting with '$RawPredictionName' or '$ProbabilityName'"
)
private def keysStartsWith(name: String): Array[String] = value.keys.filter(_.startsWith(name)).toArray.sorted

// Sort the keys by their final index (the integer appended after an underscore in apply):
// a plain lexicographic sort puts e.g. "rawPrediction_10" before "rawPrediction_2"
private def keysStartsWith(name: String): Array[String] = value.keys.filter(_.startsWith(name)).toArray
.sortBy(_.split('_').last.toInt)
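For reference, a minimal sketch of the misordering this fixes (key names here are illustrative; the real keys are built in the apply function):

val keys = Array("prob_0", "prob_1", "prob_2", "prob_10", "prob_11")
keys.sorted // Array(prob_0, prob_1, prob_10, prob_11, prob_2) -- lexicographic, wrong past index 9
keys.sortBy(_.split('_').last.toInt) // Array(prob_0, prob_1, prob_2, prob_10, prob_11) -- correct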
Contributor:

Depending on how wide the keys are, it would be more efficient to scan each key backwards for '_' with lastIndexOf and take a substring. It would also produce less transient garbage: a single string instead of an array.
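A sketch of that alternative (illustrative only, not what was merged):

private def keysStartsWith(name: String): Array[String] =
  value.keys.filter(_.startsWith(name)).toArray
    .sortBy(k => k.substring(k.lastIndexOf('_') + 1).toInt)
// Taking the substring after the last '_' allocates one String per key instead of an Array[String]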

Collaborator:
Too late for the party :)


/**
* Prediction value
@@ -182,7 +182,11 @@ class FeatureTypeValueTest extends PropSpec with PropertyChecks with TestCommon
val pred = Prediction.Keys.PredictionName -> a.head
val expected = (rawPred ++ prob).toMap + pred
checkVals(Prediction(a.head, v, v), expected)
checkVals(Prediction(a.head, a, a), expected)
val fullPred = Prediction(a.head, a, a)
checkVals(fullPred, expected)
fullPred.prediction shouldBe a.head
fullPred.probability shouldBe a
fullPred.rawPrediction shouldBe a
}

}
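For context, a sketch of the value map backing a three-class Prediction (assuming the key strings "prediction", "rawPrediction_i" and "probability_i" built from Prediction.Keys):

Prediction(1.0, Array(0.1, 0.7, 0.2), Array(0.2, 0.5, 0.3)).value
// Map("prediction" -> 1.0,
//     "rawPrediction_0" -> 0.1, "rawPrediction_1" -> 0.7, "rawPrediction_2" -> 0.2,
//     "probability_0" -> 0.2, "probability_1" -> 0.5, "probability_2" -> 0.3)
// Reading the arrays back relies on keysStartsWith returning these keys in index order.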
@@ -69,10 +69,14 @@ class PredictionTest extends FlatSpec with TestCommon
it should "return raw prediction" in {
Prediction(2.0).rawPrediction shouldBe Array()
Prediction(1.0, Array(1.0, 2.0), Array.empty[Double]).rawPrediction shouldBe Array(1.0, 2.0)
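// 199 elements exercise the numeric key sort: lexicographic order would put rawPrediction_10 before rawPrediction_2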
Prediction(1.0, (1 until 200).map(_.toDouble).toArray, Array.empty[Double]).rawPrediction shouldBe
(1 until 200).map(_.toDouble).toArray
}
it should "return probability" in {
Prediction(3.0).probability shouldBe Array()
Prediction(1.0, Array.empty[Double], Array(1.0, 2.0)).probability shouldBe Array(1.0, 2.0)
Prediction(1.0, Array.empty[Double], (1 until 200).map(_.toDouble).toArray).probability shouldBe
(1 until 200).map(_.toDouble).toArray
}
it should "return score" in {
Prediction(4.0).score shouldBe Array(4.0)