Use the R-7 estimation strategy when computing percentiles (#97)

spinnaker · Oct 3, 2017 · ab9549a · ab9549a
1 parent 12fb29f
commit ab9549a
Show file tree

Hide file tree

Showing 4 changed files with 95 additions and 17 deletions.
diff --git a/kayenta-judge/src/main/scala/com/netflix/kayenta/judge/detectors/IQRDetector.scala b/kayenta-judge/src/main/scala/com/netflix/kayenta/judge/detectors/IQRDetector.scala
@@ -16,7 +16,7 @@
 
 package com.netflix.kayenta.judge.detectors
 
-import org.apache.commons.math3.stat.StatUtils
+import com.netflix.kayenta.judge.stats.DescriptiveStatistics.percentile
 
 /**
   * Interquartile Range Detector
@@ -39,8 +39,8 @@ class IQRDetector(factor: Double = 1.5, reduceSensitivity: Boolean = false) exte
     */
   private def calculateIQR(data: Array[Double]): (Double, Double) = {
     //Calculate the 25th and 75th percentiles
-    val p75 = StatUtils.percentile(data, 75)
-    val p25 = StatUtils.percentile(data, 25)
+    val p75 = percentile(data, 75)
+    val p25 = percentile(data, 25)
 
     //Calculate the Interquartile Range (IQR)
     val iqr = p75-p25
@@ -60,8 +60,8 @@ class IQRDetector(factor: Double = 1.5, reduceSensitivity: Boolean = false) exte
       val (lowerIQR, upperIQR) = calculateIQR(data)
 
       //Calculate the 1st and 99th percentiles
-      val p01 = StatUtils.percentile(data, 1)
-      val p99 = StatUtils.percentile(data, 99)
+      val p01 = percentile(data, 1)
+      val p99 = percentile(data, 99)
 
       //Calculate the upper and lower fences
       val lowerFence = math.min(p01, lowerIQR)

diff --git a/kayenta-judge/src/main/scala/com/netflix/kayenta/judge/stats/DescriptiveStatistics.scala b/kayenta-judge/src/main/scala/com/netflix/kayenta/judge/stats/DescriptiveStatistics.scala
@@ -18,25 +18,50 @@ package com.netflix.kayenta.judge.stats
 
 import com.netflix.kayenta.judge.Metric
 import org.apache.commons.math3.stat.StatUtils
+import org.apache.commons.math3.stat.descriptive.rank.Percentile
+import org.apache.commons.math3.stat.descriptive.rank.Percentile.EstimationType
 
 case class MetricStatistics(min: Double, max: Double, mean: Double, median: Double, count: Int)
 
 object DescriptiveStatistics {
 
   def mean(metric: Metric): Double = {
-    if(metric.values.isEmpty) 0.0 else StatUtils.mean(metric.values)
+    if (metric.values.isEmpty) 0.0 else StatUtils.mean(metric.values)
   }
 
   def median(metric: Metric): Double = {
-    if(metric.values.isEmpty) 0.0 else StatUtils.percentile(metric.values, 50)
+    if (metric.values.isEmpty) 0.0 else StatUtils.percentile(metric.values, 50)
   }
 
   def min(metric: Metric): Double = {
-    if(metric.values.isEmpty) 0.0 else StatUtils.min(metric.values)
+    if (metric.values.isEmpty) 0.0 else StatUtils.min(metric.values)
   }
 
   def max(metric: Metric): Double = {
-    if(metric.values.isEmpty) 0.0 else StatUtils.max(metric.values)
+    if (metric.values.isEmpty) 0.0 else StatUtils.max(metric.values)
+  }
+
+  /**
+    * Returns an estimate of the pth percentile of the values in the metric object.
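+    * Uses the R-7 estimation strategy (linear interpolation between the two closest order statistics) when the desired percentile lies between two data points.
+    * @param metric input metric
+    * @param p the percentile to compute, expressed as a percentage (Commons Math requires 0 < p <= 100)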
+    * @return the percentile value or Double.NaN if the metric is empty
+    */
+  def percentile(metric: Metric, p: Double): Double ={
+    this.percentile(metric.values, p)
+  }
+
+  /**
+    * Returns an estimate of the pth percentile of the values in the values array.
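+    * Uses the R-7 estimation strategy (linear interpolation between the two closest order statistics) when the desired percentile lies between two data points.
+    * @param values input array of values
+    * @param p the percentile to compute, expressed as a percentage (Commons Math requires 0 < p <= 100)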
+    * @return the percentile value or Double.NaN if the array is empty
+    */
+  def percentile(values: Array[Double], p: Double): Double ={
+    val percentile = new Percentile().withEstimationType(EstimationType.R_7)
+    percentile.evaluate(values, p)
   }
 
   def summary(metric: Metric): MetricStatistics = {

diff --git a/kayenta-judge/src/test/scala/com/netflix/kayenta/judge/DetectorSuite.scala b/kayenta-judge/src/test/scala/com/netflix/kayenta/judge/DetectorSuite.scala
@@ -59,18 +59,25 @@ class DetectorSuite extends FunSuite{
     assert(result === truth)
   }
 
-  test("IQR Reduce Sensitivity"){
-    val testData = Array(1.0, 1.0, 1.0, 1.0, 1.0, 20.0, 1.0, 1.0, 1.0, 1.0, 1.0)
-    val truth = Array(false, false, false, false, false, false, false, false, false, false, false)
+  test("IQR Empty Data"){
+    val testData = Array[Double]()
+    val truth = Array[Boolean]()
 
-    val detector = new IQRDetector(factor = 3.0, reduceSensitivity=true)
+    val detector = new IQRDetector(factor = 1.5)
     val result = detector.detect(testData)
     assert(result === truth)
   }
 
-  test("IQR Empty Data"){
-    val testData = Array[Double]()
-    val truth = Array[Boolean]()
+  test("IQR NIST Test"){
+    val testData = Array[Double](
+      30, 171, 184, 201, 212, 250, 265, 270, 272, 289, 305, 306, 322, 322, 336, 346,
+      351, 370, 390, 404, 409, 411, 436, 437, 439, 441, 444, 448, 451, 453, 470, 480,
+      482, 487, 494, 495, 499, 503, 514, 521, 522, 527, 548, 550, 559, 560, 570, 572,
+      574, 578, 585, 592, 592, 607, 616, 618, 621, 629, 637, 638, 640, 656, 668, 707,
+      709, 719, 737, 739, 752, 758, 766, 792, 792, 794, 802, 818, 830, 832, 843, 858,
+      860, 869, 918, 925, 953, 991, 1000, 1005, 1068, 1441
+    )
+    val truth = Array.fill[Boolean](testData.length - 1)(false) :+ true
 
     val detector = new IQRDetector(factor = 1.5)
     val result = detector.detect(testData)

diff --git a/kayenta-judge/src/test/scala/com/netflix/kayenta/judge/StatisticSuite.scala b/kayenta-judge/src/test/scala/com/netflix/kayenta/judge/StatisticSuite.scala
@@ -17,8 +17,9 @@
 package com.netflix.kayenta.judge
 
 import com.netflix.kayenta.judge.stats.{DescriptiveStatistics, MetricStatistics}
+import com.netflix.kayenta.judge.stats.DescriptiveStatistics.percentile
 import org.scalatest.FunSuite
-
+import org.scalatest.Matchers._
 
 class StatisticSuite extends FunSuite{
 
@@ -36,4 +37,49 @@ class StatisticSuite extends FunSuite{
     assert(result === truth)
   }
 
+  test("Basic Percentile Test"){
+    val testData = Array(0.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5)
+    assert(percentile(testData, 5) === (0.175 +- 1.0e-4))
+    assert(percentile(testData, 50) === 1.75)
+    assert(percentile(testData, 100) === 3.5)
+  }
+
+  test("Basic Percentile Estimate Test (Linear Interpolation)") {
+    val testData = Array(0.0, 1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0)
+    assert(percentile(testData, 50) === 4.5)
+  }
+
+  test("Percentile NIST Test"){
+    val testData = Array(
+      95.1772, 95.1567, 95.1937, 95.1959, 95.1442, 95.0610,
+      95.1591, 95.1195, 95.1772, 95.0925, 95.1990, 95.1682
+    )
+    assert(percentile(testData, 90) === 95.19568)
+  }
+
+  test("Percentile Metric Object Test"){
+    val metric = Metric("test", Array[Double](1.0), "test")
+    assert(percentile(metric, 100) === 1.0)
+  }
+
+  test("Percentile Estimate Test (Linear Interpolation)"){
+    val testData = Array(
+      0.07142857142857144, 0.02083333333333332, 0.16666666666666666,
+      0.03448275862068966, 0.038461538461538464, 0.03225806451612904,
+      0.027777777777777773, 0.0, 0.23076923076923078, 0.10344827586206898,
+      0.04545454545454542, 0.0, 0.028571428571428564, 0.0, 0.0, 0.04, 0.0, 0.0,
+      0.05128205128205127, 0.10714285714285716, 0.0263157894736842,
+      0.04166666666666667, 0.09523809523809522, 0.02941176470588235,
+      0.024999999999999984, 0.0, 0.0, 0.023809523809523794, 0.0,
+      0.02564102564102563, 0.0, 0.0, 0.028571428571428564, 0.07142857142857144,
+      0.047619047619047596, 0.021276595744680833, 0.02564102564102563, 0.03125,
+      0.03125, 0.03125, 0.11363636363636356, 0.03571428571428572, 0.0,
+      0.02777777777777777, 0.0, 0.0, 0.055555555555555546, 0.028571428571428564,
+      0.03225806451612904
+    )
+
+    assert(percentile(testData, 25) === 0.0)
+    assert(percentile(testData, 75) === (0.0416 +- 1.0e-4))
+  }
+
 }