Skip to content

Commit

Permalink
Use the R-7 estimation strategy when computing percentiles (#97)
Browse files Browse the repository at this point in the history
  • Loading branch information
csanden authored and Michael Graff committed Oct 3, 2017
1 parent 12fb29f commit ab9549a
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package com.netflix.kayenta.judge.detectors

import org.apache.commons.math3.stat.StatUtils
import com.netflix.kayenta.judge.stats.DescriptiveStatistics.percentile

/**
* Interquartile Range Detector
Expand All @@ -39,8 +39,8 @@ class IQRDetector(factor: Double = 1.5, reduceSensitivity: Boolean = false) exte
*/
private def calculateIQR(data: Array[Double]): (Double, Double) = {
//Calculate the 25th and 75th percentiles
val p75 = StatUtils.percentile(data, 75)
val p25 = StatUtils.percentile(data, 25)
val p75 = percentile(data, 75)
val p25 = percentile(data, 25)

//Calculate the Interquartile Range (IQR)
val iqr = p75-p25
Expand All @@ -60,8 +60,8 @@ class IQRDetector(factor: Double = 1.5, reduceSensitivity: Boolean = false) exte
val (lowerIQR, upperIQR) = calculateIQR(data)

//Calculate the 1st and 99th percentiles
val p01 = StatUtils.percentile(data, 1)
val p99 = StatUtils.percentile(data, 99)
val p01 = percentile(data, 1)
val p99 = percentile(data, 99)

//Calculate the upper and lower fences
val lowerFence = math.min(p01, lowerIQR)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,25 +18,50 @@ package com.netflix.kayenta.judge.stats

import com.netflix.kayenta.judge.Metric
import org.apache.commons.math3.stat.StatUtils
import org.apache.commons.math3.stat.descriptive.rank.Percentile
import org.apache.commons.math3.stat.descriptive.rank.Percentile.EstimationType

/**
 * Summary statistics for a single metric; this is the return type of
 * `DescriptiveStatistics.summary`.
 *
 * @param min    smallest value
 * @param max    largest value
 * @param mean   mean of the values
 * @param median median of the values
 * @param count  number of values summarised
 */
case class MetricStatistics(
  min: Double,
  max: Double,
  mean: Double,
  median: Double,
  count: Int
)

object DescriptiveStatistics {

/**
 * Mean of the metric's values, with 0.0 as the result for a metric that has no values.
 * @param metric input metric
 * @return StatUtils.mean over metric.values, or 0.0 when metric.values is empty
 */
def mean(metric: Metric): Double = {
// NOTE(review): the two lines below are the removed and the added rendering of one
// changed line in this diff view (only the space after `if` differs). As written,
// the value of the second expression is what the method returns.
if(metric.values.isEmpty) 0.0 else StatUtils.mean(metric.values)
if (metric.values.isEmpty) 0.0 else StatUtils.mean(metric.values)
}

/**
 * Median of the metric's values, with 0.0 as the result for a metric that has no values.
 * The median is taken as the 50th percentile via StatUtils.percentile, not via the
 * R-7 `percentile` helper defined in this object.
 * @param metric input metric
 * @return StatUtils.percentile(metric.values, 50), or 0.0 when metric.values is empty
 */
def median(metric: Metric): Double = {
// NOTE(review): the two lines below are the removed and the added rendering of one
// changed line in this diff view (only the space after `if` differs). As written,
// the value of the second expression is what the method returns.
if(metric.values.isEmpty) 0.0 else StatUtils.percentile(metric.values, 50)
if (metric.values.isEmpty) 0.0 else StatUtils.percentile(metric.values, 50)
}

/**
 * Minimum of the metric's values, with 0.0 as the result for a metric that has no values.
 * @param metric input metric
 * @return StatUtils.min over metric.values, or 0.0 when metric.values is empty
 */
def min(metric: Metric): Double = {
// NOTE(review): the two lines below are the removed and the added rendering of one
// changed line in this diff view (only the space after `if` differs). As written,
// the value of the second expression is what the method returns.
if(metric.values.isEmpty) 0.0 else StatUtils.min(metric.values)
if (metric.values.isEmpty) 0.0 else StatUtils.min(metric.values)
}

/**
 * Maximum of the metric's values, with 0.0 as the result for a metric that has no values.
 * @param metric input metric
 * @return StatUtils.max over metric.values, or 0.0 when metric.values is empty
 */
def max(metric: Metric): Double = {
// NOTE(review): the two lines below are the removed and the added rendering of one
// changed line in this diff view (only the space after `if` differs). As written,
// the value of the second expression is what the method returns.
if(metric.values.isEmpty) 0.0 else StatUtils.max(metric.values)
if (metric.values.isEmpty) 0.0 else StatUtils.max(metric.values)
}

/**
 * Estimates the pth percentile of the values held by a metric.
 * This is a convenience overload: it forwards `metric.values` to the array-based
 * `percentile`, which interpolates with the R-7 estimation strategy when the
 * requested percentile falls between two data points.
 * @param metric input metric whose values are evaluated
 * @param p the percentile value to compute
 * @return the percentile value or Double.NaN if the metric is empty
 */
def percentile(metric: Metric, p: Double): Double =
  percentile(metric.values, p)

/**
 * Estimates the pth percentile of an array of values.
 * A Commons Math `Percentile` configured with `EstimationType.R_7` does the work,
 * so a percentile that lies between two data points is estimated with the R-7 strategy.
 * NOTE(review): Commons Math documents `p` as required to lie in (0, 100];
 * confirm that no caller passes 0.
 * @param values input array of values
 * @param p the percentile value to compute
 * @return the percentile value or Double.NaN if the array is empty
 */
def percentile(values: Array[Double], p: Double): Double =
  new Percentile()
    .withEstimationType(EstimationType.R_7)
    .evaluate(values, p)

def summary(metric: Metric): MetricStatistics = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,18 +59,25 @@ class DetectorSuite extends FunSuite{
assert(result === truth)
}

test("IQR Reduce Sensitivity"){
val testData = Array(1.0, 1.0, 1.0, 1.0, 1.0, 20.0, 1.0, 1.0, 1.0, 1.0, 1.0)
val truth = Array(false, false, false, false, false, false, false, false, false, false, false)
test("IQR Empty Data"){
val testData = Array[Double]()
val truth = Array[Boolean]()

val detector = new IQRDetector(factor = 3.0, reduceSensitivity=true)
val detector = new IQRDetector(factor = 1.5)
val result = detector.detect(testData)
assert(result === truth)
}

test("IQR Empty Data"){
val testData = Array[Double]()
val truth = Array[Boolean]()
test("IQR NIST Test"){
val testData = Array[Double](
30, 171, 184, 201, 212, 250, 265, 270, 272, 289, 305, 306, 322, 322, 336, 346,
351, 370, 390, 404, 409, 411, 436, 437, 439, 441, 444, 448, 451, 453, 470, 480,
482, 487, 494, 495, 499, 503, 514, 521, 522, 527, 548, 550, 559, 560, 570, 572,
574, 578, 585, 592, 592, 607, 616, 618, 621, 629, 637, 638, 640, 656, 668, 707,
709, 719, 737, 739, 752, 758, 766, 792, 792, 794, 802, 818, 830, 832, 843, 858,
860, 869, 918, 925, 953, 991, 1000, 1005, 1068, 1441
)
val truth = Array.fill[Boolean](testData.length - 1)(false) :+ true

val detector = new IQRDetector(factor = 1.5)
val result = detector.detect(testData)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,9 @@
package com.netflix.kayenta.judge

import com.netflix.kayenta.judge.stats.{DescriptiveStatistics, MetricStatistics}
import com.netflix.kayenta.judge.stats.DescriptiveStatistics.percentile
import org.scalatest.FunSuite

import org.scalatest.Matchers._

class StatisticSuite extends FunSuite{

Expand All @@ -36,4 +37,49 @@ class StatisticSuite extends FunSuite{
assert(result === truth)
}

test("Basic Percentile Test"){
  // Eight evenly spaced samples: 0.0, 0.5, ..., 3.5 (multiples of 0.5 are exact doubles)
  val samples = Array.tabulate(8)(i => i * 0.5)

  val p5 = percentile(samples, 5)
  assert(p5 === (0.175 +- 1.0e-4))

  val p50 = percentile(samples, 50)
  assert(p50 === 1.75)

  val p100 = percentile(samples, 100)
  assert(p100 === 3.5)
}

test("Basic Percentile Estimate Test (Linear Interpolation)") {
  // Ten samples 0.0 through 9.0; the 50th percentile falls halfway between 4.0 and 5.0
  val samples = Array.tabulate(10)(_.toDouble)
  val middle = percentile(samples, 50)
  assert(middle === 4.5)
}

test("Percentile NIST Test"){
  val testData = Array(
    95.1772, 95.1567, 95.1937, 95.1959, 95.1442, 95.0610,
    95.1591, 95.1195, 95.1772, 95.0925, 95.1990, 95.1682
  )
  // With R-7 and 12 samples, the 90th percentile sits at position 10.9 of the sorted
  // data, i.e. 95.1937 + 0.9 * (95.1959 - 95.1937) = 95.19568.
  // The result is an interpolated floating-point value, so compare with a tolerance
  // (as the other percentile tests do) rather than relying on bit-exact equality.
  assert(percentile(testData, 90) === (95.19568 +- 1.0e-6))
}

test("Percentile Metric Object Test"){
  // Exercises the Metric overload with a single-value metric
  val singleValue = Array(1.0)
  val metric = Metric("test", singleValue, "test")
  val result = percentile(metric, 100)
  assert(result === 1.0)
}

test("Percentile Estimate Test (Linear Interpolation)"){
  // 49 samples, many of them exactly zero, so the lower quartile is exactly 0.0
  val samples = Array(
    0.07142857142857144, 0.02083333333333332, 0.16666666666666666,
    0.03448275862068966, 0.038461538461538464, 0.03225806451612904,
    0.027777777777777773, 0.0, 0.23076923076923078, 0.10344827586206898,
    0.04545454545454542, 0.0, 0.028571428571428564, 0.0, 0.0, 0.04, 0.0, 0.0,
    0.05128205128205127, 0.10714285714285716, 0.0263157894736842,
    0.04166666666666667, 0.09523809523809522, 0.02941176470588235,
    0.024999999999999984, 0.0, 0.0, 0.023809523809523794, 0.0,
    0.02564102564102563, 0.0, 0.0, 0.028571428571428564, 0.07142857142857144,
    0.047619047619047596, 0.021276595744680833, 0.02564102564102563, 0.03125,
    0.03125, 0.03125, 0.11363636363636356, 0.03571428571428572, 0.0,
    0.02777777777777777, 0.0, 0.0, 0.055555555555555546, 0.028571428571428564,
    0.03225806451612904
  )

  val lowerQuartile = percentile(samples, 25)
  assert(lowerQuartile === 0.0)

  val upperQuartile = percentile(samples, 75)
  assert(upperQuartile === (0.0416 +- 1.0e-4))
}

}

0 comments on commit ab9549a

Please sign in to comment.