From a728667c7aca1638d44ff9442806bbb070a91305 Mon Sep 17 00:00:00 2001 From: Christopher Suchanek Date: Wed, 19 Jun 2019 10:49:54 -0700 Subject: [PATCH 1/5] Lets add forecast support into TMOG --- .../op/evaluators/EvaluationMetrics.scala | 16 +++ .../op/evaluators/OpForecastEvaluator.scala | 111 ++++++++++++++++++ .../evaluators/OpForecastEvaluatorTest.scala | 101 ++++++++++++++++ .../OpRegressionEvaluatorTest.scala | 1 + 4 files changed, 229 insertions(+) create mode 100644 core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala create mode 100644 core/src/test/scala/com/salesforce/op/evaluators/OpForecastEvaluatorTest.scala diff --git a/core/src/main/scala/com/salesforce/op/evaluators/EvaluationMetrics.scala b/core/src/main/scala/com/salesforce/op/evaluators/EvaluationMetrics.scala index f227cd79b0..8f61332cd7 100644 --- a/core/src/main/scala/com/salesforce/op/evaluators/EvaluationMetrics.scala +++ b/core/src/main/scala/com/salesforce/op/evaluators/EvaluationMetrics.scala @@ -176,6 +176,22 @@ object RegressionEvalMetrics extends Enum[RegressionEvalMetric] { } +/** + * Forecast Metrics + */ +sealed abstract class ForecastEvalMetric +( + val sparkEntryName: String, + val humanFriendlyName: String +) extends EvalMetric + + +object ForecastEvalMetrics extends Enum[RegressionEvalMetric] { + val values: Seq[RegressionEvalMetric] = findValues + case object sMAPE extends RegressionEvalMetric("smape", "symmetric Mean Absolute Percentage Error") +} + + /** * GeneralMetrics */ diff --git a/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala b/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala new file mode 100644 index 0000000000..62a9147e2a --- /dev/null +++ b/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2017, Salesforce.com, Inc. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +package com.salesforce.op.evaluators + +import com.salesforce.op.UID +import com.salesforce.op.utils.spark.RichEvaluator._ +import org.apache.spark.ml.evaluation.RegressionEvaluator +import org.apache.spark.sql.Dataset +import org.slf4j.LoggerFactory + +/** + * + * Instance to evaluate Regression metrics + * The metrics are rmse, mse, r2 and mae + * Default evaluation returns Root Mean Squared Error + * + * @param name name of default metric + * @param isLargerBetter is metric better if larger + * @param uid uid for instance + */ + +private[op] class OpForecastEvaluator +( + override val name: EvalMetric = OpEvaluatorNames.Regression, + override val isLargerBetter: Boolean = false, + override val uid: String = UID[OpRegressionEvaluator] +) extends OpRegressionEvaluatorBase[ForecastMetrics](uid) { + + @transient private lazy val log = LoggerFactory.getLogger(this.getClass) + + def getDefaultMetric: ForecastMetrics => Double = _.sMAPE + + override def evaluateAll(data: Dataset[_]): ForecastMetrics = { + val dataUse = makeDataToUse(data, getLabelCol) + + val smape: Double = getSMAPE(dataUse, getLabelCol, getPredictionValueCol) + val metrics = ForecastMetrics( + sMAPE = smape + ) + + log.info("Evaluated metrics: {}", metrics.toString) + metrics + + } + + protected def getSMAPE(data: Dataset[_], labelCol: String, predictionValueCol: String): Double = { + data.select(labelCol, predictionValueCol).rdd + .map(r => ReduceSMAPE(r.getAs[Double](0), r.getAs[Double](1))) + .reduce(_ + _).metric + } +} + +// https://www.m4.unic.ac.cy/wp-content/uploads/2018/03/M4-Competitors-Guide.pdf +case class ReduceSMAPE(nominator: Double, denominator: Double, cnt: Long) { + def +(that: ReduceSMAPE): ReduceSMAPE = { + ReduceSMAPE(this.nominator + that.nominator, this.denominator + that.denominator, this.cnt + that.cnt) + } + + def metric: Double = { + if (denominator == 0.0) { + Double.PositiveInfinity + } else { + (nominator / denominator) / cnt + } + } +} + +object ReduceSMAPE { + 
def apply(y: Double, y_hat: Double): ReduceSMAPE = { + ReduceSMAPE(2 * Math.abs(y - y_hat), Math.abs(y) + Math.abs(y_hat), 1L) + } +} + +/** + * Metrics of Regression Problem + * + * @param sMAPE symmetric Mean Absolute Percentage Error + * + */ +case class ForecastMetrics +( + sMAPE: Double +) extends EvaluationMetrics diff --git a/core/src/test/scala/com/salesforce/op/evaluators/OpForecastEvaluatorTest.scala b/core/src/test/scala/com/salesforce/op/evaluators/OpForecastEvaluatorTest.scala new file mode 100644 index 0000000000..671a3c0122 --- /dev/null +++ b/core/src/test/scala/com/salesforce/op/evaluators/OpForecastEvaluatorTest.scala @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2017, Salesforce.com, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright notice, this + * list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * * Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, + * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package com.salesforce.op.evaluators + +import com.salesforce.op.features.types._ +import com.salesforce.op.stages.impl.classification.OpLogisticRegression +import com.salesforce.op.stages.impl.regression.{OpLinearRegression, RegressionModelSelector} +import com.salesforce.op.test.{TestFeatureBuilder, TestSparkContext} +import org.apache.spark.ml.linalg.Vectors +import org.apache.spark.ml.param.ParamMap +import org.apache.spark.ml.tuning.ParamGridBuilder +import org.junit.runner.RunWith +import org.scalatest.FlatSpec +import org.scalatest.junit.JUnitRunner + +@RunWith(classOf[JUnitRunner]) +class OpForecastEvaluatorTest extends FlatSpec with TestSparkContext { + + val (ds, rawLabel, features) = TestFeatureBuilder[RealNN, OPVector]( + Seq( + (12.0, Vectors.dense(1.0, 4.3, 1.3)), + (22.0, Vectors.dense(2.0, 0.3, 0.1)), + (32.0, Vectors.dense(3.0, 3.9, 4.3)), + (42.0, Vectors.dense(4.0, 1.3, 0.9)), + (52.0, Vectors.dense(5.0, 4.7, 1.3)), + (17.0, Vectors.dense(1.0, 4.3, 1.3)), + (27.0, Vectors.dense(2.0, 0.3, 0.1)), + (37.0, Vectors.dense(3.0, 3.9, 4.3)), + (47.0, Vectors.dense(4.0, 1.3, 0.9)), + (57.0, Vectors.dense(5.0, 4.7, 1.3)) + ).map(v => v._1.toRealNN -> v._2.toOPVector) + ) + + val label = rawLabel.copy(isResponse = true) + + val lr = new OpLogisticRegression() + val lrParams = new ParamGridBuilder().addGrid(lr.regParam, Array(0.0)).build() + + val testEstimator = 
RegressionModelSelector.withTrainValidationSplit(dataSplitter = None, trainRatio = .5, + modelsAndParameters = Seq(lr -> lrParams), seed = 1239871928731L) + .setInput(label, features) + + val prediction = testEstimator.getOutput() + val testEvaluator = new OpForecastEvaluator().setLabelCol(label).setPredictionCol(prediction) + + val testEstimator2 = new OpLinearRegression().setInput(label, features) + + val prediction2 = testEstimator2.getOutput() + val testEvaluator2 = new OpForecastEvaluator().setLabelCol(label).setPredictionCol(prediction2) + + + Spec[OpForecastEvaluator] should "copy" in { + val testEvaluatorCopy = testEvaluator.copy(ParamMap()) + testEvaluatorCopy.uid shouldBe testEvaluator.uid + } + + it should "evaluate the metrics from a model selector" in { + val model = testEstimator.fit(ds) + val transformedData = model.setInput(label, features).transform(ds) + val metrics = testEvaluator.evaluateAll(transformedData).toMetadata() + + metrics.getDouble(ForecastEvalMetrics.sMAPE.toString) shouldBe (0.0075 +- 1e-4) + + } + + it should "evaluate the metrics from a single model" in { + val model = testEstimator2.fit(ds) + val transformedData = model.setInput(label, features).transform(ds) + val metrics = testEvaluator2.evaluateAll(transformedData).toMetadata() + + metrics.getDouble(ForecastEvalMetrics.sMAPE.toString) shouldBe (0.0072 +- 1e-4) + } +} diff --git a/core/src/test/scala/com/salesforce/op/evaluators/OpRegressionEvaluatorTest.scala b/core/src/test/scala/com/salesforce/op/evaluators/OpRegressionEvaluatorTest.scala index 5081110bff..84332ced6b 100644 --- a/core/src/test/scala/com/salesforce/op/evaluators/OpRegressionEvaluatorTest.scala +++ b/core/src/test/scala/com/salesforce/op/evaluators/OpRegressionEvaluatorTest.scala @@ -97,6 +97,7 @@ class OpRegressionEvaluatorTest extends FlatSpec with TestSparkContext { it should "evaluate the metrics from a single model" in { val model = testEstimator2.fit(ds) val transformedData = model.setInput(label, 
features).transform(ds) + transformedData.show(10) val metrics = testEvaluator2.evaluateAll(transformedData).toMetadata() assert(metrics.getDouble(RegressionEvalMetrics.RootMeanSquaredError.toString) <= 1E-12, "rmse should be close to 0") From 736c7c0ba927033e1fe2d84a51d92d07866c716b Mon Sep 17 00:00:00 2001 From: Christopher Suchanek Date: Wed, 19 Jun 2019 10:51:41 -0700 Subject: [PATCH 2/5] removed debug --- .../com/salesforce/op/evaluators/OpRegressionEvaluatorTest.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/core/src/test/scala/com/salesforce/op/evaluators/OpRegressionEvaluatorTest.scala b/core/src/test/scala/com/salesforce/op/evaluators/OpRegressionEvaluatorTest.scala index 84332ced6b..5081110bff 100644 --- a/core/src/test/scala/com/salesforce/op/evaluators/OpRegressionEvaluatorTest.scala +++ b/core/src/test/scala/com/salesforce/op/evaluators/OpRegressionEvaluatorTest.scala @@ -97,7 +97,6 @@ class OpRegressionEvaluatorTest extends FlatSpec with TestSparkContext { it should "evaluate the metrics from a single model" in { val model = testEstimator2.fit(ds) val transformedData = model.setInput(label, features).transform(ds) - transformedData.show(10) val metrics = testEvaluator2.evaluateAll(transformedData).toMetadata() assert(metrics.getDouble(RegressionEvalMetrics.RootMeanSquaredError.toString) <= 1E-12, "rmse should be close to 0") From a18845dd584b22f1f5b61e75c2c331c7e1ded618 Mon Sep 17 00:00:00 2001 From: Christopher Suchanek Date: Fri, 21 Jun 2019 10:27:41 -0700 Subject: [PATCH 3/5] fixes --- .../op/evaluators/EvaluationMetrics.scala | 8 ++-- .../op/evaluators/OpForecastEvaluator.scala | 48 +++++++++---------- .../evaluators/OpForecastEvaluatorTest.scala | 12 ++++- 3 files changed, 39 insertions(+), 29 deletions(-) diff --git a/core/src/main/scala/com/salesforce/op/evaluators/EvaluationMetrics.scala b/core/src/main/scala/com/salesforce/op/evaluators/EvaluationMetrics.scala index 8f61332cd7..186a69a2cd 100644 --- 
a/core/src/main/scala/com/salesforce/op/evaluators/EvaluationMetrics.scala +++ b/core/src/main/scala/com/salesforce/op/evaluators/EvaluationMetrics.scala @@ -110,6 +110,7 @@ object EvalMetric { BinaryClassEvalMetrics.withNameInsensitiveOption(name) .orElse(MultiClassEvalMetrics.withNameInsensitiveOption(name)) .orElse(RegressionEvalMetrics.withNameInsensitiveOption(name)) + .orElse(ForecastEvalMetrics.withNameInsensitiveOption(name)) .orElse(OpEvaluatorNames.withNameInsensitiveOption(name)) .getOrElse(OpEvaluatorNames.Custom(name, name)) } @@ -186,9 +187,9 @@ sealed abstract class ForecastEvalMetric ) extends EvalMetric -object ForecastEvalMetrics extends Enum[RegressionEvalMetric] { - val values: Seq[RegressionEvalMetric] = findValues - case object sMAPE extends RegressionEvalMetric("smape", "symmetric Mean Absolute Percentage Error") +object ForecastEvalMetrics extends Enum[ForecastEvalMetric] { + val values: Seq[ForecastEvalMetric] = findValues + case object SMAPE extends ForecastEvalMetric("smape", "symmetric mean absolute percentage error") } @@ -210,6 +211,7 @@ object OpEvaluatorNames extends Enum[OpEvaluatorNames] { case object BinScore extends OpEvaluatorNames("binScoreEval", "bin score evaluation metrics") case object Multi extends OpEvaluatorNames("multiEval", "multiclass evaluation metrics") case object Regression extends OpEvaluatorNames("regEval", "regression evaluation metrics") + case object Forecast extends OpEvaluatorNames("regForecast", "regression evaluation metrics") case class Custom(name: String, humanName: String) extends OpEvaluatorNames(name, humanName) { override def entryName: String = name.toLowerCase } diff --git a/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala b/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala index 62a9147e2a..b45cf37d6b 100644 --- a/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala +++ 
b/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala @@ -35,12 +35,15 @@ import com.salesforce.op.utils.spark.RichEvaluator._ import org.apache.spark.ml.evaluation.RegressionEvaluator import org.apache.spark.sql.Dataset import org.slf4j.LoggerFactory +import com.twitter.algebird.Operators._ +import com.twitter.algebird.Semigroup +import com.twitter.algebird.macros.caseclass /** * - * Instance to evaluate Regression metrics - * The metrics are rmse, mse, r2 and mae - * Default evaluation returns Root Mean Squared Error + * Instance to evaluate Forecast metrics + * The metrics are SMAPE + * Default evaluation returns SMAPE * * @param name name of default metric * @param isLargerBetter is metric better if larger @@ -49,63 +52,60 @@ import org.slf4j.LoggerFactory private[op] class OpForecastEvaluator ( - override val name: EvalMetric = OpEvaluatorNames.Regression, + override val name: EvalMetric = OpEvaluatorNames.Forecast, override val isLargerBetter: Boolean = false, - override val uid: String = UID[OpRegressionEvaluator] + override val uid: String = UID[OpForecastEvaluator] ) extends OpRegressionEvaluatorBase[ForecastMetrics](uid) { @transient private lazy val log = LoggerFactory.getLogger(this.getClass) - def getDefaultMetric: ForecastMetrics => Double = _.sMAPE + def getDefaultMetric: ForecastMetrics => Double = _.SMAPE override def evaluateAll(data: Dataset[_]): ForecastMetrics = { val dataUse = makeDataToUse(data, getLabelCol) val smape: Double = getSMAPE(dataUse, getLabelCol, getPredictionValueCol) - val metrics = ForecastMetrics( - sMAPE = smape - ) + val metrics = ForecastMetrics(SMAPE = smape) log.info("Evaluated metrics: {}", metrics.toString) metrics } + + protected def getSMAPE(data: Dataset[_], labelCol: String, predictionValueCol: String): Double = { data.select(labelCol, predictionValueCol).rdd - .map(r => ReduceSMAPE(r.getAs[Double](0), r.getAs[Double](1))) - .reduce(_ + _).metric + .map(r => SMAPEValue(r.getAs[Double](0), 
r.getAs[Double](1))) + .reduce(_ + _).value } } // https://www.m4.unic.ac.cy/wp-content/uploads/2018/03/M4-Competitors-Guide.pdf -case class ReduceSMAPE(nominator: Double, denominator: Double, cnt: Long) { - def +(that: ReduceSMAPE): ReduceSMAPE = { - ReduceSMAPE(this.nominator + that.nominator, this.denominator + that.denominator, this.cnt + that.cnt) +object SMAPEValue { + def apply(y: Double, yHat: Double): SMAPEValue = { + SMAPEValue(2 * Math.abs(y - yHat), Math.abs(y) + Math.abs(yHat), 1L) } + implicit val smapeSG: Semigroup[SMAPEValue] = caseclass.semigroup[SMAPEValue] +} - def metric: Double = { +case class SMAPEValue private (nominator: Double, denominator: Double, cnt: Long) { + def value: Double = { if (denominator == 0.0) { - Double.PositiveInfinity + Double.NaN } else { (nominator / denominator) / cnt } } } -object ReduceSMAPE { - def apply(y: Double, y_hat: Double): ReduceSMAPE = { - ReduceSMAPE(2 * Math.abs(y - y_hat), Math.abs(y) + Math.abs(y_hat), 1L) - } -} - /** * Metrics of Regression Problem * - * @param sMAPE symmetric Mean Absolute Percentage Error + * @param SMAPE symmetric Mean Absolute Percentage Error * */ case class ForecastMetrics ( - sMAPE: Double + SMAPE: Double ) extends EvaluationMetrics diff --git a/core/src/test/scala/com/salesforce/op/evaluators/OpForecastEvaluatorTest.scala b/core/src/test/scala/com/salesforce/op/evaluators/OpForecastEvaluatorTest.scala index 671a3c0122..7a5314b972 100644 --- a/core/src/test/scala/com/salesforce/op/evaluators/OpForecastEvaluatorTest.scala +++ b/core/src/test/scala/com/salesforce/op/evaluators/OpForecastEvaluatorTest.scala @@ -87,7 +87,7 @@ class OpForecastEvaluatorTest extends FlatSpec with TestSparkContext { val transformedData = model.setInput(label, features).transform(ds) val metrics = testEvaluator.evaluateAll(transformedData).toMetadata() - metrics.getDouble(ForecastEvalMetrics.sMAPE.toString) shouldBe (0.0075 +- 1e-4) + metrics.getDouble(ForecastEvalMetrics.SMAPE.toString) shouldBe (0.0075 
+- 1e-4) } @@ -95,7 +95,15 @@ class OpForecastEvaluatorTest extends FlatSpec with TestSparkContext { val model = testEstimator2.fit(ds) val transformedData = model.setInput(label, features).transform(ds) val metrics = testEvaluator2.evaluateAll(transformedData).toMetadata() + metrics.getDouble(ForecastEvalMetrics.SMAPE.toString) shouldBe (0.0072 +- 1e-4) + } - metrics.getDouble(ForecastEvalMetrics.sMAPE.toString) shouldBe (0.0072 +- 1e-4) + it should "evaluate the metrics when data is 0" in { + val data = Seq(0.0, 0.0, 0.0).map(x => (x, Map("prediction" -> x))) + import spark.implicits._ + val df = spark.sparkContext.parallelize(data).toDF("f1", "r1") + val metrics = new OpForecastEvaluator().setLabelCol("f1").setPredictionCol("r1").evaluateAll(df).toMetadata() + metrics.getDouble(ForecastEvalMetrics.SMAPE.toString).isNaN shouldBe true } + } From e2b7e142984f54037115c67cd36232c769ef5877 Mon Sep 17 00:00:00 2001 From: Christopher Suchanek Date: Fri, 21 Jun 2019 10:44:18 -0700 Subject: [PATCH 4/5] docs fix --- .../com/salesforce/op/evaluators/OpForecastEvaluator.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala b/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala index b45cf37d6b..cfc4e349c8 100644 --- a/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala +++ b/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala @@ -100,7 +100,7 @@ case class SMAPEValue private (nominator: Double, denominator: Double, cnt: Long } /** - * Metrics of Regression Problem + * Metrics of Forecasting Problem * * @param SMAPE symmetric Mean Absolute Percentage Error * From 14843704b55a4223aac4e4e1e0b5ecaae5bd59c0 Mon Sep 17 00:00:00 2001 From: Matthew Tovbin Date: Sat, 22 Jun 2019 08:23:30 -0700 Subject: [PATCH 5/5] Update OpForecastEvaluator.scala --- .../op/evaluators/OpForecastEvaluator.scala | 26 ++++++------------- 1 file 
changed, 8 insertions(+), 18 deletions(-) diff --git a/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala b/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala index cfc4e349c8..dcc8739f74 100644 --- a/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala +++ b/core/src/main/scala/com/salesforce/op/evaluators/OpForecastEvaluator.scala @@ -40,7 +40,6 @@ import com.twitter.algebird.Semigroup import com.twitter.algebird.macros.caseclass /** - * * Instance to evaluate Forecast metrics * The metrics are SMAPE * Default evaluation returns SMAPE @@ -49,7 +48,6 @@ import com.twitter.algebird.macros.caseclass * @param isLargerBetter is metric better if larger * @param uid uid for instance */ - private[op] class OpForecastEvaluator ( override val name: EvalMetric = OpEvaluatorNames.Forecast, @@ -69,11 +67,8 @@ private[op] class OpForecastEvaluator log.info("Evaluated metrics: {}", metrics.toString) metrics - } - - protected def getSMAPE(data: Dataset[_], labelCol: String, predictionValueCol: String): Double = { data.select(labelCol, predictionValueCol).rdd .map(r => SMAPEValue(r.getAs[Double](0), r.getAs[Double](1))) @@ -81,7 +76,12 @@ private[op] class OpForecastEvaluator } } -// https://www.m4.unic.ac.cy/wp-content/uploads/2018/03/M4-Competitors-Guide.pdf +/** + * SMAPE value computation. 
See formula here: + * https://www.m4.unic.ac.cy/wp-content/uploads/2018/03/M4-Competitors-Guide.pdf + * + * Companion object: builds the [[SMAPEValue]] of a single (label, prediction) pair + */ object SMAPEValue { def apply(y: Double, yHat: Double): SMAPEValue = { SMAPEValue(2 * Math.abs(y - yHat), Math.abs(y) + Math.abs(yHat), 1L) @@ -90,22 +90,12 @@ object SMAPEValue { } case class SMAPEValue private (nominator: Double, denominator: Double, cnt: Long) { - def value: Double = { - if (denominator == 0.0) { - Double.NaN - } else { - (nominator / denominator) / cnt - } - } + def value: Double = if (denominator == 0.0) Double.NaN else (nominator / denominator) / cnt } /** * Metrics of Forecasting Problem * * @param SMAPE symmetric Mean Absolute Percentage Error - * */ -case class ForecastMetrics -( - SMAPE: Double -) extends EvaluationMetrics +case class ForecastMetrics(SMAPE: Double) extends EvaluationMetrics