
Commit 613b90e
Changed GBTRegressor so it does NOT threshold the prediction
jkbradley committed Jun 30, 2015
1 parent 74cc16d commit 613b90e
Showing 2 changed files with 23 additions and 3 deletions.
@@ -172,8 +172,7 @@ final class GBTRegressionModel(
     // TODO: When we add a generic Boosting class, handle transform there?  SPARK-7129
     // Classifies by thresholding sum of weighted tree predictions
     val treePredictions = _trees.map(_.rootNode.predict(features))
-    val prediction = blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1)
-    if (prediction > 0.0) 1.0 else 0.0
+    blas.ddot(numTrees, treePredictions, 1, _treeWeights, 1)
   }

   override def copy(extra: ParamMap): GBTRegressionModel = {
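To make the behavior change concrete, here is a small standalone Scala sketch (illustrative only, not the Spark implementation; the helper name gbtRegressionPredict is made up): the regressor now returns the weighted sum of the per-tree predictions directly instead of thresholding it to 0.0/1.0.

// Illustrative sketch: the GBT regression prediction is the dot product of
// the per-tree predictions with the tree weights, returned as-is.
def gbtRegressionPredict(treePredictions: Array[Double], treeWeights: Array[Double]): Double = {
  require(treePredictions.length == treeWeights.length, "one weight per tree")
  treePredictions.zip(treeWeights).map { case (p, w) => p * w }.sum
}

// Example: tree predictions 3.0 and -1.0 with weights 1.0 and 0.5 give
// 3.0 * 1.0 + (-1.0) * 0.5 = 2.5. Before this commit the model returned 1.0
// for any positive sum; now it returns 2.5.
val example = gbtRegressionPredict(Array(3.0, -1.0), Array(1.0, 0.5))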
@@ -19,12 +19,13 @@ package org.apache.spark.ml.regression
 
 import org.apache.spark.SparkFunSuite
 import org.apache.spark.ml.impl.TreeTests
+import org.apache.spark.mllib.linalg.Vectors
 import org.apache.spark.mllib.regression.LabeledPoint
 import org.apache.spark.mllib.tree.{EnsembleTestHelper, GradientBoostedTrees => OldGBT}
 import org.apache.spark.mllib.tree.configuration.{Algo => OldAlgo}
 import org.apache.spark.mllib.util.MLlibTestSparkContext
 import org.apache.spark.rdd.RDD
-import org.apache.spark.sql.DataFrame
+import org.apache.spark.sql.{DataFrame, Row}
 
 
 /**
@@ -67,6 +68,26 @@ class GBTRegressorSuite extends SparkFunSuite with MLlibTestSparkContext {
     }
   }
 
+  test("GBTRegressor behaves reasonably on toy data") {
+    val df = sqlContext.createDataFrame(Seq(
+      LabeledPoint(10, Vectors.dense(1, 2, 3, 4)),
+      LabeledPoint(-5, Vectors.dense(6, 3, 2, 1)),
+      LabeledPoint(11, Vectors.dense(2, 2, 3, 4)),
+      LabeledPoint(-6, Vectors.dense(6, 4, 2, 1)),
+      LabeledPoint(9, Vectors.dense(1, 2, 6, 4)),
+      LabeledPoint(-4, Vectors.dense(6, 3, 2, 2))
+    ))
+    val gbt = new GBTRegressor()
+      .setMaxDepth(2)
+      .setMaxIter(2)
+    val model = gbt.fit(df)
+    val preds = model.transform(df)
+    val predictions = preds.select("prediction").map(_.getDouble(0))
+    // Checks based on SPARK-8736 (to ensure it is not doing classification)
+    assert(predictions.max() > 2)
+    assert(predictions.min() < -1)
+  }
+
   // TODO: Reinstate test once runWithValidation is implemented  SPARK-7132
   /*
   test("runWithValidation stops early and performs better on a validation dataset") {
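As a quick interactive sanity check of the fix, here is a hypothetical spark-shell snippet (assuming the sqlContext provided by the 1.x shell; the toy data mirrors the test above): after this commit the fitted model's predictions should span the label scale rather than collapsing to 0.0 and 1.0.

import org.apache.spark.ml.regression.GBTRegressor
import org.apache.spark.mllib.linalg.Vectors
import org.apache.spark.mllib.regression.LabeledPoint

// Toy regression data with labels near 10 and near -5.
val df = sqlContext.createDataFrame(Seq(
  LabeledPoint(10, Vectors.dense(1, 2, 3, 4)),
  LabeledPoint(-5, Vectors.dense(6, 3, 2, 1)),
  LabeledPoint(11, Vectors.dense(2, 2, 3, 4)),
  LabeledPoint(-6, Vectors.dense(6, 4, 2, 1))))

val model = new GBTRegressor().setMaxDepth(2).setMaxIter(2).fit(df)

// Before this commit every prediction was 0.0 or 1.0; now the values are
// real-valued and should roughly track the labels.
model.transform(df).select("prediction").collect().foreach(println)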
