microsoft · StrikerRUS · Jun 27, 2022 · Apr 7, 2022 · Apr 7, 2022 · Apr 7, 2022
@@ -742,6 +742,26 @@ Booster <- R6::R6Class(
 #' @param object Object of class \code{lgb.Booster}
 #' @param newdata a \code{matrix} object, a \code{dgCMatrix} object or
 #'                a character representing a path to a text file (CSV, TSV, or LibSVM)
+#' @param type Type of prediction to output. Allowed types are:\itemize{
 if callable(self._objective) and not (raw_score or pred_leaf or pred_contrib): 
     _log_warning("Cannot compute class probabilities or labels " 
                  "due to the usage of customized objective function.\n" 
                  "Returning raw scores instead.") 
 if callable(self._objective) and not (raw_score or pred_leaf or pred_contrib): 
     _log_warning("Cannot compute class probabilities or labels " 
                  "due to the usage of customized objective function.\n" 
                  "Returning raw scores instead.") 
+#'             \item \code{"link"}: will output the predicted score according to the objective function being
+#'                   optimized (depending on the link function that the objective uses), after applying any necessary
+#'                   transformations - for example, for \code{objective="binary"}, it will output class probabilities.
+#'             \item \code{"response"}: for classification objectives, will output the class with the highest predicted
+#'                   probability. For other objectives, will output the same as "link".
+#'             \item \code{"raw"}: will output the non-transformed numbers (sum of predictions from boosting iterations'
+#'                   results) from which the "link" number is produced for a given objective function - for example, for
+#'                   \code{objective="binary"}, this corresponds to log-odds. For many objectives such as "regression",
+#'                   since no transformation is applied, the output will be the same as for "link".
+#'             \item \code{"leaf"}: will output the index of the terminal node / leaf at which each observation falls
+#'                   in each tree in the model, outputted as integers, with one column per tree.
+#'             \item \code{"contrib"}: will return the per-feature contributions for each prediction, including an
+#'                   intercept (each feature will produce one column). If there are multiple classes, each class will
+#'                   have separate feature contributions (thus the number of columns is features+1 multiplied by the
+#'                   number of classes).
+#'             }
+#'
+#'             Note that, if using custom objectives, types "link" and "response" will not be available and will
+#'             fall back to "raw" instead.
 #' @param start_iteration int or None, optional (default=None)
 #'                        Start index of the iteration to predict.
 #'                        If None or <= 0, starts from the first iteration.
@@ -750,23 +770,20 @@ Booster <- R6::R6Class(
 #'                      If None, if the best iteration exists and start_iteration is None or <= 0, the
 #'                      best iteration is used; otherwise, all iterations from start_iteration are used.
 #'                      If <= 0, all iterations from start_iteration are used (no limits).
-#' @param rawscore whether the prediction should be returned in the for of original untransformed
-#'                 sum of predictions from boosting iterations' results. E.g., setting \code{rawscore=TRUE}
-#'                 for logistic regression would result in predictions for log-odds instead of probabilities.
-#' @param predleaf whether predict leaf index instead.
-#' @param predcontrib return per-feature contributions for each record.
 #' @param header only used for prediction for text file. True if text file has header
 #' @param params a list of additional named parameters. See
 #'               \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{
 #'               the "Predict Parameters" section of the documentation} for a list of parameters and
 #'               valid values. Where these conflict with the values of keyword arguments to this function,
 #'               the values in \code{params} take precedence.
 #' @param ... ignored
-#' @return For regression or binary classification, it returns a vector of length \code{nrows(data)}.
-#'         For multiclass classification, it returns a matrix of dimensions \code{(nrows(data), num_class)}.
+#' @return For prediction types that are meant to always return one output per observation (e.g. when predicting
+#'         \code{type="link"} on a binary classification or regression objective), will return a vector with one
+#'         element per observation in \code{newdata}.
 #'
-#'         When passing \code{predleaf=TRUE} or \code{predcontrib=TRUE}, the output will always be
-#'         returned as a matrix.
+#'         For prediction types that are meant to return more than one output per observation (e.g. when predicting
+#'         \code{type="link"} on a multi-class objective, or when predicting \code{type="leaf"}, regardless of
+#'         objective), will return a matrix with one row per observation in \code{newdata} and one column per output.
 #'
 #' @examples
 #' \donttest{
@@ -804,11 +821,9 @@ Booster <- R6::R6Class(
 #' @export
 predict.lgb.Booster <- function(object,
                                 newdata,
+                                type = "link",
                                 start_iteration = NULL,
                                 num_iteration = NULL,
-                                rawscore = FALSE,
-                                predleaf = FALSE,
 if ("reshape" %in% names(additional_params)) { 
   stop("'reshape' argument is no longer supported.") 
 } 
 if ("reshape" %in% names(additional_params)) { 
   stop("'reshape' argument is no longer supported.") 
 } 
-                                predcontrib = FALSE,
                                 header = FALSE,
                                 params = list(),
                                 ...) {
@@ -819,28 +834,65 @@ predict.lgb.Booster <- function(object,
 
   additional_params <- list(...)
   if (length(additional_params) > 0L) {
-    if ("reshape" %in% names(additional_params)) {
+    additional_params_names <- names(additional_params)
+    if ("reshape" %in% additional_params_names) {
       stop("'reshape' argument is no longer supported.")
     }
+
+    old_args_for_type <- list(
+      "rawscore" = "raw"
+      , "predleaf" = "leaf"
+      , "predcontrib" = "contrib"
+    )
+    for (arg in names(old_args_for_type)) {
+      if (arg %in% additional_params_names) {
+        stop(sprintf("Argument '%s' is no longer supported. Use type='%s' instead."
+                     , arg
+                     , old_args_for_type[[arg]]))
+      }
+    }
+
     warning(paste0(
       "predict.lgb.Booster: Found the following passed through '...': "
-      , paste(names(additional_params), collapse = ", ")
+      , paste(additional_params_names, collapse = ", ")
       , ". These are ignored. Use argument 'params' instead."
     ))
   }
 
-  return(
-    object$predict(
-      data = newdata
-      , start_iteration = start_iteration
-      , num_iteration = num_iteration
-      , rawscore = rawscore
-      , predleaf =  predleaf
-      , predcontrib =  predcontrib
-      , header = header
-      , params = params
-    )
+  if (!is.null(object$params$objective) && object$params$objective == "none" && type %in% c("link", "response")) {
+    warning("Prediction types 'link' and 'response' are not supported for custom objectives.")
+    type <- "raw"
+  }
+
+  rawscore <- FALSE
+  predleaf <- FALSE
+  predcontrib <- FALSE
+  if (type == "raw") {
+    rawscore <- TRUE
+  } else if (type == "leaf") {
+    predleaf <- TRUE
+  } else if (type == "contrib") {
+    predcontrib <- TRUE
+  }
+
+  pred <- object$predict(
+    data = newdata
+    , start_iteration = start_iteration
+    , num_iteration = num_iteration
+    , rawscore = rawscore
+    , predleaf =  predleaf
+    , predcontrib =  predcontrib
+    , header = header
+    , params = params
   )
+  if (type == "response") {
+    if (object$params$objective == "binary") {
+      pred <- as.integer(pred >= 0.5)
+    } else if (object$params$objective %in% c("multiclass", "multiclassova")) {
+      pred <- max.col(pred) - 1L
+    }
+  }
+  return(pred)
 }
 
 #' @name print.lgb.Booster

@@ -22,8 +22,8 @@ param <- list(
 bst <- lgb.train(param, dtrain, 1L, valids = valids)
 
 # Note: we need the margin value instead of transformed prediction in set_init_score
-ptrain <- predict(bst, agaricus.train$data, rawscore = TRUE)
-ptest  <- predict(bst, agaricus.test$data, rawscore = TRUE)
+ptrain <- predict(bst, agaricus.train$data, type = "raw")
+ptest  <- predict(bst, agaricus.test$data, type = "raw")
 
 # set the init_score property of dtrain and dtest
 # base margin is the base prediction we will boost from

@@ -111,7 +111,7 @@ new_data <- data.frame(
     X = rowMeans(predict(
         model
         , agaricus.test$data
-        , predleaf = TRUE
+        , type = "leaf"
     ))
     , Y = pmin(
         pmax(
@@ -162,7 +162,7 @@ new_data2 <- data.frame(
     X = rowMeans(predict(
         model2
         , agaricus.test$data
-        , predleaf = TRUE
+        , type = "leaf"
     ))
     , Y = pmin(
         pmax(
@@ -218,7 +218,7 @@ new_data3 <- data.frame(
     X = rowMeans(predict(
         model3
         , agaricus.test$data
-        , predleaf = TRUE
+        , type = "leaf"
     ))
     , Y = pmin(
         pmax(

@@ -64,7 +64,7 @@ my_preds <- predict(model, test[, 1L:4L])
 my_preds <- predict(model, test[, 1L:4L])
 
 # We can also get the predicted scores before the Sigmoid/Softmax application
-my_preds <- predict(model, test[, 1L:4L], rawscore = TRUE)
+my_preds <- predict(model, test[, 1L:4L], type = "raw")
 
 # We can also get the leaf index
-my_preds <- predict(model, test[, 1L:4L], predleaf = TRUE)
+my_preds <- predict(model, test[, 1L:4L], type = "leaf")
@@ -36,7 +36,7 @@ model_builtin <- lgb.train(
     , obj = "multiclass"
 )
 
-preds_builtin <- predict(model_builtin, test[, 1L:4L], rawscore = TRUE)
+preds_builtin <- predict(model_builtin, test[, 1L:4L], type = "raw")
 probs_builtin <- exp(preds_builtin) / rowSums(exp(preds_builtin))
 
 # Method 2 of training with custom objective function
@@ -109,7 +109,7 @@ model_custom <- lgb.train(
     , eval = custom_multiclass_metric
 )
 
-preds_custom <- predict(model_custom, test[, 1L:4L], rawscore = TRUE)
+preds_custom <- predict(model_custom, test[, 1L:4L], type = "raw")
 probs_custom <- exp(preds_custom) / rowSums(exp(preds_custom))
 
 # compare predictions