diff --git a/R-package/NAMESPACE b/R-package/NAMESPACE index d8ed636c0208..e07af84d8824 100644 --- a/R-package/NAMESPACE +++ b/R-package/NAMESPACE @@ -16,6 +16,7 @@ export(lgb.Dataset.create.valid) export(lgb.Dataset.save) export(lgb.Dataset.set.categorical) export(lgb.Dataset.set.reference) +export(lgb.configure_fast_predict) export(lgb.convert_with_rules) export(lgb.cv) export(lgb.drop_serialized) @@ -37,6 +38,8 @@ export(saveRDS.lgb.Booster) export(set_field) export(slice) import(methods) +importClassesFrom(Matrix,CsparseMatrix) +importClassesFrom(Matrix,RsparseMatrix) importClassesFrom(Matrix,dgCMatrix) importClassesFrom(Matrix,dgRMatrix) importClassesFrom(Matrix,dsparseMatrix) diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 5fd0ef02f229..11b22d7e644d 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -530,6 +530,7 @@ Booster <- R6::R6Class( predictor <- Predictor$new( modelfile = private$handle , params = params + , fast_predict_config = private$fast_predict_config ) return( predictor$predict( @@ -550,6 +551,57 @@ Booster <- R6::R6Class( return(Predictor$new(modelfile = private$handle)) }, + configure_fast_predict = function(csr = FALSE, + start_iteration = NULL, + num_iteration = NULL, + rawscore = FALSE, + predleaf = FALSE, + predcontrib = FALSE, + params = list()) { + + self$restore_handle() + ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle) + + if (is.null(num_iteration)) { + num_iteration <- -1L + } + if (is.null(start_iteration)) { + start_iteration <- 0L + } + + if (!csr) { + fun <- LGBM_BoosterPredictForMatSingleRowFastInit_R + } else { + fun <- LGBM_BoosterPredictForCSRSingleRowFastInit_R + } + + fast_handle <- .Call( + fun + , private$handle + , ncols + , rawscore + , predleaf + , predcontrib + , start_iteration + , num_iteration + , lgb.params2str(params = params) + ) + + private$fast_predict_config <- list( + handle = fast_handle + , csr = as.logical(csr) + , ncols = ncols + , start_iteration = start_iteration + , num_iteration = num_iteration + , rawscore = as.logical(rawscore) + , predleaf = as.logical(predleaf) + , predcontrib = as.logical(predcontrib) + , params = params + ) + + return(invisible(NULL)) + }, + # Used for serialization raw = NULL, @@ -601,6 +653,7 @@ Booster <- R6::R6Class( higher_better_inner_eval = NULL, set_objective_to_none = FALSE, train_set_version = 0L, + fast_predict_config = list(), # Predict data inner_predict = function(idx) { @@ -748,18 +801,15 @@ Booster <- R6::R6Class( ) ) -#' @name predict.lgb.Booster -#' @title Predict method for LightGBM model -#' @description Predicted values based on class \code{lgb.Booster} -#' @param object Object of class \code{lgb.Booster} -#' @param newdata a \code{matrix} object, a \code{dgCMatrix} object or -#' a character representing a path to a text file (CSV, TSV, or LibSVM) +#' @name lgb_predict_shared_params #' @param type Type of prediction to output. Allowed types are:\itemize{ #' \item \code{"response"}: will output the predicted score according to the objective function being #' optimized (depending on the link function that the objective uses), after applying any necessary #' transformations - for example, for \code{objective="binary"}, it will output class probabilities. #' \item \code{"class"}: for classification objectives, will output the class with the highest predicted -#' probability. For other objectives, will output the same as "response". +#' probability. For other objectives, will output the same as "response". 
Note that \code{"class"} is +#' not a supported type for \link{lgb.configure_fast_predict} (see the documentation of that function +#' for more details). #' \item \code{"raw"}: will output the non-transformed numbers (sum of predictions from boosting iterations' #' results) from which the "response" number is produced for a given objective function - for example, #' for \code{objective="binary"}, this corresponds to log-odds. For many objectives such as @@ -780,12 +830,34 @@ Booster <- R6::R6Class( #' If None, if the best iteration exists and start_iteration is None or <= 0, the #' best iteration is used; otherwise, all iterations from start_iteration are used. #' If <= 0, all iterations from start_iteration are used (no limits). -#' @param header only used for prediction for text file. True if text file has header #' @param params a list of additional named parameters. See #' \href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{ #' the "Predict Parameters" section of the documentation} for a list of parameters and #' valid values. Where these conflict with the values of keyword arguments to this function, #' the values in \code{params} take precedence. +NULL + +#' @name predict.lgb.Booster +#' @title Predict method for LightGBM model +#' @description Predicted values based on class \code{lgb.Booster} +#' @details If the model object has been configured for fast single-row predictions through +#' \link{lgb.configure_fast_predict}, this function will use the prediction parameters +#' that were configured for it - as such, extra prediction parameters should not be passed +#' here, otherwise the configuration will be ignored and the slow route will be taken. +#' @inheritParams lgb_predict_shared_params +#' @param object Object of class \code{lgb.Booster} +#' @param newdata a \code{matrix} object, a \code{dgCMatrix}, a \code{dgRMatrix} object, a \code{dsparseVector} object, +#' or a character representing a path to a text file (CSV, TSV, or LibSVM). +#' +#' For sparse inputs, if predictions are only going to be made for a single row, it will be faster to +#' use CSR format, in which case the data may be passed as either a single-row CSR matrix (class +#' \code{dgRMatrix} from package \code{Matrix}) or as a sparse numeric vector (class +#' \code{dsparseVector} from package \code{Matrix}). +#' +#' If single-row predictions are going to be performed frequently, it is recommended to +#' pre-configure the model object for fast single-row sparse predictions through function +#' \link{lgb.configure_fast_predict}. +#' @param header only used for prediction for text file. True if text file has header #' @param ... ignored #' @return For prediction types that are meant to always return one output per observation (e.g. when predicting #' \code{type="response"} or \code{type="raw"} on a binary classification or regression objective), will @@ -918,12 +990,124 @@ predict.lgb.Booster <- function(object, return(pred) } +#' @title Configure Fast Single-Row Predictions +#' @description Pre-configures a LightGBM model object to produce fast single-row predictions +#' for a given input data type, prediction type, and parameters. +#' @details Calling this function multiple times with different parameters might not override +#' the previous configuration and might trigger undefined behavior. 
+#' +#' Any saved configuration for fast predictions might be lost after making a single-row +#' prediction of a different type than what was configured (except for types "response" and +#' "class", which can be switched between each other at any time without losing the configuration). +#' +#' In some situations, setting a fast prediction configuration for one type of prediction +#' might cause the prediction function to keep using that configuration for single-row +#' predictions even if the requested type of prediction is different from what was configured. +#' +#' Note that this function will not accept argument \code{type="class"} - for such cases, one +#' can pass \code{type="response"} to this function and then \code{type="class"} to the +#' \code{predict} function - the fast configuration will not be lost or altered if the switch +#' is between "response" and "class". +#' +#' The configuration does not survive de-serializations, so it has to be generated +#' anew in every R process that is going to use it (e.g. if loading a model object +#' through \code{readRDS}, whatever configuration was there previously will be lost). +#' +#' Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster} +#' will cause it to ignore the fast-predict configuration and take the slow route instead +#' (but be aware that an existing configuration might not always be overriden by supplying +#' different parameters or prediction type, so make sure to check that the output is what +#' was expected when a prediction is to be made on a single row for something different than +#' what is configured). +#' +#' Note that, if configuring a non-default prediction type (such as leaf indices), +#' then that type must also be passed in the call to \link{predict.lgb.Booster} in +#' order for it to use the configuration. This also applies for \code{start_iteration} +#' and \code{num_iteration}, but \bold{the \code{params} list must be empty} in the call to \code{predict}. +#' +#' Predictions about feature contributions do not allow a fast route for CSR inputs, +#' and as such, this function will produce an error if passing \code{csr=TRUE} and +#' \code{type = "contrib"} together. +#' @inheritParams lgb_predict_shared_params +#' @param model LighGBM model object (class \code{lgb.Booster}). +#' +#' \bold{The object will be modified in-place}. +#' @param csr Whether the prediction function is going to be called on sparse CSR inputs. +#' If \code{FALSE}, will be assumed that predictions are going to be called on single-row +#' regular R matrices. +#' @return The same \code{model} that was passed as input, invisibly, with the desired +#' configuration stored inside it and available to be used in future calls to +#' \link{predict.lgb.Booster}. 
+#' @examples +#' \donttest{ +#' library(lightgbm) +#' data(mtcars) +#' X <- as.matrix(mtcars[, -1L]) +#' y <- mtcars[, 1L] +#' dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L)) +#' params <- list(min_data_in_leaf = 2L) +#' model <- lgb.train( +#' params = params +#' , data = dtrain +#' , obj = "regression" +#' , nrounds = 5L +#' , verbose = -1L +#' ) +#' lgb.configure_fast_predict(model) +#' +#' x_single <- X[11L, , drop = FALSE] +#' predict(model, x_single) +#' +#' # Will not use it if the prediction to be made +#' # is different from what was configured +#' predict(model, x_single, type = "leaf") +#' } +#' @export +lgb.configure_fast_predict <- function(model, + csr = FALSE, + start_iteration = NULL, + num_iteration = NULL, + type = "response", + params = list()) { + if (!lgb.is.Booster(x = model)) { + stop("lgb.configure_fast_predict: model should be an ", sQuote("lgb.Booster")) + } + if (type == "class") { + stop("type='class' is not supported for 'lgb.configure_fast_predict'. Use 'response' instead.") + } + + rawscore <- FALSE + predleaf <- FALSE + predcontrib <- FALSE + if (type == "raw") { + rawscore <- TRUE + } else if (type == "leaf") { + predleaf <- TRUE + } else if (type == "contrib") { + predcontrib <- TRUE + } + + if (csr && predcontrib) { + stop("'lgb.configure_fast_predict' does not support feature contributions for CSR data.") + } + model$configure_fast_predict( + csr = csr + , start_iteration = start_iteration + , num_iteration = num_iteration + , rawscore = rawscore + , predleaf = predleaf + , predcontrib = predcontrib + , params = params + ) + return(invisible(model)) +} + #' @name print.lgb.Booster #' @title Print method for LightGBM model #' @description Show summary information about a LightGBM model object (same as \code{summary}). #' @param x Object of class \code{lgb.Booster} #' @param ... Not used -#' @return The same input `x`, returned as invisible. +#' @return The same input \code{x}, returned as invisible. #' @export print.lgb.Booster <- function(x, ...) { # nolint start @@ -972,7 +1156,7 @@ print.lgb.Booster <- function(x, ...) { #' @description Show summary information about a LightGBM model object (same as \code{print}). #' @param object Object of class \code{lgb.Booster} #' @param ... Not used -#' @return The same input `object`, returned as invisible. +#' @return The same input \code{object}, returned as invisible. #' @export summary.lgb.Booster <- function(object, ...) { print(object) @@ -983,7 +1167,7 @@ summary.lgb.Booster <- function(object, ...) { #' @description Load LightGBM takes in either a file path or model string. 
#' If both are provided, Load will default to loading from file #' @param filename path of model file -#' @param model_str a str containing the model (as a `character` or `raw` vector) +#' @param model_str a str containing the model (as a \code{character} or \code{raw} vector) #' #' @return lgb.Booster #' diff --git a/R-package/R/lgb.Predictor.R b/R-package/R/lgb.Predictor.R index 0e1e80276e19..7f036c9726b6 100644 --- a/R-package/R/lgb.Predictor.R +++ b/R-package/R/lgb.Predictor.R @@ -1,7 +1,7 @@ #' @importFrom methods is new -#' @importClassesFrom Matrix dsparseMatrix dsparseVector dgCMatrix dgRMatrix #' @importFrom R6 R6Class #' @importFrom utils read.delim +#' @importClassesFrom Matrix dsparseMatrix dsparseVector dgCMatrix dgRMatrix CsparseMatrix RsparseMatrix Predictor <- R6::R6Class( classname = "lgb.Predictor", @@ -27,7 +27,7 @@ Predictor <- R6::R6Class( }, # Initialize will create a starter model - initialize = function(modelfile, params = list()) { + initialize = function(modelfile, params = list(), fast_predict_config = list()) { private$params <- lgb.params2str(params = params) handle <- NULL @@ -57,6 +57,8 @@ Predictor <- R6::R6Class( } + private$fast_predict_config <- fast_predict_config + # Override class and store it class(handle) <- "lgb.Booster.handle" private$handle <- handle @@ -236,6 +238,9 @@ Predictor <- R6::R6Class( # Not a file, we need to predict from R object num_row <- nrow(data) + if (is.null(num_row)) { + num_row <- 1L + } npred <- 0L @@ -262,20 +267,175 @@ Predictor <- R6::R6Class( if (storage.mode(data) != "double") { storage.mode(data) <- "double" } - .Call( - LGBM_BoosterPredictForMat_R - , private$handle - , data - , as.integer(nrow(data)) - , as.integer(ncol(data)) - , as.integer(rawscore) - , as.integer(predleaf) - , as.integer(predcontrib) - , as.integer(start_iteration) - , as.integer(num_iteration) - , private$params - , preds - ) + + if (nrow(data) == 1L) { + + use_fast_config <- private$check_can_use_fast_predict_config( + csr = FALSE + , rawscore = rawscore + , predleaf = predleaf + , predcontrib = predcontrib + , start_iteration = start_iteration + , num_iteration = num_iteration + ) + + if (use_fast_config) { + .Call( + LGBM_BoosterPredictForMatSingleRowFast_R + , private$fast_predict_config$handle + , data + , preds + ) + } else { + .Call( + LGBM_BoosterPredictForMatSingleRow_R + , private$handle + , data + , rawscore + , predleaf + , predcontrib + , start_iteration + , num_iteration + , private$params + , preds + ) + } + + } else { + .Call( + LGBM_BoosterPredictForMat_R + , private$handle + , data + , as.integer(nrow(data)) + , as.integer(ncol(data)) + , as.integer(rawscore) + , as.integer(predleaf) + , as.integer(predcontrib) + , as.integer(start_iteration) + , as.integer(num_iteration) + , private$params + , preds + ) + } + + } else if (inherits(data, "dsparseVector")) { + + if (length(self$fast_predict_config)) { + ncols <- self$fast_predict_config$ncols + use_fast_config <- private$check_can_use_fast_predict_config( + csr = TRUE + , rawscore = rawscore + , predleaf = predleaf + , predcontrib = predcontrib + , start_iteration = start_iteration + , num_iteration = num_iteration + ) + } else { + ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle) + use_fast_config <- FALSE + } + + if (length(data) > ncols) { + stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns." 
+ , ncols + , length(data))) + } + + if (use_fast_config) { + .Call( + LGBM_BoosterPredictForCSRSingleRowFast_R + , self$fast_predict_config$handle + , data@i - 1L + , data@x + , preds + ) + } else { + .Call( + LGBM_BoosterPredictForCSRSingleRow_R + , private$handle + , data@i - 1L + , data@x + , ncols + , as.integer(rawscore) + , as.integer(predleaf) + , as.integer(predcontrib) + , start_iteration + , num_iteration + , private$params + , preds + ) + } + + } else if (inherits(data, "dgRMatrix")) { + + ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle) + if (ncol(data) > ncols) { + stop(sprintf("Model was fitted to data with %d columns, input data has %.0f columns." + , ncols + , ncol(data))) + } + + if (nrow(data) == 1L) { + + if (length(self$fast_predict_config)) { + ncols <- self$fast_predict_config$ncols + use_fast_config <- private$check_can_use_fast_predict_config( + csr = TRUE + , rawscore = rawscore + , predleaf = predleaf + , predcontrib = predcontrib + , start_iteration = start_iteration + , num_iteration = num_iteration + ) + } else { + ncols <- .Call(LGBM_BoosterGetNumFeature_R, private$handle) + use_fast_config <- FALSE + } + + if (use_fast_config) { + .Call( + LGBM_BoosterPredictForCSRSingleRowFast_R + , self$fast_predict_config$handle + , data@j + , data@x + , preds + ) + } else { + .Call( + LGBM_BoosterPredictForCSRSingleRow_R + , private$handle + , data@j + , data@x + , ncols + , as.integer(rawscore) + , as.integer(predleaf) + , as.integer(predcontrib) + , start_iteration + , num_iteration + , private$params + , preds + ) + } + + } else { + + .Call( + LGBM_BoosterPredictForCSR_R + , private$handle + , data@p + , data@j + , data@x + , ncols + , as.integer(rawscore) + , as.integer(predleaf) + , as.integer(predcontrib) + , start_iteration + , num_iteration + , private$params + , preds + ) + + } } else if (methods::is(data, "dgCMatrix")) { if (length(data@p) > 2147483647L) { @@ -342,5 +502,36 @@ Predictor <- R6::R6Class( handle = NULL , need_free_handle = FALSE , params = "" + , fast_predict_config = list() + , check_can_use_fast_predict_config = function(csr, + rawscore, + predleaf, + predcontrib, + start_iteration, + num_iteration) { + + if (!NROW(private$fast_predict_config)) { + return(FALSE) + } + + if (lgb.is.null.handle(private$fast_predict_config$handle)) { + warning(paste0("Model had fast CSR predict configuration, but it is inactive." + , " Try re-generating it through 'lgb.configure_fast_predict'.")) + return(FALSE) + } + + if (isTRUE(csr) != private$fast_predict_config$csr) { + return(FALSE) + } + + return( + private$params == "" && + private$fast_predict_config$rawscore == rawscore && + private$fast_predict_config$predleaf == predleaf && + private$fast_predict_config$predcontrib == predcontrib && + lgb.equal.or.both.null(private$fast_predict_config$start_iteration, start_iteration) && + lgb.equal.or.both.null(private$fast_predict_config$num_iteration, num_iteration) + ) + } ) ) diff --git a/R-package/R/lgb.restore_handle.R b/R-package/R/lgb.restore_handle.R index 3d707635174f..d9c7e2993856 100644 --- a/R-package/R/lgb.restore_handle.R +++ b/R-package/R/lgb.restore_handle.R @@ -4,6 +4,10 @@ #' \code{saveRDS}, its underlying C++ object will be blank and needs to be restored to able to use it. Such #' object is restored automatically when calling functions such as \code{predict}, but this function can be #' used to forcibly restore it beforehand. Note that the object will be modified in-place. 
+#' +#' @details Be aware that fast single-row prediction configurations are not restored through this +#' function. If you wish to make fast single-row predictions using a \code{lgb.Booster} loaded this way, +#' call \link{lgb.configure_fast_predict} on the loaded \code{lgb.Booster} object. #' @param model \code{lgb.Booster} object which was de-serialized and whose underlying C++ object and R handle #' need to be restored. #' diff --git a/R-package/R/utils.R b/R-package/R/utils.R index 56e499360836..fec39681fd4d 100644 --- a/R-package/R/utils.R +++ b/R-package/R/utils.R @@ -246,3 +246,17 @@ lgb.get.default.num.threads <- function() { return(cores) } } + +lgb.equal.or.both.null <- function(a, b) { + if (is.null(a)) { + if (!is.null(b)) { + return(FALSE) + } + return(TRUE) + } else { + if (is.null(b)) { + return(FALSE) + } + return(a == b) + } +} diff --git a/R-package/man/lgb.configure_fast_predict.Rd b/R-package/man/lgb.configure_fast_predict.Rd new file mode 100644 index 000000000000..4fdce8956cf3 --- /dev/null +++ b/R-package/man/lgb.configure_fast_predict.Rd @@ -0,0 +1,132 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/lgb.Booster.R +\name{lgb.configure_fast_predict} +\alias{lgb.configure_fast_predict} +\title{Configure Fast Single-Row Predictions} +\usage{ +lgb.configure_fast_predict( + model, + csr = FALSE, + start_iteration = NULL, + num_iteration = NULL, + type = "response", + params = list() +) +} +\arguments{ +\item{model}{LighGBM model object (class \code{lgb.Booster}). + + \bold{The object will be modified in-place}.} + +\item{csr}{Whether the prediction function is going to be called on sparse CSR inputs. +If \code{FALSE}, will be assumed that predictions are going to be called on single-row +regular R matrices.} + +\item{start_iteration}{int or None, optional (default=None) +Start index of the iteration to predict. +If None or <= 0, starts from the first iteration.} + +\item{num_iteration}{int or None, optional (default=None) +Limit number of iterations in the prediction. +If None, if the best iteration exists and start_iteration is None or <= 0, the +best iteration is used; otherwise, all iterations from start_iteration are used. +If <= 0, all iterations from start_iteration are used (no limits).} + +\item{type}{Type of prediction to output. Allowed types are:\itemize{ + \item \code{"response"}: will output the predicted score according to the objective function being + optimized (depending on the link function that the objective uses), after applying any necessary + transformations - for example, for \code{objective="binary"}, it will output class probabilities. + \item \code{"class"}: for classification objectives, will output the class with the highest predicted + probability. For other objectives, will output the same as "response". Note that \code{"class"} is + not a supported type for \link{lgb.configure_fast_predict} (see the documentation of that function + for more details). + \item \code{"raw"}: will output the non-transformed numbers (sum of predictions from boosting iterations' + results) from which the "response" number is produced for a given objective function - for example, + for \code{objective="binary"}, this corresponds to log-odds. For many objectives such as + "regression", since no transformation is applied, the output will be the same as for "response". 
+ \item \code{"leaf"}: will output the index of the terminal node / leaf at which each observations falls + in each tree in the model, outputted as integers, with one column per tree. + \item \code{"contrib"}: will return the per-feature contributions for each prediction, including an + intercept (each feature will produce one column). + } + + Note that, if using custom objectives, types "class" and "response" will not be available and will + default towards using "raw" instead.} + +\item{params}{a list of additional named parameters. See +\href{https://lightgbm.readthedocs.io/en/latest/Parameters.html#predict-parameters}{ +the "Predict Parameters" section of the documentation} for a list of parameters and +valid values. Where these conflict with the values of keyword arguments to this function, +the values in \code{params} take precedence.} +} +\value{ +The same \code{model} that was passed as input, invisibly, with the desired + configuration stored inside it and available to be used in future calls to + \link{predict.lgb.Booster}. +} +\description{ +Pre-configures a LightGBM model object to produce fast single-row predictions + for a given input data type, prediction type, and parameters. +} +\details{ +Calling this function multiple times with different parameters might not override + the previous configuration and might trigger undefined behavior. + + Any saved configuration for fast predictions might be lost after making a single-row + prediction of a different type than what was configured (except for types "response" and + "class", which can be switched between each other at any time without losing the configuration). + + In some situations, setting a fast prediction configuration for one type of prediction + might cause the prediction function to keep using that configuration for single-row + predictions even if the requested type of prediction is different from what was configured. + + Note that this function will not accept argument \code{type="class"} - for such cases, one + can pass \code{type="response"} to this function and then \code{type="class"} to the + \code{predict} function - the fast configuration will not be lost or altered if the switch + is between "response" and "class". + + The configuration does not survive de-serializations, so it has to be generated + anew in every R process that is going to use it (e.g. if loading a model object + through \code{readRDS}, whatever configuration was there previously will be lost). + + Requesting a different prediction type or passing parameters to \link{predict.lgb.Booster} + will cause it to ignore the fast-predict configuration and take the slow route instead + (but be aware that an existing configuration might not always be overriden by supplying + different parameters or prediction type, so make sure to check that the output is what + was expected when a prediction is to be made on a single row for something different than + what is configured). + + Note that, if configuring a non-default prediction type (such as leaf indices), + then that type must also be passed in the call to \link{predict.lgb.Booster} in + order for it to use the configuration. This also applies for \code{start_iteration} + and \code{num_iteration}, but \bold{the \code{params} list must be empty} in the call to \code{predict}. + + Predictions about feature contributions do not allow a fast route for CSR inputs, + and as such, this function will produce an error if passing \code{csr=TRUE} and + \code{type = "contrib"} together. 
+} +\examples{ +\donttest{ +library(lightgbm) +data(mtcars) +X <- as.matrix(mtcars[, -1L]) +y <- mtcars[, 1L] +dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L)) +params <- list(min_data_in_leaf = 2L) +model <- lgb.train( + params = params + , data = dtrain + , obj = "regression" + , nrounds = 5L + , verbose = -1L +) +lgb.configure_fast_predict(model) + +x_single <- X[11L, , drop = FALSE] +predict(model, x_single) + +# Will not use it if the prediction to be made +# is different from what was configured +predict(model, x_single, type = "leaf") +} +} diff --git a/R-package/man/lgb.load.Rd b/R-package/man/lgb.load.Rd index 775003c3279f..6031ff8e55bb 100644 --- a/R-package/man/lgb.load.Rd +++ b/R-package/man/lgb.load.Rd @@ -9,7 +9,7 @@ lgb.load(filename = NULL, model_str = NULL) \arguments{ \item{filename}{path of model file} -\item{model_str}{a str containing the model (as a `character` or `raw` vector)} +\item{model_str}{a str containing the model (as a \code{character} or \code{raw} vector)} } \value{ lgb.Booster diff --git a/R-package/man/lgb.restore_handle.Rd b/R-package/man/lgb.restore_handle.Rd index 31a0fcf9c545..be5bf844fdf2 100644 --- a/R-package/man/lgb.restore_handle.Rd +++ b/R-package/man/lgb.restore_handle.Rd @@ -19,6 +19,11 @@ After a LightGBM model object is de-serialized through functions such as \code{s object is restored automatically when calling functions such as \code{predict}, but this function can be used to forcibly restore it beforehand. Note that the object will be modified in-place. } +\details{ +Be aware that fast single-row prediction configurations are not restored through this +function. If you wish to make fast single-row predictions using a \code{lgb.Booster} loaded this way, +call \link{lgb.configure_fast_predict} on the loaded \code{lgb.Booster} object. +} \examples{ library(lightgbm) data("agaricus.train") diff --git a/R-package/man/predict.lgb.Booster.Rd b/R-package/man/predict.lgb.Booster.Rd index 35314c7dc767..95eece678b76 100644 --- a/R-package/man/predict.lgb.Booster.Rd +++ b/R-package/man/predict.lgb.Booster.Rd @@ -18,15 +18,26 @@ \arguments{ \item{object}{Object of class \code{lgb.Booster}} -\item{newdata}{a \code{matrix} object, a \code{dgCMatrix} object or -a character representing a path to a text file (CSV, TSV, or LibSVM)} +\item{newdata}{a \code{matrix} object, a \code{dgCMatrix}, a \code{dgRMatrix} object, a \code{dsparseVector} object, + or a character representing a path to a text file (CSV, TSV, or LibSVM). + + For sparse inputs, if predictions are only going to be made for a single row, it will be faster to + use CSR format, in which case the data may be passed as either a single-row CSR matrix (class + \code{dgRMatrix} from package \code{Matrix}) or as a sparse numeric vector (class + \code{dsparseVector} from package \code{Matrix}). + + If single-row predictions are going to be performed frequently, it is recommended to + pre-configure the model object for fast single-row sparse predictions through function + \link{lgb.configure_fast_predict}.} \item{type}{Type of prediction to output. Allowed types are:\itemize{ \item \code{"response"}: will output the predicted score according to the objective function being optimized (depending on the link function that the objective uses), after applying any necessary transformations - for example, for \code{objective="binary"}, it will output class probabilities. \item \code{"class"}: for classification objectives, will output the class with the highest predicted - probability. 
For other objectives, will output the same as "response". + probability. For other objectives, will output the same as "response". Note that \code{"class"} is + not a supported type for \link{lgb.configure_fast_predict} (see the documentation of that function + for more details). \item \code{"raw"}: will output the non-transformed numbers (sum of predictions from boosting iterations' results) from which the "response" number is produced for a given objective function - for example, for \code{objective="binary"}, this corresponds to log-odds. For many objectives such as @@ -85,6 +96,12 @@ For prediction types that are meant to always return one output per observation \description{ Predicted values based on class \code{lgb.Booster} } +\details{ +If the model object has been configured for fast single-row predictions through + \link{lgb.configure_fast_predict}, this function will use the prediction parameters + that were configured for it - as such, extra prediction parameters should not be passed + here, otherwise the configuration will be ignored and the slow route will be taken. +} \examples{ \donttest{ data(agaricus.train, package = "lightgbm") diff --git a/R-package/man/print.lgb.Booster.Rd b/R-package/man/print.lgb.Booster.Rd index 47ae13189891..a5057751432c 100644 --- a/R-package/man/print.lgb.Booster.Rd +++ b/R-package/man/print.lgb.Booster.Rd @@ -12,7 +12,7 @@ \item{...}{Not used} } \value{ -The same input `x`, returned as invisible. +The same input \code{x}, returned as invisible. } \description{ Show summary information about a LightGBM model object (same as \code{summary}). diff --git a/R-package/man/summary.lgb.Booster.Rd b/R-package/man/summary.lgb.Booster.Rd index 8af5158feb15..9c2241cb2b23 100644 --- a/R-package/man/summary.lgb.Booster.Rd +++ b/R-package/man/summary.lgb.Booster.Rd @@ -12,7 +12,7 @@ \item{...}{Not used} } \value{ -The same input `object`, returned as invisible. +The same input \code{object}, returned as invisible. } \description{ Show summary information about a LightGBM model object (same as \code{print}). 
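A minimal sketch of how the R-level pieces above are intended to fit together for the fast single-row CSR route, reusing the mtcars setup from this patch's examples and tests (illustrative sketch only, not part of the patch):

library(lightgbm)
library(Matrix)

data(mtcars)
X <- as.matrix(mtcars[, -1L])
y <- mtcars[, 1L]
dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L))
model <- lgb.train(
    params = list(min_data_in_leaf = 2L)
    , data = dtrain
    , obj = "regression"
    , nrounds = 5L
    , verbose = -1L
)

# Configure the Booster in-place for fast single-row predictions on CSR inputs.
# Note: type = "class" is rejected here, and csr = TRUE cannot be combined
# with type = "contrib".
lgb.configure_fast_predict(model, csr = TRUE)

# Single rows can then be passed either as a one-row dgRMatrix
# or as a sparse numeric vector (dsparseVector).
x_csr <- as(X[1L, , drop = FALSE], "RsparseMatrix")
x_spv <- as(X[1L, , drop = FALSE], "sparseVector")
predict(model, x_csr)
predict(model, x_spv)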
diff --git a/R-package/src/lightgbm_R.cpp b/R-package/src/lightgbm_R.cpp index 560622788422..1d503ab7b465 100644 --- a/R-package/src/lightgbm_R.cpp +++ b/R-package/src/lightgbm_R.cpp @@ -18,6 +18,7 @@ #include #include +#include #include #include #include @@ -839,6 +840,109 @@ SEXP LGBM_BoosterPredictForCSC_R(SEXP handle, R_API_END(); } +SEXP LGBM_BoosterPredictForCSR_R(SEXP handle, + SEXP indptr, + SEXP indices, + SEXP data, + SEXP ncols, + SEXP is_rawscore, + SEXP is_leafidx, + SEXP is_predcontrib, + SEXP start_iteration, + SEXP num_iteration, + SEXP parameter, + SEXP out_result) { + R_API_BEGIN(); + _AssertBoosterHandleNotNull(handle); + int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib); + const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter))); + int64_t out_len; + CHECK_CALL(LGBM_BoosterPredictForCSR(R_ExternalPtrAddr(handle), + INTEGER(indptr), C_API_DTYPE_INT32, INTEGER(indices), + REAL(data), C_API_DTYPE_FLOAT64, + Rf_xlength(indptr), Rf_xlength(data), Rf_asInteger(ncols), + pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), + parameter_ptr, &out_len, REAL(out_result))); + UNPROTECT(1); + return R_NilValue; + R_API_END(); +} + +SEXP LGBM_BoosterPredictForCSRSingleRow_R(SEXP handle, + SEXP indices, + SEXP data, + SEXP ncols, + SEXP is_rawscore, + SEXP is_leafidx, + SEXP is_predcontrib, + SEXP start_iteration, + SEXP num_iteration, + SEXP parameter, + SEXP out_result) { + R_API_BEGIN(); + _AssertBoosterHandleNotNull(handle); + int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib); + const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter))); + int nnz = static_cast(Rf_xlength(data)); + const int indptr[] = {0, nnz}; + int64_t out_len; + CHECK_CALL(LGBM_BoosterPredictForCSRSingleRow(R_ExternalPtrAddr(handle), + indptr, C_API_DTYPE_INT32, INTEGER(indices), + REAL(data), C_API_DTYPE_FLOAT64, + 2, nnz, Rf_asInteger(ncols), + pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), + parameter_ptr, &out_len, REAL(out_result))); + UNPROTECT(1); + return R_NilValue; + R_API_END(); +} + +void LGBM_FastConfigFree_wrapped(SEXP handle) { + LGBM_FastConfigFree(static_cast(R_ExternalPtrAddr(handle))); +} + +SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R(SEXP handle, + SEXP ncols, + SEXP is_rawscore, + SEXP is_leafidx, + SEXP is_predcontrib, + SEXP start_iteration, + SEXP num_iteration, + SEXP parameter) { + R_API_BEGIN(); + _AssertBoosterHandleNotNull(handle); + int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib); + SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter))); + FastConfigHandle out_fastConfig; + CHECK_CALL(LGBM_BoosterPredictForCSRSingleRowFastInit(R_ExternalPtrAddr(handle), + pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), + C_API_DTYPE_FLOAT64, Rf_asInteger(ncols), + parameter_ptr, &out_fastConfig)); + R_SetExternalPtrAddr(ret, out_fastConfig); + R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE); + UNPROTECT(2); + return ret; + R_API_END(); +} + +SEXP LGBM_BoosterPredictForCSRSingleRowFast_R(SEXP handle_fastConfig, + SEXP indices, + SEXP data, + SEXP out_result) { + R_API_BEGIN(); + int nnz = static_cast(Rf_xlength(data)); + const int indptr[] = {0, nnz}; + int64_t out_len; + CHECK_CALL(LGBM_BoosterPredictForCSRSingleRowFast(R_ExternalPtrAddr(handle_fastConfig), + indptr, C_API_DTYPE_INT32, INTEGER(indices), + REAL(data), + 2, nnz, + &out_len, 
REAL(out_result))); + return R_NilValue; + R_API_END(); +} + SEXP LGBM_BoosterPredictForMat_R(SEXP handle, SEXP data, SEXP num_row, @@ -937,6 +1041,66 @@ SEXP LGBM_BoosterPredictSparseOutput_R(SEXP handle, R_API_END(); } +SEXP LGBM_BoosterPredictForMatSingleRow_R(SEXP handle, + SEXP data, + SEXP is_rawscore, + SEXP is_leafidx, + SEXP is_predcontrib, + SEXP start_iteration, + SEXP num_iteration, + SEXP parameter, + SEXP out_result) { + R_API_BEGIN(); + _AssertBoosterHandleNotNull(handle); + int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib); + const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter))); + double* ptr_ret = REAL(out_result); + int64_t out_len; + CHECK_CALL(LGBM_BoosterPredictForMatSingleRow(R_ExternalPtrAddr(handle), + REAL(data), C_API_DTYPE_FLOAT64, Rf_xlength(data), 1, + pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), + parameter_ptr, &out_len, ptr_ret)); + UNPROTECT(1); + return R_NilValue; + R_API_END(); +} + +SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R(SEXP handle, + SEXP ncols, + SEXP is_rawscore, + SEXP is_leafidx, + SEXP is_predcontrib, + SEXP start_iteration, + SEXP num_iteration, + SEXP parameter) { + R_API_BEGIN(); + _AssertBoosterHandleNotNull(handle); + int pred_type = GetPredictType(is_rawscore, is_leafidx, is_predcontrib); + SEXP ret = PROTECT(R_MakeExternalPtr(nullptr, R_NilValue, R_NilValue)); + const char* parameter_ptr = CHAR(PROTECT(Rf_asChar(parameter))); + FastConfigHandle out_fastConfig; + CHECK_CALL(LGBM_BoosterPredictForMatSingleRowFastInit(R_ExternalPtrAddr(handle), + pred_type, Rf_asInteger(start_iteration), Rf_asInteger(num_iteration), + C_API_DTYPE_FLOAT64, Rf_asInteger(ncols), + parameter_ptr, &out_fastConfig)); + R_SetExternalPtrAddr(ret, out_fastConfig); + R_RegisterCFinalizerEx(ret, LGBM_FastConfigFree_wrapped, TRUE); + UNPROTECT(2); + return ret; + R_API_END(); +} + +SEXP LGBM_BoosterPredictForMatSingleRowFast_R(SEXP handle_fastConfig, + SEXP data, + SEXP out_result) { + R_API_BEGIN(); + int64_t out_len; + CHECK_CALL(LGBM_BoosterPredictForMatSingleRowFast(R_ExternalPtrAddr(handle_fastConfig), + REAL(data), &out_len, REAL(out_result))); + return R_NilValue; + R_API_END(); +} + SEXP LGBM_BoosterSaveModel_R(SEXP handle, SEXP num_iteration, SEXP feature_importance_type, @@ -1021,52 +1185,59 @@ SEXP LGBM_DumpParamAliases_R() { // .Call() calls static const R_CallMethodDef CallEntries[] = { - {"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1}, - {"LGBM_DatasetCreateFromFile_R" , (DL_FUNC) &LGBM_DatasetCreateFromFile_R , 3}, - {"LGBM_DatasetCreateFromCSC_R" , (DL_FUNC) &LGBM_DatasetCreateFromCSC_R , 8}, - {"LGBM_DatasetCreateFromMat_R" , (DL_FUNC) &LGBM_DatasetCreateFromMat_R , 5}, - {"LGBM_DatasetGetSubset_R" , (DL_FUNC) &LGBM_DatasetGetSubset_R , 4}, - {"LGBM_DatasetSetFeatureNames_R" , (DL_FUNC) &LGBM_DatasetSetFeatureNames_R , 2}, - {"LGBM_DatasetGetFeatureNames_R" , (DL_FUNC) &LGBM_DatasetGetFeatureNames_R , 1}, - {"LGBM_DatasetSaveBinary_R" , (DL_FUNC) &LGBM_DatasetSaveBinary_R , 2}, - {"LGBM_DatasetFree_R" , (DL_FUNC) &LGBM_DatasetFree_R , 1}, - {"LGBM_DatasetSetField_R" , (DL_FUNC) &LGBM_DatasetSetField_R , 4}, - {"LGBM_DatasetGetFieldSize_R" , (DL_FUNC) &LGBM_DatasetGetFieldSize_R , 3}, - {"LGBM_DatasetGetField_R" , (DL_FUNC) &LGBM_DatasetGetField_R , 3}, - {"LGBM_DatasetUpdateParamChecking_R", (DL_FUNC) &LGBM_DatasetUpdateParamChecking_R, 2}, - {"LGBM_DatasetGetNumData_R" , (DL_FUNC) &LGBM_DatasetGetNumData_R , 2}, - {"LGBM_DatasetGetNumFeature_R" , (DL_FUNC) 
&LGBM_DatasetGetNumFeature_R , 2}, - {"LGBM_DatasetGetFeatureNumBin_R" , (DL_FUNC) &LGBM_DatasetGetFeatureNumBin_R , 3}, - {"LGBM_BoosterCreate_R" , (DL_FUNC) &LGBM_BoosterCreate_R , 2}, - {"LGBM_BoosterFree_R" , (DL_FUNC) &LGBM_BoosterFree_R , 1}, - {"LGBM_BoosterCreateFromModelfile_R", (DL_FUNC) &LGBM_BoosterCreateFromModelfile_R, 1}, - {"LGBM_BoosterLoadModelFromString_R", (DL_FUNC) &LGBM_BoosterLoadModelFromString_R, 1}, - {"LGBM_BoosterMerge_R" , (DL_FUNC) &LGBM_BoosterMerge_R , 2}, - {"LGBM_BoosterAddValidData_R" , (DL_FUNC) &LGBM_BoosterAddValidData_R , 2}, - {"LGBM_BoosterResetTrainingData_R" , (DL_FUNC) &LGBM_BoosterResetTrainingData_R , 2}, - {"LGBM_BoosterResetParameter_R" , (DL_FUNC) &LGBM_BoosterResetParameter_R , 2}, - {"LGBM_BoosterGetNumClasses_R" , (DL_FUNC) &LGBM_BoosterGetNumClasses_R , 2}, - {"LGBM_BoosterGetNumFeature_R" , (DL_FUNC) &LGBM_BoosterGetNumFeature_R , 1}, - {"LGBM_BoosterUpdateOneIter_R" , (DL_FUNC) &LGBM_BoosterUpdateOneIter_R , 1}, - {"LGBM_BoosterUpdateOneIterCustom_R", (DL_FUNC) &LGBM_BoosterUpdateOneIterCustom_R, 4}, - {"LGBM_BoosterRollbackOneIter_R" , (DL_FUNC) &LGBM_BoosterRollbackOneIter_R , 1}, - {"LGBM_BoosterGetCurrentIteration_R", (DL_FUNC) &LGBM_BoosterGetCurrentIteration_R, 2}, - {"LGBM_BoosterGetUpperBoundValue_R" , (DL_FUNC) &LGBM_BoosterGetUpperBoundValue_R , 2}, - {"LGBM_BoosterGetLowerBoundValue_R" , (DL_FUNC) &LGBM_BoosterGetLowerBoundValue_R , 2}, - {"LGBM_BoosterGetEvalNames_R" , (DL_FUNC) &LGBM_BoosterGetEvalNames_R , 1}, - {"LGBM_BoosterGetEval_R" , (DL_FUNC) &LGBM_BoosterGetEval_R , 3}, - {"LGBM_BoosterGetNumPredict_R" , (DL_FUNC) &LGBM_BoosterGetNumPredict_R , 3}, - {"LGBM_BoosterGetPredict_R" , (DL_FUNC) &LGBM_BoosterGetPredict_R , 3}, - {"LGBM_BoosterPredictForFile_R" , (DL_FUNC) &LGBM_BoosterPredictForFile_R , 10}, - {"LGBM_BoosterCalcNumPredict_R" , (DL_FUNC) &LGBM_BoosterCalcNumPredict_R , 8}, - {"LGBM_BoosterPredictForCSC_R" , (DL_FUNC) &LGBM_BoosterPredictForCSC_R , 14}, - {"LGBM_BoosterPredictForMat_R" , (DL_FUNC) &LGBM_BoosterPredictForMat_R , 11}, - {"LGBM_BoosterPredictSparseOutput_R", (DL_FUNC) &LGBM_BoosterPredictSparseOutput_R, 10}, - {"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4}, - {"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3}, - {"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3}, - {"LGBM_NullBoosterHandleError_R" , (DL_FUNC) &LGBM_NullBoosterHandleError_R , 0}, - {"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0}, + {"LGBM_HandleIsNull_R" , (DL_FUNC) &LGBM_HandleIsNull_R , 1}, + {"LGBM_DatasetCreateFromFile_R" , (DL_FUNC) &LGBM_DatasetCreateFromFile_R , 3}, + {"LGBM_DatasetCreateFromCSC_R" , (DL_FUNC) &LGBM_DatasetCreateFromCSC_R , 8}, + {"LGBM_DatasetCreateFromMat_R" , (DL_FUNC) &LGBM_DatasetCreateFromMat_R , 5}, + {"LGBM_DatasetGetSubset_R" , (DL_FUNC) &LGBM_DatasetGetSubset_R , 4}, + {"LGBM_DatasetSetFeatureNames_R" , (DL_FUNC) &LGBM_DatasetSetFeatureNames_R , 2}, + {"LGBM_DatasetGetFeatureNames_R" , (DL_FUNC) &LGBM_DatasetGetFeatureNames_R , 1}, + {"LGBM_DatasetSaveBinary_R" , (DL_FUNC) &LGBM_DatasetSaveBinary_R , 2}, + {"LGBM_DatasetFree_R" , (DL_FUNC) &LGBM_DatasetFree_R , 1}, + {"LGBM_DatasetSetField_R" , (DL_FUNC) &LGBM_DatasetSetField_R , 4}, + {"LGBM_DatasetGetFieldSize_R" , (DL_FUNC) &LGBM_DatasetGetFieldSize_R , 3}, + {"LGBM_DatasetGetField_R" , (DL_FUNC) &LGBM_DatasetGetField_R , 3}, + {"LGBM_DatasetUpdateParamChecking_R" , (DL_FUNC) &LGBM_DatasetUpdateParamChecking_R , 2}, + {"LGBM_DatasetGetNumData_R" , 
(DL_FUNC) &LGBM_DatasetGetNumData_R , 2}, + {"LGBM_DatasetGetNumFeature_R" , (DL_FUNC) &LGBM_DatasetGetNumFeature_R , 2}, + {"LGBM_DatasetGetFeatureNumBin_R" , (DL_FUNC) &LGBM_DatasetGetFeatureNumBin_R , 3}, + {"LGBM_BoosterCreate_R" , (DL_FUNC) &LGBM_BoosterCreate_R , 2}, + {"LGBM_BoosterFree_R" , (DL_FUNC) &LGBM_BoosterFree_R , 1}, + {"LGBM_BoosterCreateFromModelfile_R" , (DL_FUNC) &LGBM_BoosterCreateFromModelfile_R , 1}, + {"LGBM_BoosterLoadModelFromString_R" , (DL_FUNC) &LGBM_BoosterLoadModelFromString_R , 1}, + {"LGBM_BoosterMerge_R" , (DL_FUNC) &LGBM_BoosterMerge_R , 2}, + {"LGBM_BoosterAddValidData_R" , (DL_FUNC) &LGBM_BoosterAddValidData_R , 2}, + {"LGBM_BoosterResetTrainingData_R" , (DL_FUNC) &LGBM_BoosterResetTrainingData_R , 2}, + {"LGBM_BoosterResetParameter_R" , (DL_FUNC) &LGBM_BoosterResetParameter_R , 2}, + {"LGBM_BoosterGetNumClasses_R" , (DL_FUNC) &LGBM_BoosterGetNumClasses_R , 2}, + {"LGBM_BoosterGetNumFeature_R" , (DL_FUNC) &LGBM_BoosterGetNumFeature_R , 1}, + {"LGBM_BoosterUpdateOneIter_R" , (DL_FUNC) &LGBM_BoosterUpdateOneIter_R , 1}, + {"LGBM_BoosterUpdateOneIterCustom_R" , (DL_FUNC) &LGBM_BoosterUpdateOneIterCustom_R , 4}, + {"LGBM_BoosterRollbackOneIter_R" , (DL_FUNC) &LGBM_BoosterRollbackOneIter_R , 1}, + {"LGBM_BoosterGetCurrentIteration_R" , (DL_FUNC) &LGBM_BoosterGetCurrentIteration_R , 2}, + {"LGBM_BoosterGetUpperBoundValue_R" , (DL_FUNC) &LGBM_BoosterGetUpperBoundValue_R , 2}, + {"LGBM_BoosterGetLowerBoundValue_R" , (DL_FUNC) &LGBM_BoosterGetLowerBoundValue_R , 2}, + {"LGBM_BoosterGetEvalNames_R" , (DL_FUNC) &LGBM_BoosterGetEvalNames_R , 1}, + {"LGBM_BoosterGetEval_R" , (DL_FUNC) &LGBM_BoosterGetEval_R , 3}, + {"LGBM_BoosterGetNumPredict_R" , (DL_FUNC) &LGBM_BoosterGetNumPredict_R , 3}, + {"LGBM_BoosterGetPredict_R" , (DL_FUNC) &LGBM_BoosterGetPredict_R , 3}, + {"LGBM_BoosterPredictForFile_R" , (DL_FUNC) &LGBM_BoosterPredictForFile_R , 10}, + {"LGBM_BoosterCalcNumPredict_R" , (DL_FUNC) &LGBM_BoosterCalcNumPredict_R , 8}, + {"LGBM_BoosterPredictForCSC_R" , (DL_FUNC) &LGBM_BoosterPredictForCSC_R , 14}, + {"LGBM_BoosterPredictForCSR_R" , (DL_FUNC) &LGBM_BoosterPredictForCSR_R , 12}, + {"LGBM_BoosterPredictForCSRSingleRow_R" , (DL_FUNC) &LGBM_BoosterPredictForCSRSingleRow_R , 11}, + {"LGBM_BoosterPredictForCSRSingleRowFastInit_R", (DL_FUNC) &LGBM_BoosterPredictForCSRSingleRowFastInit_R, 8}, + {"LGBM_BoosterPredictForCSRSingleRowFast_R" , (DL_FUNC) &LGBM_BoosterPredictForCSRSingleRowFast_R , 4}, + {"LGBM_BoosterPredictSparseOutput_R" , (DL_FUNC) &LGBM_BoosterPredictSparseOutput_R , 10}, + {"LGBM_BoosterPredictForMat_R" , (DL_FUNC) &LGBM_BoosterPredictForMat_R , 11}, + {"LGBM_BoosterPredictForMatSingleRow_R" , (DL_FUNC) &LGBM_BoosterPredictForMatSingleRow_R , 9}, + {"LGBM_BoosterPredictForMatSingleRowFastInit_R", (DL_FUNC) &LGBM_BoosterPredictForMatSingleRowFastInit_R, 8}, + {"LGBM_BoosterPredictForMatSingleRowFast_R" , (DL_FUNC) &LGBM_BoosterPredictForMatSingleRowFast_R , 3}, + {"LGBM_BoosterSaveModel_R" , (DL_FUNC) &LGBM_BoosterSaveModel_R , 4}, + {"LGBM_BoosterSaveModelToString_R" , (DL_FUNC) &LGBM_BoosterSaveModelToString_R , 3}, + {"LGBM_BoosterDumpModel_R" , (DL_FUNC) &LGBM_BoosterDumpModel_R , 3}, + {"LGBM_NullBoosterHandleError_R" , (DL_FUNC) &LGBM_NullBoosterHandleError_R , 0}, + {"LGBM_DumpParamAliases_R" , (DL_FUNC) &LGBM_DumpParamAliases_R , 0}, {NULL, NULL, 0} }; diff --git a/R-package/src/lightgbm_R.h b/R-package/src/lightgbm_R.h index 0f2a0949b61c..510ef54c09f3 100644 --- a/R-package/src/lightgbm_R.h +++ b/R-package/src/lightgbm_R.h @@ -545,10 +545,12 
@@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSC_R( * Note: should pre-allocate memory for out_result, * for normal and raw score: its length is equal to num_class * num_data * for leaf index, its length is equal to num_class * num_data * num_iteration +* for feature contributions, its length is equal to num_data * num_class * (num_features + 1) * \param handle Booster handle -* \param data pointer to the data space -* \param num_row number of rows -* \param num_col number columns +* \param indptr array with the index pointer of the data in CSR format +* \param indices array with the non-zero indices of the data in CSR format +* \param data array with the non-zero values of the data in CSR format +* \param ncols number of columns in the data * \param is_rawscore 1 to get raw predictions, before transformations like * converting to probabilities, 0 otherwise * \param is_leafidx 1 to get record of which leaf in each tree @@ -560,11 +562,46 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSC_R( * \param out_result prediction result * \return R NULL value */ -LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMat_R( +LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSR_R( SEXP handle, + SEXP indptr, + SEXP indices, SEXP data, - SEXP num_row, - SEXP num_col, + SEXP ncols, + SEXP is_rawscore, + SEXP is_leafidx, + SEXP is_predcontrib, + SEXP start_iteration, + SEXP num_iteration, + SEXP parameter, + SEXP out_result +); + +/*! +* \brief make prediction for a single row of data +* Note: should pre-allocate memory for out_result, +* for normal and raw score: its length is equal to num_class +* for leaf index, its length is equal to num_class * num_iteration +* for feature contributions, its length is equal to num_class * (num_features + 1) +* \param handle Booster handle +* \param indices array corresponding to the indices of the columns with non-zero values of the row to predict on +* \param data array corresponding to the non-zero values of row to predict on +* \param is_rawscore 1 to get raw predictions, before transformations like +* converting to probabilities, 0 otherwise +* \param is_leafidx 1 to get record of which leaf in each tree +* observations fell into, 0 otherwise +* \param is_predcontrib 1 to get feature contributions, 0 otherwise +* \param start_iteration Start index of the iteration to predict +* \param num_iteration number of iteration for prediction, <= 0 means no limit +* \param parameter additional parameters +* \param out_result prediction result +* \return R NULL value +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSRSingleRow_R( + SEXP handle, + SEXP indices, + SEXP data, + SEXP ncols, SEXP is_rawscore, SEXP is_leafidx, SEXP is_predcontrib, @@ -574,6 +611,50 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMat_R( SEXP out_result ); +/*! +* \brief Initialize and return a fast configuration handle to use with ``LGBM_BoosterPredictForCSRSingleRowFast_R``. 
+* \param handle Booster handle +* \param num_col number columns in the data +* \param is_rawscore 1 to get raw predictions, before transformations like +* converting to probabilities, 0 otherwise +* \param is_leafidx 1 to get record of which leaf in each tree +* observations fell into, 0 otherwise +* \param is_predcontrib 1 to get feature contributions, 0 otherwise +* \param start_iteration Start index of the iteration to predict +* \param num_iteration number of iteration for prediction, <= 0 means no limit +* \param parameter additional parameters +* \return Fast configuration handle +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSRSingleRowFastInit_R( + SEXP handle, + SEXP ncols, + SEXP is_rawscore, + SEXP is_leafidx, + SEXP is_predcontrib, + SEXP start_iteration, + SEXP num_iteration, + SEXP parameter +); + +/*! +* \brief make prediction for a single row of data +* Note: should pre-allocate memory for out_result, +* for normal and raw score: its length is equal to num_class +* for leaf index, its length is equal to num_class * num_iteration +* for feature contributions, its length is equal to num_class * (num_features + 1) +* \param handle_fastConfig Fast configuration handle +* \param indices array corresponding to the indices of the columns with non-zero values of the row to predict on +* \param data array corresponding to the non-zero values of row to predict on +* \param out_result prediction result +* \return R NULL value +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForCSRSingleRowFast_R( + SEXP handle_fastConfig, + SEXP indices, + SEXP data, + SEXP out_result +); + /*! * \brief make feature contribution prediction for a new Dataset * \param handle Booster handle @@ -603,6 +684,113 @@ LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictSparseOutput_R( SEXP parameter ); +/*! +* \brief make prediction for a new Dataset +* Note: should pre-allocate memory for out_result, +* for normal and raw score: its length is equal to num_class * num_data +* for leaf index, its length is equal to num_class * num_data * num_iteration +* \param handle Booster handle +* \param data pointer to the data space +* \param num_row number of rows +* \param num_col number columns +* \param is_rawscore 1 to get raw predictions, before transformations like +* converting to probabilities, 0 otherwise +* \param is_leafidx 1 to get record of which leaf in each tree +* observations fell into, 0 otherwise +* \param is_predcontrib 1 to get feature contributions, 0 otherwise +* \param start_iteration Start index of the iteration to predict +* \param num_iteration number of iteration for prediction, <= 0 means no limit +* \param parameter additional parameters +* \param out_result prediction result +* \return R NULL value +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMat_R( + SEXP handle, + SEXP data, + SEXP num_row, + SEXP num_col, + SEXP is_rawscore, + SEXP is_leafidx, + SEXP is_predcontrib, + SEXP start_iteration, + SEXP num_iteration, + SEXP parameter, + SEXP out_result +); + +/*! 
+* \brief make prediction for a single row of data +* Note: should pre-allocate memory for out_result, +* for normal and raw score: its length is equal to num_class +* for leaf index, its length is equal to num_class * num_iteration +* for feature contributions, its length is equal to num_class * (num_features + 1) +* \param handle Booster handle +* \param data array corresponding to the row to predict on +* \param is_rawscore 1 to get raw predictions, before transformations like +* converting to probabilities, 0 otherwise +* \param is_leafidx 1 to get record of which leaf in each tree +* observations fell into, 0 otherwise +* \param is_predcontrib 1 to get feature contributions, 0 otherwise +* \param start_iteration Start index of the iteration to predict +* \param num_iteration number of iteration for prediction, <= 0 means no limit +* \param parameter additional parameters +* \param out_result prediction result +* \return R NULL value +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMatSingleRow_R( + SEXP handle, + SEXP data, + SEXP is_rawscore, + SEXP is_leafidx, + SEXP is_predcontrib, + SEXP start_iteration, + SEXP num_iteration, + SEXP parameter, + SEXP out_result +); + +/*! +* \brief Initialize and return a fast configuration handle to use with ``LGBM_BoosterPredictForMatSingleRowFast_R``. +* \param handle Booster handle +* \param num_col number columns in the data +* \param is_rawscore 1 to get raw predictions, before transformations like +* converting to probabilities, 0 otherwise +* \param is_leafidx 1 to get record of which leaf in each tree +* observations fell into, 0 otherwise +* \param is_predcontrib 1 to get feature contributions, 0 otherwise +* \param start_iteration Start index of the iteration to predict +* \param num_iteration number of iteration for prediction, <= 0 means no limit +* \param parameter additional parameters +* \return Fast configuration handle +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMatSingleRowFastInit_R( + SEXP handle, + SEXP ncols, + SEXP is_rawscore, + SEXP is_leafidx, + SEXP is_predcontrib, + SEXP start_iteration, + SEXP num_iteration, + SEXP parameter +); + +/*! +* \brief make prediction for a single row of data +* Note: should pre-allocate memory for out_result, +* for normal and raw score: its length is equal to num_class +* for leaf index, its length is equal to num_class * num_iteration +* for feature contributions, its length is equal to num_class * (num_features + 1) +* \param handle_fastConfig Fast configuration handle +* \param data array corresponding to the row to predict on +* \param out_result prediction result +* \return R NULL value +*/ +LIGHTGBM_C_EXPORT SEXP LGBM_BoosterPredictForMatSingleRowFast_R( + SEXP handle_fastConfig, + SEXP data, + SEXP out_result +); + /*! 
* \brief save model into file * \param handle Booster handle diff --git a/R-package/tests/testthat/test_Predictor.R b/R-package/tests/testthat/test_Predictor.R index cd38bebc0bcc..a5003f086cbd 100644 --- a/R-package/tests/testthat/test_Predictor.R +++ b/R-package/tests/testthat/test_Predictor.R @@ -528,6 +528,129 @@ test_that("predictions for multiclass classification are returned as matrix", { expect_equal(ncol(pred), 3L) }) +test_that("Single-row predictions are identical to multi-row ones", { + data(mtcars) + X <- as.matrix(mtcars[, -1L]) + y <- mtcars[, 1L] + dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L)) + params <- list(min_data_in_leaf = 2L) + model <- lgb.train( + params = params + , data = dtrain + , obj = "regression" + , nrounds = 5L + , verbose = -1L + ) + + x1 <- X[1L, , drop = FALSE] + x11 <- X[11L, , drop = FALSE] + x1_spv <- as(x1, "sparseVector") + x11_spv <- as(x11, "sparseVector") + x1_csr <- as(x1, "RsparseMatrix") + x11_csr <- as(x11, "RsparseMatrix") + + pred_all <- predict(model, X) + pred1_wo_config <- predict(model, x1) + pred11_wo_config <- predict(model, x11) + pred1_spv_wo_config <- predict(model, x1_spv) + pred11_spv_wo_config <- predict(model, x11_spv) + pred1_csr_wo_config <- predict(model, x1_csr) + pred11_csr_wo_config <- predict(model, x11_csr) + + lgb.configure_fast_predict(model) + pred1_w_config <- predict(model, x1) + pred11_w_config <- predict(model, x11) + + model <- lgb.train( + params = params + , data = dtrain + , obj = "regression" + , nrounds = 5L + , verbose = -1L + ) + lgb.configure_fast_predict(model, csr = TRUE) + pred1_spv_w_config <- predict(model, x1_spv) + pred11_spv_w_config <- predict(model, x11_spv) + pred1_csr_w_config <- predict(model, x1_csr) + pred11_csr_w_config <- predict(model, x11_csr) + + expect_equal(pred1_wo_config, pred_all[1L]) + expect_equal(pred11_wo_config, pred_all[11L]) + expect_equal(pred1_spv_wo_config, unname(pred_all[1L])) + expect_equal(pred11_spv_wo_config, unname(pred_all[11L])) + expect_equal(pred1_csr_wo_config, pred_all[1L]) + expect_equal(pred11_csr_wo_config, pred_all[11L]) + + expect_equal(pred1_w_config, pred_all[1L]) + expect_equal(pred11_w_config, pred_all[11L]) + expect_equal(pred1_spv_w_config, unname(pred_all[1L])) + expect_equal(pred11_spv_w_config, unname(pred_all[11L])) + expect_equal(pred1_csr_w_config, pred_all[1L]) + expect_equal(pred11_csr_w_config, pred_all[11L]) +}) + +test_that("Fast-predict configuration accepts non-default prediction types", { + data(mtcars) + X <- as.matrix(mtcars[, -1L]) + y <- mtcars[, 1L] + dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L)) + params <- list(min_data_in_leaf = 2L) + model <- lgb.train( + params = params + , data = dtrain + , obj = "regression" + , nrounds = 5L + , verbose = -1L + ) + + x1 <- X[1L, , drop = FALSE] + x11 <- X[11L, , drop = FALSE] + + pred_all <- predict(model, X, type = "leaf") + pred1_wo_config <- predict(model, x1, type = "leaf") + pred11_wo_config <- predict(model, x11, type = "leaf") + expect_equal(pred1_wo_config, pred_all[1L, , drop = FALSE]) + expect_equal(pred11_wo_config, pred_all[11L, , drop = FALSE]) + + lgb.configure_fast_predict(model, type = "leaf") + pred1_w_config <- predict(model, x1, type = "leaf") + pred11_w_config <- predict(model, x11, type = "leaf") + expect_equal(pred1_w_config, pred_all[1L, , drop = FALSE]) + expect_equal(pred11_w_config, pred_all[11L, , drop = FALSE]) +}) + +test_that("Fast-predict configuration does not block other prediction types", { + data(mtcars) + X <- 
as.matrix(mtcars[, -1L]) + y <- mtcars[, 1L] + dtrain <- lgb.Dataset(X, label = y, params = list(max_bin = 5L)) + params <- list(min_data_in_leaf = 2L) + model <- lgb.train( + params = params + , data = dtrain + , obj = "regression" + , nrounds = 5L + , verbose = -1L + ) + + x1 <- X[1L, , drop = FALSE] + x11 <- X[11L, , drop = FALSE] + + pred_all <- predict(model, X) + pred_all_leaf <- predict(model, X, type = "leaf") + + lgb.configure_fast_predict(model) + pred1_w_config <- predict(model, x1) + pred11_w_config <- predict(model, x11) + pred1_leaf_w_config <- predict(model, x1, type = "leaf") + pred11_leaf_w_config <- predict(model, x11, type = "leaf") + + expect_equal(pred1_w_config, pred_all[1L]) + expect_equal(pred11_w_config, pred_all[11L]) + expect_equal(pred1_leaf_w_config, pred_all_leaf[1L, , drop = FALSE]) + expect_equal(pred11_leaf_w_config, pred_all_leaf[11L, , drop = FALSE]) +}) + test_that("predict type='class' returns predicted class for classification objectives", { data(agaricus.train, package = "lightgbm") X <- as.matrix(agaricus.train$data) diff --git a/R-package/tests/testthat/test_utils.R b/R-package/tests/testthat/test_utils.R index d9bcd6464ad9..4ab05e075ae3 100644 --- a/R-package/tests/testthat/test_utils.R +++ b/R-package/tests/testthat/test_utils.R @@ -135,3 +135,15 @@ test_that("lgb.check.wrapper_param() prefers alias to keyword arg", { expect_equal(params2[["num_iterations"]], num_tree) expect_identical(params2, list(num_iterations = num_tree)) }) + +test_that("lgb.equal.or.both.null produces expected results", { + expect_true(lgb.equal.or.both.null(NULL, NULL)) + expect_false(lgb.equal.or.both.null(1.0, NULL)) + expect_false(lgb.equal.or.both.null(NULL, 1.0)) + expect_true(lgb.equal.or.both.null(1.0, 1.0)) + expect_true(lgb.equal.or.both.null(1.0, 1L)) + expect_false(lgb.equal.or.both.null(NA, NULL)) + expect_false(lgb.equal.or.both.null(NULL, NA)) + expect_false(lgb.equal.or.both.null(10.0, 1L)) + expect_true(lgb.equal.or.both.null(0L, 0L)) +})
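A short companion sketch of the serialization caveat documented for lgb.restore_handle above: the fast-predict configuration is tied to a C++ handle and does not survive readRDS, so it must be generated anew on the loaded Booster (same mtcars setup as elsewhere in this patch; the temporary file path is purely illustrative):

library(lightgbm)

data(mtcars)
X <- as.matrix(mtcars[, -1L])
y <- mtcars[, 1L]
model <- lgb.train(
    params = list(min_data_in_leaf = 2L)
    , data = lgb.Dataset(X, label = y, params = list(max_bin = 5L))
    , obj = "regression"
    , nrounds = 5L
    , verbose = -1L
)
lgb.configure_fast_predict(model)

model_file <- tempfile(fileext = ".rds")
saveRDS(model, model_file)
model_loaded <- readRDS(model_file)

# The loaded object has a blank C++ handle and no active fast-predict
# configuration: restore the handle and configure it again before relying
# on the fast route.
lgb.restore_handle(model_loaded)
lgb.configure_fast_predict(model_loaded)
predict(model_loaded, X[11L, , drop = FALSE])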