diff --git a/R-package/R/lgb.Booster.R b/R-package/R/lgb.Booster.R index 57e1580d94c..cc9ca7cc5c7 100644 --- a/R-package/R/lgb.Booster.R +++ b/R-package/R/lgb.Booster.R @@ -633,7 +633,6 @@ Booster <- R6::R6Class( #' number of columns corresponding to the number of trees. #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -651,7 +650,6 @@ Booster <- R6::R6Class( #' learning_rate = 1, #' early_stopping_rounds = 10) #' preds <- predict(model, test$data) -#' } #' #' @rdname predict.lgb.Booster #' @export @@ -692,7 +690,6 @@ predict.lgb.Booster <- function(object, #' @return lgb.Booster #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -713,7 +710,6 @@ predict.lgb.Booster <- function(object, #' load_booster <- lgb.load(filename = "model.txt") #' model_string <- model$save_model_to_string(NULL) # saves best iteration #' load_booster_from_str <- lgb.load(model_str = model_string) -#' } #' #' @rdname lgb.load #' @export @@ -752,7 +748,6 @@ lgb.load <- function(filename = NULL, model_str = NULL){ #' @return lgb.Booster #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -770,7 +765,6 @@ lgb.load <- function(filename = NULL, model_str = NULL){ #' learning_rate = 1, #' early_stopping_rounds = 10) #' lgb.save(model, "model.txt") -#' } #' #' @rdname lgb.save #' @export @@ -801,7 +795,6 @@ lgb.save <- function(booster, filename, num_iteration = NULL){ #' @return json format of model #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -819,7 +812,6 @@ lgb.save <- function(booster, filename, num_iteration = NULL){ #' learning_rate = 1, #' early_stopping_rounds = 10) #' json_model <- lgb.dump(model) -#' } #' #' @rdname lgb.dump #' @export @@ -847,7 +839,6 @@ lgb.dump <- 
function(booster, num_iteration = NULL){ #' @return vector of evaluation result #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -865,7 +856,6 @@ lgb.dump <- function(booster, num_iteration = NULL){ #' learning_rate = 1, #' early_stopping_rounds = 10) #' lgb.get.eval.result(model, "test", "l2") -#' } #' #' @rdname lgb.get.eval.result #' @export diff --git a/R-package/R/lgb.Dataset.R b/R-package/R/lgb.Dataset.R index 8d0e88960d5..9fd9e8409e4 100644 --- a/R-package/R/lgb.Dataset.R +++ b/R-package/R/lgb.Dataset.R @@ -311,6 +311,7 @@ Dataset <- R6::R6Class( } else if (is.matrix(private$raw_data) || methods::is(private$raw_data, "dgCMatrix")) { # Check if dgCMatrix (sparse matrix column compressed) + # NOTE: requires Matrix package dim(private$raw_data) } else { @@ -392,9 +393,11 @@ Dataset <- R6::R6Class( # Check for info name and handle if (is.null(private$info[[name]])) { + if (lgb.is.null.handle(private$handle)){ - stop("Cannot perform getinfo before construct Dataset.") + stop("Cannot perform getinfo before constructing Dataset.") } + # Get field size of info info_len <- 0L info_len <- lgb.call("LGBM_DatasetGetFieldSize_R", @@ -646,7 +649,6 @@ Dataset <- R6::R6Class( #' @return constructed dataset #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -654,7 +656,6 @@ Dataset <- R6::R6Class( #' lgb.Dataset.save(dtrain, "lgb.Dataset.data") #' dtrain <- lgb.Dataset("lgb.Dataset.data") #' lgb.Dataset.construct(dtrain) -#' } #' #' @export lgb.Dataset <- function(data, @@ -692,7 +693,6 @@ lgb.Dataset <- function(data, #' @return constructed dataset #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -700,7 +700,6 @@ lgb.Dataset <- function(data, #' data(agaricus.test, package = "lightgbm") #' test <- agaricus.test #' dtest <- 
lgb.Dataset.create.valid(dtrain, test$data, label = test$label) -#' } #' #' @export lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) { @@ -720,13 +719,11 @@ lgb.Dataset.create.valid <- function(dataset, data, info = list(), ...) { #' @param dataset Object of class \code{lgb.Dataset} #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) #' lgb.Dataset.construct(dtrain) -#' } #' #' @export lgb.Dataset.construct <- function(dataset) { @@ -754,7 +751,6 @@ lgb.Dataset.construct <- function(dataset) { #' be directly used with an \code{lgb.Dataset} object. #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -763,7 +759,6 @@ lgb.Dataset.construct <- function(dataset) { #' stopifnot(nrow(dtrain) == nrow(train$data)) #' stopifnot(ncol(dtrain) == ncol(train$data)) #' stopifnot(all(dim(dtrain) == dim(train$data))) -#' } #' #' @rdname dim #' @export @@ -793,7 +788,6 @@ dim.lgb.Dataset <- function(x, ...) { #' Since row names are irrelevant, it is recommended to use \code{colnames} directly. #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -803,7 +797,6 @@ dim.lgb.Dataset <- function(x, ...) 
{ #' colnames(dtrain) #' colnames(dtrain) <- make.names(1:ncol(train$data)) #' print(dtrain, verbose = TRUE) -#' } #' #' @rdname dimnames.lgb.Dataset #' @export @@ -864,15 +857,14 @@ dimnames.lgb.Dataset <- function(x) { #' @return constructed sub dataset #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) #' #' dsub <- lightgbm::slice(dtrain, 1:42) +#' lgb.Dataset.construct(dsub) #' labels <- lightgbm::getinfo(dsub, "label") -#' } #' #' @export slice <- function(dataset, ...) { @@ -911,7 +903,6 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) { #' } #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -923,7 +914,6 @@ slice.lgb.Dataset <- function(dataset, idxset, ...) { #' #' labels2 <- lightgbm::getinfo(dtrain, "label") #' stopifnot(all(labels2 == 1 - labels)) -#' } #' #' @export getinfo <- function(dataset, ...) { @@ -963,7 +953,6 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) { #' } #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -975,7 +964,6 @@ getinfo.lgb.Dataset <- function(dataset, name, ...) { #' #' labels2 <- lightgbm::getinfo(dtrain, "label") #' stopifnot(all.equal(labels2, 1 - labels)) -#' } #' #' @export setinfo <- function(dataset, ...) { @@ -1003,7 +991,6 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) { #' @return passed dataset #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -1011,7 +998,6 @@ setinfo.lgb.Dataset <- function(dataset, name, info, ...) 
{ #' lgb.Dataset.save(dtrain, "lgb.Dataset.data") #' dtrain <- lgb.Dataset("lgb.Dataset.data") #' lgb.Dataset.set.categorical(dtrain, 1:2) -#' } #' #' @rdname lgb.Dataset.set.categorical #' @export @@ -1037,7 +1023,6 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) { #' @return passed dataset #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package ="lightgbm") #' train <- agaricus.train @@ -1046,7 +1031,6 @@ lgb.Dataset.set.categorical <- function(dataset, categorical_feature) { #' test <- agaricus.test #' dtest <- lgb.Dataset(test$data, test = train$label) #' lgb.Dataset.set.reference(dtest, dtrain) -#' } #' #' @rdname lgb.Dataset.set.reference #' @export @@ -1070,13 +1054,11 @@ lgb.Dataset.set.reference <- function(dataset, reference) { #' #' @examples #' -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) #' lgb.Dataset.save(dtrain, "data.bin") -#' } #' #' @rdname lgb.Dataset.save #' @export diff --git a/R-package/R/lgb.cv.R b/R-package/R/lgb.cv.R index 08a0b51a362..0a82f36e7d6 100644 --- a/R-package/R/lgb.cv.R +++ b/R-package/R/lgb.cv.R @@ -55,7 +55,6 @@ CVBooster <- R6::R6Class( #' @return a trained model \code{lgb.CVBooster}. 
#' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -68,7 +67,6 @@ CVBooster <- R6::R6Class( #' min_data = 1, #' learning_rate = 1, #' early_stopping_rounds = 10) -#' } #' @export lgb.cv <- function(params = list(), data, diff --git a/R-package/R/lgb.importance.R b/R-package/R/lgb.importance.R index 6ce5538779c..dbc78b1c848 100644 --- a/R-package/R/lgb.importance.R +++ b/R-package/R/lgb.importance.R @@ -16,13 +16,12 @@ #' } #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) #' -#' params = list(objective = "binary", +#' params <- list(objective = "binary", #' learning_rate = 0.01, num_leaves = 63, max_depth = -1, #' min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1) #' model <- lgb.train(params, dtrain, 20) @@ -30,7 +29,6 @@ #' #' tree_imp1 <- lgb.importance(model, percentage = TRUE) #' tree_imp2 <- lgb.importance(model, percentage = FALSE) -#' } #' #' @importFrom magrittr %>% %T>% #' @importFrom data.table := diff --git a/R-package/R/lgb.interprete.R b/R-package/R/lgb.interprete.R index 1f4224f0d3f..db69d5fbd54 100644 --- a/R-package/R/lgb.interprete.R +++ b/R-package/R/lgb.interprete.R @@ -17,8 +17,6 @@ #' For multiclass classification, a \code{list} of \code{data.table} with the Feature column and Contribution columns to each class. 
#' #' @examples -#' \dontrun{ -#' library(lightgbm) #' Sigmoid <- function(x) 1 / (1 + exp(-x)) #' Logit <- function(x) log(x / (1 - x)) #' data(agaricus.train, package = "lightgbm") @@ -27,15 +25,18 @@ #' setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label))) #' data(agaricus.test, package = "lightgbm") #' test <- agaricus.test -#' -#' params = list(objective = "binary", -#' learning_rate = 0.01, num_leaves = 63, max_depth = -1, -#' min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1) -#' model <- lgb.train(params, dtrain, 20) +#' +#' params <- list( +#' objective = "binary" +#' , learning_rate = 0.01 +#' , num_leaves = 63 +#' , max_depth = -1 +#' , min_data_in_leaf = 1 +#' , min_sum_hessian_in_leaf = 1 +#' ) #' model <- lgb.train(params, dtrain, 20) -#' +#' #' tree_interpretation <- lgb.interprete(model, test$data, 1:5) -#' } #' #' @importFrom magrittr %>% %T>% #' @export diff --git a/R-package/R/lgb.model.dt.tree.R b/R-package/R/lgb.model.dt.tree.R index 05f0c41841f..70355b26faa 100644 --- a/R-package/R/lgb.model.dt.tree.R +++ b/R-package/R/lgb.model.dt.tree.R @@ -30,21 +30,18 @@ #' } #' #' @examples -#' \dontrun{ -#' library(lightgbm) #' #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train #' dtrain <- lgb.Dataset(train$data, label = train$label) #' -#' params = list(objective = "binary", +#' params <- list(objective = "binary", #' learning_rate = 0.01, num_leaves = 63, max_depth = -1, #' min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1) #' model <- lgb.train(params, dtrain, 20) #' model <- lgb.train(params, dtrain, 20) #' #' tree_dt <- lgb.model.dt.tree(model) -#' } #' #' @importFrom magrittr %>% #' @importFrom data.table := data.table rbindlist diff --git a/R-package/R/lgb.plot.importance.R b/R-package/R/lgb.plot.importance.R index 3149d2f6c5c..badb0560dc4 100644 --- a/R-package/R/lgb.plot.importance.R +++ b/R-package/R/lgb.plot.importance.R @@ -17,20 +17,23 @@ #' and silently returns a processed data.table with 
\code{top_n} features sorted by defined importance. #' #' @examples -#' \dontrun{ -#' data(agaricus.train, package = "lightgbm") -#' train <- agaricus.train -#' dtrain <- lgb.Dataset(train$data, label = train$label) -#' -#' params = list(objective = "binary", -#' learning_rate = 0.01, num_leaves = 63, max_depth = -1, -#' min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1) -#' model <- lgb.train(params, dtrain, 20) -#' model <- lgb.train(params, dtrain, 20) -#' -#' tree_imp <- lgb.importance(model, percentage = TRUE) -#' lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain") -#' } +#' data(agaricus.train, package = "lightgbm") +#' train <- agaricus.train +#' dtrain <- lgb.Dataset(train$data, label = train$label) +#' +#' params <- list( +#' objective = "binary" +#' , learning_rate = 0.01 +#' , num_leaves = 63 +#' , max_depth = -1 +#' , min_data_in_leaf = 1 +#' , min_sum_hessian_in_leaf = 1 +#' ) +#' +#' model <- lgb.train(params, dtrain, 20) +#' +#' tree_imp <- lgb.importance(model, percentage = TRUE) +#' lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain") #' @importFrom graphics barplot par #' @export lgb.plot.importance <- function(tree_imp, diff --git a/R-package/R/lgb.plot.interpretation.R b/R-package/R/lgb.plot.interpretation.R index 680b52f226a..d0778e7d67e 100644 --- a/R-package/R/lgb.plot.interpretation.R +++ b/R-package/R/lgb.plot.interpretation.R @@ -16,7 +16,6 @@ #' The \code{lgb.plot.interpretation} function creates a \code{barplot}.
#' #' @examples -#' \dontrun{ #' library(lightgbm) #' Sigmoid <- function(x) {1 / (1 + exp(-x))} #' Logit <- function(x) {log(x / (1 - x))} @@ -27,7 +26,7 @@ #' data(agaricus.test, package = "lightgbm") #' test <- agaricus.test #' -#' params = list(objective = "binary", +#' params <- list(objective = "binary", #' learning_rate = 0.01, num_leaves = 63, max_depth = -1, #' min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1) #' model <- lgb.train(params, dtrain, 20) @@ -35,7 +34,6 @@ #' #' tree_interpretation <- lgb.interprete(model, test$data, 1:5) #' lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10) -#' } #' @importFrom graphics barplot par #' @export lgb.plot.interpretation <- function(tree_interpretation_dt, diff --git a/R-package/R/lgb.prepare.R b/R-package/R/lgb.prepare.R index ef237254099..1b4c8ebb852 100644 --- a/R-package/R/lgb.prepare.R +++ b/R-package/R/lgb.prepare.R @@ -1,94 +1,92 @@ -#' Data preparator for LightGBM datasets (numeric) -#' -#' Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric without integers. Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets. -#' -#' @param data A data.frame or data.table to prepare. -#' -#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. -#' -#' @examples -#' \dontrun{ -#' library(lightgbm) -#' data(iris) -#' -#' str(iris) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... -#' -#' str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric -#' # 'data.frame': 150 obs. 
of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... -#' -#' # When lightgbm package is installed, and you do not want to load it -#' # You can still use the function! -#' lgb.unloader() -#' str(lightgbm::lgb.prepare(data = iris)) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... -#' } -#' -#' @export -lgb.prepare <- function(data) { - - # data.table not behaving like data.frame - if ("data.table" %in% class(data)) { - - # Get data classes - list_classes <- sapply(data, class) - - # Convert characters to factors only (we can change them to numeric after) - is_char <- which(list_classes == "character") - if (length(is_char) > 0) { - data[, (is_char) := lapply(.SD, function(x) {as.numeric(as.factor(x))}), .SDcols = is_char] - } - - # Convert factors to numeric (integer is more efficient actually) - is_fact <- c(which(list_classes == "factor"), is_char) - if (length(is_fact) > 0) { - data[, (is_fact) := lapply(.SD, function(x) {as.numeric(x)}), .SDcols = is_fact] - } - - } else { - - # Default routine (data.frame) - if ("data.frame" %in% class(data)) { - - # Get data classes - list_classes <- sapply(data, class) - - # Convert characters to factors to numeric (integer is more efficient actually) - is_char <- which(list_classes == "character") - if (length(is_char) > 0) { - data[is_char] <- lapply(data[is_char], function(x) {as.numeric(as.factor(x))}) - } - - # Convert factors to 
numeric (integer is more efficient actually) - is_fact <- which(list_classes == "factor") - if (length(is_fact) > 0) { - data[is_fact] <- lapply(data[is_fact], function(x) {as.numeric(x)}) - } - - } else { - - # What do you think you are doing here? Throw error. - stop("lgb.prepare2: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame") - - } - - } - - return(data) - -} +#' Data preparator for LightGBM datasets (numeric) +#' +#' Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric without integers. Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets. +#' +#' @param data A data.frame or data.table to prepare. +#' +#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. +#' +#' @examples +#' library(lightgbm) +#' data(iris) +#' +#' str(iris) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... +#' +#' str(lgb.prepare(data = iris)) # Convert all factors/chars to numeric +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... +#' +#' # When lightgbm package is installed, and you do not want to load it +#' # You can still use the function! 
+#' lgb.unloader() +#' str(lightgbm::lgb.prepare(data = iris)) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... +#' +#' @export +lgb.prepare <- function(data) { + + # data.table not behaving like data.frame + if ("data.table" %in% class(data)) { + + # Get data classes + list_classes <- sapply(data, class) + + # Convert characters to factors only (we can change them to numeric after) + is_char <- which(list_classes == "character") + if (length(is_char) > 0) { + data[, (is_char) := lapply(.SD, function(x) {as.numeric(as.factor(x))}), .SDcols = is_char] + } + + # Convert factors to numeric (integer is more efficient actually) + is_fact <- c(which(list_classes == "factor"), is_char) + if (length(is_fact) > 0) { + data[, (is_fact) := lapply(.SD, function(x) {as.numeric(x)}), .SDcols = is_fact] + } + + } else { + + # Default routine (data.frame) + if ("data.frame" %in% class(data)) { + + # Get data classes + list_classes <- sapply(data, class) + + # Convert characters to factors to numeric (integer is more efficient actually) + is_char <- which(list_classes == "character") + if (length(is_char) > 0) { + data[is_char] <- lapply(data[is_char], function(x) {as.numeric(as.factor(x))}) + } + + # Convert factors to numeric (integer is more efficient actually) + is_fact <- which(list_classes == "factor") + if (length(is_fact) > 0) { + data[is_fact] <- lapply(data[is_fact], function(x) {as.numeric(x)}) + } + + } else { + + # What do you think you are doing here? Throw error. 
+ stop("lgb.prepare2: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame") + + } + + } + + return(data) + +} diff --git a/R-package/R/lgb.prepare2.R b/R-package/R/lgb.prepare2.R index 571bbf19b7b..fa2fc054ac7 100644 --- a/R-package/R/lgb.prepare2.R +++ b/R-package/R/lgb.prepare2.R @@ -1,95 +1,93 @@ -#' Data preparator for LightGBM datasets (integer) -#' -#' Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM. -#' -#' @param data A data.frame or data.table to prepare. -#' -#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. -#' -#' @examples -#' \dontrun{ -#' library(lightgbm) -#' data(iris) -#' -#' str(iris) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... -#' -#' str(lgb.prepare2(data = iris)) # Convert all factors/chars to integer -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... 
-#' -#' # When lightgbm package is installed, and you do not want to load it -#' # You can still use the function! -#' lgb.unloader() -#' str(lightgbm::lgb.prepare2(data = iris)) -#' # 'data.frame': 150 obs. of 5 variables: -#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... -#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... -#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... -#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... -#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... -#' -#' } -#' -#' @export -lgb.prepare2 <- function(data) { - - # data.table not behaving like data.frame - if (inherits(data, "data.table")) { - - # Get data classes - list_classes <- vapply(data, class, character(1)) - - # Convert characters to factors only (we can change them to numeric after) - is_char <- which(list_classes == "character") - if (length(is_char) > 0) { - data[, (is_char) := lapply(.SD, function(x) {as.integer(as.factor(x))}), .SDcols = is_char] - } - - # Convert factors to numeric (integer is more efficient actually) - is_fact <- c(which(list_classes == "factor"), is_char) - if (length(is_fact) > 0) { - data[, (is_fact) := lapply(.SD, function(x) {as.integer(x)}), .SDcols = is_fact] - } - - } else { - - # Default routine (data.frame) - if (inherits(data, "data.frame")) { - - # Get data classes - list_classes <- vapply(data, class, character(1)) - - # Convert characters to factors to numeric (integer is more efficient actually) - is_char <- which(list_classes == "character") - if (length(is_char) > 0) { - data[is_char] <- lapply(data[is_char], function(x) {as.integer(as.factor(x))}) - } - - # Convert factors to numeric (integer is more efficient actually) - is_fact <- which(list_classes == "factor") - if (length(is_fact) > 0) { - data[is_fact] <- lapply(data[is_fact], function(x) {as.integer(x)}) - } - - } else { - - # What do you think you are doing here? Throw error. 
- stop("lgb.prepare: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame") - - } - - } - - return(data) - -} +#' Data preparator for LightGBM datasets (integer) +#' +#' Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM. +#' +#' @param data A data.frame or data.table to prepare. +#' +#' @return The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. +#' +#' @examples +#' library(lightgbm) +#' data(iris) +#' +#' str(iris) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... +#' +#' # Convert all factors/chars to integer +#' str(lgb.prepare2(data = iris)) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... +#' +#' # When lightgbm package is installed, and you do not want to load it +#' # You can still use the function! 
+#' lgb.unloader() +#' str(lightgbm::lgb.prepare2(data = iris)) +#' # 'data.frame': 150 obs. of 5 variables: +#' # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... +#' # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... +#' # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... +#' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... +#' # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... +#' +#' @export +lgb.prepare2 <- function(data) { + + # data.table not behaving like data.frame + if (inherits(data, "data.table")) { + + # Get data classes + list_classes <- vapply(data, class, character(1)) + + # Convert characters to factors only (we can change them to numeric after) + is_char <- which(list_classes == "character") + if (length(is_char) > 0) { + data[, (is_char) := lapply(.SD, function(x) {as.integer(as.factor(x))}), .SDcols = is_char] + } + + # Convert factors to numeric (integer is more efficient actually) + is_fact <- c(which(list_classes == "factor"), is_char) + if (length(is_fact) > 0) { + data[, (is_fact) := lapply(.SD, function(x) {as.integer(x)}), .SDcols = is_fact] + } + + } else { + + # Default routine (data.frame) + if (inherits(data, "data.frame")) { + + # Get data classes + list_classes <- vapply(data, class, character(1)) + + # Convert characters to factors to numeric (integer is more efficient actually) + is_char <- which(list_classes == "character") + if (length(is_char) > 0) { + data[is_char] <- lapply(data[is_char], function(x) {as.integer(as.factor(x))}) + } + + # Convert factors to numeric (integer is more efficient actually) + is_fact <- which(list_classes == "factor") + if (length(is_fact) > 0) { + data[is_fact] <- lapply(data[is_fact], function(x) {as.integer(x)}) + } + + } else { + + # What do you think you are doing here? Throw error. 
+ stop("lgb.prepare: you provided ", paste(class(data), collapse = " & "), " but data should have class data.frame") + + } + + } + + return(data) + +} diff --git a/R-package/R/lgb.prepare_rules.R b/R-package/R/lgb.prepare_rules.R index 6d61c7dcd8c..f552028153b 100644 --- a/R-package/R/lgb.prepare_rules.R +++ b/R-package/R/lgb.prepare_rules.R @@ -8,7 +8,6 @@ #' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(iris) #' @@ -66,8 +65,6 @@ #' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... #' # $ Species : num 3 3 3 3 3 3 3 3 3 3 ... #' -#' } -#' #' @importFrom data.table set #' @export lgb.prepare_rules <- function(data, rules = NULL) { diff --git a/R-package/R/lgb.prepare_rules2.R b/R-package/R/lgb.prepare_rules2.R index 21bfa8251ea..6783082e505 100644 --- a/R-package/R/lgb.prepare_rules2.R +++ b/R-package/R/lgb.prepare_rules2.R @@ -8,7 +8,6 @@ #' @return A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The data must be converted to a matrix format (\code{as.matrix}) for input in lgb.Dataset. #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(iris) #' @@ -66,8 +65,6 @@ #' # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... #' # $ Species : int 3 3 3 3 3 3 3 3 3 3 ... #' -#' } -#' #' @importFrom data.table set #' @export lgb.prepare_rules2 <- function(data, rules = NULL) { diff --git a/R-package/R/lgb.train.R b/R-package/R/lgb.train.R index 6a44e1ea7c0..f64e783682e 100644 --- a/R-package/R/lgb.train.R +++ b/R-package/R/lgb.train.R @@ -26,7 +26,6 @@ #' @return a trained booster model \code{lgb.Booster}. 
#' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -43,7 +42,6 @@ #' min_data = 1, #' learning_rate = 1, #' early_stopping_rounds = 10) -#' } #' #' @export lgb.train <- function(params = list(), diff --git a/R-package/R/lgb.unloader.R b/R-package/R/lgb.unloader.R index e4fb1050fda..837020816c8 100644 --- a/R-package/R/lgb.unloader.R +++ b/R-package/R/lgb.unloader.R @@ -9,7 +9,6 @@ #' @return NULL invisibly. #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -32,7 +31,6 @@ #' #' library(lightgbm) #' # Do whatever you want again with LightGBM without object clashing -#' } #' #' @export lgb.unloader <- function(restore = TRUE, wipe = FALSE, envir = .GlobalEnv) { diff --git a/R-package/R/readRDS.lgb.Booster.R b/R-package/R/readRDS.lgb.Booster.R index 36310bf5766..ac2d5c40d0b 100644 --- a/R-package/R/readRDS.lgb.Booster.R +++ b/R-package/R/readRDS.lgb.Booster.R @@ -8,7 +8,6 @@ #' @return lgb.Booster. #' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -27,7 +26,6 @@ #' early_stopping_rounds = 10) #' saveRDS.lgb.Booster(model, "model.rds") #' new_model <- readRDS.lgb.Booster("model.rds") -#' } #' #' @export readRDS.lgb.Booster <- function(file = "", refhook = NULL) { diff --git a/R-package/R/saveRDS.lgb.Booster.R b/R-package/R/saveRDS.lgb.Booster.R index c36c92979d0..5c258507ce3 100644 --- a/R-package/R/saveRDS.lgb.Booster.R +++ b/R-package/R/saveRDS.lgb.Booster.R @@ -13,7 +13,6 @@ #' @return NULL invisibly. 
#' #' @examples -#' \dontrun{ #' library(lightgbm) #' data(agaricus.train, package = "lightgbm") #' train <- agaricus.train @@ -23,16 +22,16 @@ #' dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) #' params <- list(objective = "regression", metric = "l2") #' valids <- list(test = dtest) -#' model <- lgb.train(params, -#' dtrain, -#' 100, -#' valids, -#' min_data = 1, -#' learning_rate = 1, -#' early_stopping_rounds = 10) +#' model <- lgb.train( +#' params +#' , dtrain +#' , 100 +#' , valids +#' , min_data = 1 +#' , learning_rate = 1 +#' , early_stopping_rounds = 10 +#' ) #' saveRDS.lgb.Booster(model, "model.rds") -#' } -#' #' @export saveRDS.lgb.Booster <- function(object, file = "", diff --git a/R-package/man/dim.Rd b/R-package/man/dim.Rd index 6bae0010cf8..a8a567c9b85 100644 --- a/R-package/man/dim.Rd +++ b/R-package/man/dim.Rd @@ -22,7 +22,6 @@ Note: since \code{nrow} and \code{ncol} internally use \code{dim}, they can also be directly used with an \code{lgb.Dataset} object. } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -31,6 +30,5 @@ dtrain <- lgb.Dataset(train$data, label = train$label) stopifnot(nrow(dtrain) == nrow(train$data)) stopifnot(ncol(dtrain) == ncol(train$data)) stopifnot(all(dim(dtrain) == dim(train$data))) -} } diff --git a/R-package/man/dimnames.lgb.Dataset.Rd b/R-package/man/dimnames.lgb.Dataset.Rd index 40d4f6ec8a3..54563ac5c00 100644 --- a/R-package/man/dimnames.lgb.Dataset.Rd +++ b/R-package/man/dimnames.lgb.Dataset.Rd @@ -24,7 +24,6 @@ Generic \code{dimnames} methods are used by \code{colnames}. Since row names are irrelevant, it is recommended to use \code{colnames} directly. 
} \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -34,6 +33,5 @@ dimnames(dtrain) colnames(dtrain) colnames(dtrain) <- make.names(1:ncol(train$data)) print(dtrain, verbose = TRUE) -} } diff --git a/R-package/man/getinfo.Rd b/R-package/man/getinfo.Rd index b9e271864f9..04116a46b47 100644 --- a/R-package/man/getinfo.Rd +++ b/R-package/man/getinfo.Rd @@ -33,7 +33,6 @@ The \code{name} field can be one of the following: } } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -45,6 +44,5 @@ lightgbm::setinfo(dtrain, "label", 1 - labels) labels2 <- lightgbm::getinfo(dtrain, "label") stopifnot(all(labels2 == 1 - labels)) -} } diff --git a/R-package/man/lgb.Dataset.Rd b/R-package/man/lgb.Dataset.Rd index 4b9fbf70c61..e601996e4a3 100644 --- a/R-package/man/lgb.Dataset.Rd +++ b/R-package/man/lgb.Dataset.Rd @@ -32,7 +32,6 @@ Construct lgb.Dataset object from dense matrix, sparse matrix or local file (that was created previously by saving an \code{lgb.Dataset}). 
} \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -40,6 +39,5 @@ dtrain <- lgb.Dataset(train$data, label = train$label) lgb.Dataset.save(dtrain, "lgb.Dataset.data") dtrain <- lgb.Dataset("lgb.Dataset.data") lgb.Dataset.construct(dtrain) -} } diff --git a/R-package/man/lgb.Dataset.construct.Rd b/R-package/man/lgb.Dataset.construct.Rd index 06ee95eafd4..23dfc0e9f67 100644 --- a/R-package/man/lgb.Dataset.construct.Rd +++ b/R-package/man/lgb.Dataset.construct.Rd @@ -13,12 +13,10 @@ lgb.Dataset.construct(dataset) Construct Dataset explicitly } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) lgb.Dataset.construct(dtrain) -} } diff --git a/R-package/man/lgb.Dataset.create.valid.Rd b/R-package/man/lgb.Dataset.create.valid.Rd index 635c74acd7e..0d0f3454a8e 100644 --- a/R-package/man/lgb.Dataset.create.valid.Rd +++ b/R-package/man/lgb.Dataset.create.valid.Rd @@ -22,7 +22,6 @@ constructed dataset Construct validation data according to training data } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -30,6 +29,5 @@ dtrain <- lgb.Dataset(train$data, label = train$label) data(agaricus.test, package = "lightgbm") test <- agaricus.test dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) -} } diff --git a/R-package/man/lgb.Dataset.save.Rd b/R-package/man/lgb.Dataset.save.Rd index 947d0672692..f5664a9841a 100644 --- a/R-package/man/lgb.Dataset.save.Rd +++ b/R-package/man/lgb.Dataset.save.Rd @@ -19,12 +19,10 @@ Save \code{lgb.Dataset} to a binary file } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) lgb.Dataset.save(dtrain, "data.bin") -} } diff --git a/R-package/man/lgb.Dataset.set.categorical.Rd 
b/R-package/man/lgb.Dataset.set.categorical.Rd index e6e38bbb444..1cec77c13d8 100644 --- a/R-package/man/lgb.Dataset.set.categorical.Rd +++ b/R-package/man/lgb.Dataset.set.categorical.Rd @@ -18,7 +18,6 @@ passed dataset Set categorical feature of \code{lgb.Dataset} } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -26,6 +25,5 @@ dtrain <- lgb.Dataset(train$data, label = train$label) lgb.Dataset.save(dtrain, "lgb.Dataset.data") dtrain <- lgb.Dataset("lgb.Dataset.data") lgb.Dataset.set.categorical(dtrain, 1:2) -} } diff --git a/R-package/man/lgb.Dataset.set.reference.Rd b/R-package/man/lgb.Dataset.set.reference.Rd index 9f5ba09350f..fabe7c03e6f 100644 --- a/R-package/man/lgb.Dataset.set.reference.Rd +++ b/R-package/man/lgb.Dataset.set.reference.Rd @@ -18,7 +18,6 @@ passed dataset If you want to use validation data, you should set reference to training data } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package ="lightgbm") train <- agaricus.train @@ -27,6 +26,5 @@ data(agaricus.test, package = "lightgbm") test <- agaricus.test dtest <- lgb.Dataset(test$data, test = train$label) lgb.Dataset.set.reference(dtest, dtrain) -} } diff --git a/R-package/man/lgb.cv.Rd b/R-package/man/lgb.cv.Rd index 649221fd3ca..1aeb6fb1a82 100644 --- a/R-package/man/lgb.cv.Rd +++ b/R-package/man/lgb.cv.Rd @@ -80,7 +80,6 @@ a trained model \code{lgb.CVBooster}. 
Cross validation logic used by LightGBM } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -94,4 +93,3 @@ model <- lgb.cv(params, learning_rate = 1, early_stopping_rounds = 10) } -} diff --git a/R-package/man/lgb.dump.Rd b/R-package/man/lgb.dump.Rd index c30de3bc299..a18483e3dad 100644 --- a/R-package/man/lgb.dump.Rd +++ b/R-package/man/lgb.dump.Rd @@ -18,7 +18,6 @@ json format of model Dump LightGBM model to json } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -36,6 +35,5 @@ model <- lgb.train(params, learning_rate = 1, early_stopping_rounds = 10) json_model <- lgb.dump(model) -} } diff --git a/R-package/man/lgb.get.eval.result.Rd b/R-package/man/lgb.get.eval.result.Rd index ba04ebfef88..52dd2b1dd18 100644 --- a/R-package/man/lgb.get.eval.result.Rd +++ b/R-package/man/lgb.get.eval.result.Rd @@ -25,7 +25,6 @@ vector of evaluation result Get record evaluation result from booster } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -43,6 +42,5 @@ model <- lgb.train(params, learning_rate = 1, early_stopping_rounds = 10) lgb.get.eval.result(model, "test", "l2") -} } diff --git a/R-package/man/lgb.importance.Rd b/R-package/man/lgb.importance.Rd index 616ba720fc7..8e7fe941644 100644 --- a/R-package/man/lgb.importance.Rd +++ b/R-package/man/lgb.importance.Rd @@ -24,13 +24,12 @@ For a tree model, a \code{data.table} with the following columns: Creates a \code{data.table} of feature importances in a model. 
} \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) -params = list(objective = "binary", +params <- list(objective = "binary", learning_rate = 0.01, num_leaves = 63, max_depth = -1, min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1) model <- lgb.train(params, dtrain, 20) @@ -38,6 +37,5 @@ model <- lgb.train(params, dtrain, 20) tree_imp1 <- lgb.importance(model, percentage = TRUE) tree_imp2 <- lgb.importance(model, percentage = FALSE) -} } diff --git a/R-package/man/lgb.interprete.Rd b/R-package/man/lgb.interprete.Rd index 7ad5b4ded8c..da28a542a85 100644 --- a/R-package/man/lgb.interprete.Rd +++ b/R-package/man/lgb.interprete.Rd @@ -27,8 +27,6 @@ For multiclass classification, a \code{list} of \code{data.table} with the Featu Computes feature contribution components of rawscore prediction. } \examples{ -\dontrun{ -library(lightgbm) Sigmoid <- function(x) 1 / (1 + exp(-x)) Logit <- function(x) log(x / (1 - x)) data(agaricus.train, package = "lightgbm") @@ -38,13 +36,16 @@ setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label)) data(agaricus.test, package = "lightgbm") test <- agaricus.test -params = list(objective = "binary", - learning_rate = 0.01, num_leaves = 63, max_depth = -1, - min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1) - model <- lgb.train(params, dtrain, 20) +params <- list( + objective = "binary" + , learning_rate = 0.01 + , num_leaves = 63 + , max_depth = -1 + , min_data_in_leaf = 1 + , min_sum_hessian_in_leaf = 1 +) model <- lgb.train(params, dtrain, 20) tree_interpretation <- lgb.interprete(model, test$data, 1:5) -} } diff --git a/R-package/man/lgb.load.Rd b/R-package/man/lgb.load.Rd index 2dd117a6cd2..bf298920e75 100644 --- a/R-package/man/lgb.load.Rd +++ b/R-package/man/lgb.load.Rd @@ -20,7 +20,6 @@ Load LightGBM takes in either a file path or model string If both are provided, Load will default to loading from 
file } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -41,6 +40,5 @@ lgb.save(model, "model.txt") load_booster <- lgb.load(filename = "model.txt") model_string <- model$save_model_to_string(NULL) # saves best iteration load_booster_from_str <- lgb.load(model_str = model_string) -} } diff --git a/R-package/man/lgb.model.dt.tree.Rd b/R-package/man/lgb.model.dt.tree.Rd index fb0494fd221..be622f428e4 100644 --- a/R-package/man/lgb.model.dt.tree.Rd +++ b/R-package/man/lgb.model.dt.tree.Rd @@ -39,20 +39,17 @@ The columns of the \code{data.table} are: Parse a LightGBM model json dump into a \code{data.table} structure. } \examples{ -\dontrun{ -library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) -params = list(objective = "binary", +params <- list(objective = "binary", learning_rate = 0.01, num_leaves = 63, max_depth = -1, min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1) model <- lgb.train(params, dtrain, 20) model <- lgb.train(params, dtrain, 20) tree_dt <- lgb.model.dt.tree(model) -} } diff --git a/R-package/man/lgb.plot.importance.Rd b/R-package/man/lgb.plot.importance.Rd index a5453057464..9596713d9af 100644 --- a/R-package/man/lgb.plot.importance.Rd +++ b/R-package/man/lgb.plot.importance.Rd @@ -29,19 +29,3 @@ Plot previously calculated feature importance: Gain, Cover and Frequency, as a b The graph represents each feature as a horizontal bar of length proportional to the defined importance of a feature. Features are shown ranked in a decreasing importance order. 
} -\examples{ -\dontrun{ -data(agaricus.train, package = "lightgbm") -train <- agaricus.train -dtrain <- lgb.Dataset(train$data, label = train$label) - -params = list(objective = "binary", - learning_rate = 0.01, num_leaves = 63, max_depth = -1, - min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1) - model <- lgb.train(params, dtrain, 20) -model <- lgb.train(params, dtrain, 20) - -tree_imp <- lgb.importance(model, percentage = TRUE) -lgb.plot.importance(tree_imp, top_n = 10, measure = "Gain") -} -} diff --git a/R-package/man/lgb.plot.interpretation.Rd b/R-package/man/lgb.plot.interpretation.Rd index 3da9d8ae50c..c69b8f3354e 100644 --- a/R-package/man/lgb.plot.interpretation.Rd +++ b/R-package/man/lgb.plot.interpretation.Rd @@ -29,7 +29,6 @@ The graph represents each feature as a horizontal bar of length proportional to Features are shown ranked in a decreasing contribution order. } \examples{ -\dontrun{ library(lightgbm) Sigmoid <- function(x) {1 / (1 + exp(-x))} Logit <- function(x) {log(x / (1 - x))} @@ -40,7 +39,7 @@ setinfo(dtrain, "init_score", rep(Logit(mean(train$label)), length(train$label)) data(agaricus.test, package = "lightgbm") test <- agaricus.test -params = list(objective = "binary", +params <- list(objective = "binary", learning_rate = 0.01, num_leaves = 63, max_depth = -1, min_data_in_leaf = 1, min_sum_hessian_in_leaf = 1) model <- lgb.train(params, dtrain, 20) @@ -49,4 +48,3 @@ model <- lgb.train(params, dtrain, 20) tree_interpretation <- lgb.interprete(model, test$data, 1:5) lgb.plot.interpretation(tree_interpretation[[1]], top_n = 10) } -} diff --git a/R-package/man/lgb.prepare.Rd b/R-package/man/lgb.prepare.Rd index 14a88efd247..625cb5a8e2d 100644 --- a/R-package/man/lgb.prepare.Rd +++ b/R-package/man/lgb.prepare.Rd @@ -16,7 +16,6 @@ The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. 
Factors and characters are converted to numeric without integers. Please use \code{lgb.prepare_rules} if you want to apply this transformation to other datasets. } \examples{ -\dontrun{ library(lightgbm) data(iris) @@ -46,6 +45,5 @@ str(lightgbm::lgb.prepare(data = iris)) # $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ... # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... # $ Species : num 1 1 1 1 1 1 1 1 1 1 ... -} } diff --git a/R-package/man/lgb.prepare2.Rd b/R-package/man/lgb.prepare2.Rd index cb84885b4f9..5739b03363d 100644 --- a/R-package/man/lgb.prepare2.Rd +++ b/R-package/man/lgb.prepare2.Rd @@ -16,7 +16,6 @@ The cleaned dataset. It must be converted to a matrix format (\code{as.matrix}) Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). Please use \code{lgb.prepare_rules2} if you want to apply this transformation to other datasets. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM. } \examples{ -\dontrun{ library(lightgbm) data(iris) @@ -28,7 +27,8 @@ str(iris) # $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ... # $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 ... -str(lgb.prepare2(data = iris)) # Convert all factors/chars to integer +# Convert all factors/chars to integer +str(lgb.prepare2(data = iris)) # 'data.frame': 150 obs. of 5 variables: # $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ... # $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ... @@ -48,5 +48,3 @@ str(lightgbm::lgb.prepare2(data = iris)) # $ Species : int 1 1 1 1 1 1 1 1 1 1 ... 
} - -} diff --git a/R-package/man/lgb.prepare_rules.Rd b/R-package/man/lgb.prepare_rules.Rd index 9290c8d8e13..ac8a8d43ef4 100644 --- a/R-package/man/lgb.prepare_rules.Rd +++ b/R-package/man/lgb.prepare_rules.Rd @@ -18,7 +18,6 @@ A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric. In addition, keeps rules created so you can convert other datasets using this converter. } \examples{ -\dontrun{ library(lightgbm) data(iris) @@ -77,5 +76,3 @@ str(newest_iris$data) # SUCCESS! # $ Species : num 3 3 3 3 3 3 3 3 3 3 ... } - -} diff --git a/R-package/man/lgb.prepare_rules2.Rd b/R-package/man/lgb.prepare_rules2.Rd index 44414c834b9..2422cbc073d 100644 --- a/R-package/man/lgb.prepare_rules2.Rd +++ b/R-package/man/lgb.prepare_rules2.Rd @@ -18,7 +18,6 @@ A list with the cleaned dataset (\code{data}) and the rules (\code{rules}). The Attempts to prepare a clean dataset to prepare to put in a lgb.Dataset. Factors and characters are converted to numeric (specifically: integer). In addition, keeps rules created so you can convert other datasets using this converter. This is useful if you have a specific need for integer dataset instead of numeric dataset. Note that there are programs which do not support integer-only input. Consider this as a half memory technique which is dangerous, especially for LightGBM. } \examples{ -\dontrun{ library(lightgbm) data(iris) @@ -77,5 +76,3 @@ str(newest_iris$data) # SUCCESS! # $ Species : int 3 3 3 3 3 3 3 3 3 3 ... 
} - -} diff --git a/R-package/man/lgb.save.Rd b/R-package/man/lgb.save.Rd index 45514f10a00..bad52ad603a 100644 --- a/R-package/man/lgb.save.Rd +++ b/R-package/man/lgb.save.Rd @@ -20,7 +20,6 @@ lgb.Booster Save LightGBM model } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -38,6 +37,5 @@ model <- lgb.train(params, learning_rate = 1, early_stopping_rounds = 10) lgb.save(model, "model.txt") -} } diff --git a/R-package/man/lgb.train.Rd b/R-package/man/lgb.train.Rd index 55248875e7e..c771fc1f3db 100644 --- a/R-package/man/lgb.train.Rd +++ b/R-package/man/lgb.train.Rd @@ -69,7 +69,6 @@ a trained booster model \code{lgb.Booster}. Logic to train with LightGBM } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -86,6 +85,5 @@ model <- lgb.train(params, min_data = 1, learning_rate = 1, early_stopping_rounds = 10) -} } diff --git a/R-package/man/lgb.unloader.Rd b/R-package/man/lgb.unloader.Rd index 9569eb5f519..9ea57f54a19 100644 --- a/R-package/man/lgb.unloader.Rd +++ b/R-package/man/lgb.unloader.Rd @@ -20,7 +20,6 @@ NULL invisibly. Attempts to unload LightGBM packages so you can remove objects cleanly without having to restart R. This is useful for instance if an object becomes stuck for no apparent reason and you do not want to restart R to fix the lost object. } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -43,6 +42,5 @@ gc() # Not needed if wipe = TRUE library(lightgbm) # Do whatever you want again with LightGBM without object clashing -} } diff --git a/R-package/man/predict.lgb.Booster.Rd b/R-package/man/predict.lgb.Booster.Rd index 75aea707ee6..310375ba874 100644 --- a/R-package/man/predict.lgb.Booster.Rd +++ b/R-package/man/predict.lgb.Booster.Rd @@ -44,7 +44,6 @@ number of columns corresponding to the number of trees. 
Predicted values based on class \code{lgb.Booster} } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -62,6 +61,5 @@ model <- lgb.train(params, learning_rate = 1, early_stopping_rounds = 10) preds <- predict(model, test$data) -} } diff --git a/R-package/man/readRDS.lgb.Booster.Rd b/R-package/man/readRDS.lgb.Booster.Rd index 991c0a72cf1..05f4eedb104 100644 --- a/R-package/man/readRDS.lgb.Booster.Rd +++ b/R-package/man/readRDS.lgb.Booster.Rd @@ -18,7 +18,6 @@ lgb.Booster. Attempts to load a model using RDS. } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -37,6 +36,5 @@ model <- lgb.train(params, early_stopping_rounds = 10) saveRDS.lgb.Booster(model, "model.rds") new_model <- readRDS.lgb.Booster("model.rds") -} } diff --git a/R-package/man/saveRDS.lgb.Booster.Rd b/R-package/man/saveRDS.lgb.Booster.Rd index d9bdd9f7b26..b302b2c3b6c 100644 --- a/R-package/man/saveRDS.lgb.Booster.Rd +++ b/R-package/man/saveRDS.lgb.Booster.Rd @@ -29,7 +29,6 @@ NULL invisibly. Attempts to save a model using RDS. Has an additional parameter (\code{raw}) which decides whether to save the raw model or not. 
} \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -39,14 +38,14 @@ test <- agaricus.test dtest <- lgb.Dataset.create.valid(dtrain, test$data, label = test$label) params <- list(objective = "regression", metric = "l2") valids <- list(test = dtest) -model <- lgb.train(params, - dtrain, - 100, - valids, - min_data = 1, - learning_rate = 1, - early_stopping_rounds = 10) +model <- lgb.train( + params + , dtrain + , 100 + , valids + , min_data = 1 + , learning_rate = 1 + , early_stopping_rounds = 10 +) saveRDS.lgb.Booster(model, "model.rds") } - -} diff --git a/R-package/man/setinfo.Rd b/R-package/man/setinfo.Rd index bf6d06a84a9..92da5034659 100644 --- a/R-package/man/setinfo.Rd +++ b/R-package/man/setinfo.Rd @@ -35,7 +35,6 @@ The \code{name} field can be one of the following: } } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train @@ -47,6 +46,5 @@ lightgbm::setinfo(dtrain, "label", 1 - labels) labels2 <- lightgbm::getinfo(dtrain, "label") stopifnot(all.equal(labels2, 1 - labels)) -} } diff --git a/R-package/man/slice.Rd b/R-package/man/slice.Rd index e65ef2a2765..e126b89a837 100644 --- a/R-package/man/slice.Rd +++ b/R-package/man/slice.Rd @@ -24,14 +24,13 @@ Get a new \code{lgb.Dataset} containing the specified rows of original lgb.Dataset object } \examples{ -\dontrun{ library(lightgbm) data(agaricus.train, package = "lightgbm") train <- agaricus.train dtrain <- lgb.Dataset(train$data, label = train$label) dsub <- lightgbm::slice(dtrain, 1:42) +lgb.Dataset.construct(dsub) labels <- lightgbm::getinfo(dsub, "label") -} }