[R-package] CRAN fixes (#1499)
* Fixed typos in docs

* Fixed inconsistencies in documentation

* Updated strategy for registering routines

* Fixed issues caused by smashing multiple functions into one Rd

* Fixed issues with documentation

* Removed VignetteBuilder and updated Rbuildignore

* Added R build artefacts to gitignore

* Added namespacing on data.table set function (see the sketch after this list). Updated handling of CMakeLists file to get around CRAN check.

* Updated build instructions

* Added R build script

* Removed build_r.sh script and updated R-package install instructions
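The namespacing change above amounts to calling data.table's set() through its namespace, so the call works even when data.table is imported but not attached. A minimal sketch of the pattern, with hypothetical objects rather than code from this commit:

    # data.table::set() updates a data.table by reference; the explicit
    # namespace avoids relying on data.table being on the search path.
    dt <- data.table::data.table(x = 1:3)
    data.table::set(dt, j = "y", value = dt$x * 2L)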
jameslamb authored and chivee committed Aug 29, 2018
1 parent 80a9a94 commit eded794
Showing 35 changed files with 870 additions and 662 deletions.
8 changes: 8 additions & 0 deletions .gitignore
@@ -382,3 +382,11 @@ lightgbm.model
# duplicate version file
python-package/lightgbm/VERSION.txt
.Rproj.user

# R build artefacts
R-package/src/CMakeLists.txt
R-package/src/lib_lightgbm.so.dSYM/
R-package/src/src/
lightgbm_r/*
lightgbm*.tar.gz
lightgbm.Rcheck/
11 changes: 11 additions & 0 deletions R-package/.Rbuildignore
@@ -1 +1,12 @@
^build_package.R$
\.gitkeep$

# Objects created by compilation
\.o$
\.so$
\.dll$
\.out$
\.bin$

# Code copied in at build time
^src/CMakeLists.txt$
5 changes: 2 additions & 3 deletions R-package/DESCRIPTION
@@ -7,7 +7,7 @@ Authors@R: c(
person("Guolin", "Ke", email = "guolin.ke@microsoft.com", role = c("aut", "cre")),
person("Damien", "Soukhavong", email = "damien.soukhavong@skema.edu", role = c("ctb")),
person("Yachen", "Yan", role = c("ctb")),
person("James", "Lamb", role = c("ctb"))
person("James", "Lamb", email="james.lamb@uptake.com", role = c("ctb"))
)
Description: Tree based algorithms can be improved by introducing boosting frameworks. LightGBM is one such framework, and this package offers an R interface to work with it.
It is designed to be distributed and efficient with the following advantages:
@@ -21,7 +21,6 @@ Description: Tree based algorithms can be improved by introducing boosting frame
License: MIT + file LICENSE
URL: https://github.com/Microsoft/LightGBM
BugReports: https://github.com/Microsoft/LightGBM/issues
VignetteBuilder: knitr
Suggests:
Ckmeans.1d.dp (>= 3.3.1),
DiagrammeR (>= 0.8.1),
@@ -33,7 +32,7 @@ Suggests:
testthat,
vcd (>= 1.3)
Depends:
R (>= 3.0),
R (>= 3.4),
R6 (>= 2.0)
Imports:
data.table (>= 1.9.6),
2 changes: 1 addition & 1 deletion R-package/NAMESPACE
@@ -49,4 +49,4 @@ importFrom(magrittr,"%T>%")
importFrom(magrittr,extract)
importFrom(magrittr,inset)
importFrom(methods,is)
useDynLib(lib_lightgbm)
useDynLib(lib_lightgbm , .registration = TRUE)
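With .registration = TRUE, R resolves the package's native symbols through its registered routine table rather than by name lookup. A hedged way to inspect this from a session once the package is built and loaded, using only base R:

    # List the registered .Call entry points for the shared library.
    library(lightgbm)
    routines <- getDLLRegisteredRoutines("lib_lightgbm")
    names(routines$.Call)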
3 changes: 2 additions & 1 deletion R-package/R/callback.R
@@ -1,4 +1,5 @@
CB_ENV <- R6Class(
#' @importFrom R6 R6Class
CB_ENV <- R6::R6Class(
"lgb.cb_env",
cloneable = FALSE,
public = list(
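The same R6 pattern recurs throughout these files: declare the import with roxygen and call R6Class() through its namespace. A standalone sketch using a hypothetical class, not one from this package:

    #' @importFrom R6 R6Class
    Counter <- R6::R6Class(
      classname = "Counter",
      cloneable = FALSE,
      public = list(
        n = 0L,
        add = function(by = 1L) {
          self$n <- self$n + by
          invisible(self)
        }
      )
    )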
19 changes: 11 additions & 8 deletions R-package/R/lgb.Booster.R
@@ -1,4 +1,5 @@
Booster <- R6Class(
#' @importFrom R6 R6Class
Booster <- R6::R6Class(
classname = "lgb.Booster",
cloneable = FALSE,
public = list(
@@ -654,13 +655,15 @@ Booster <- R6Class(
#'
#' @rdname predict.lgb.Booster
#' @export
predict.lgb.Booster <- function(object, data,
num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
predcontrib = FALSE,
header = FALSE,
reshape = FALSE, ...) {
predict.lgb.Booster <- function(object,
data,
num_iteration = NULL,
rawscore = FALSE,
predleaf = FALSE,
predcontrib = FALSE,
header = FALSE,
reshape = FALSE,
...) {

# Check booster existence
if (!lgb.is.Booster(object)) {
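The reformatted signature above is reached through the predict() generic. A hedged usage sketch, assuming the package is installed and using its bundled agaricus data:

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")
    dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
    bst <- lgb.train(params = list(objective = "binary"),
                     data = dtrain,
                     nrounds = 5)
    # rawscore = TRUE returns untransformed scores; reshape matters only
    # for multi-output (e.g. multiclass) predictions.
    preds <- predict(bst, agaricus.train$data, rawscore = TRUE)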
8 changes: 5 additions & 3 deletions R-package/R/lgb.Dataset.R
@@ -1,6 +1,8 @@

#' @importFrom methods is
Dataset <- R6Class(
#' @importFrom R6 R6Class
Dataset <- R6::R6Class(

classname = "lgb.Dataset",
cloneable = FALSE,
public = list(
@@ -854,8 +856,8 @@ dimnames.lgb.Dataset <- function(x) {
#' Slice a dataset
#'
#' Get a new \code{lgb.Dataset} containing the specified rows of
#' orginal lgb.Dataset object
#'
#' original lgb.Dataset object
#'
#' @param dataset Object of class "lgb.Dataset"
#' @param idxset a integer vector of indices of rows needed
#' @param ... other parameters (currently not used)
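slice(), documented above, builds a new lgb.Dataset from selected rows of an existing one. A short hedged sketch along the same lines as the predict example:

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")
    dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
    # A new lgb.Dataset containing only the first 100 rows.
    dsub <- slice(dtrain, seq_len(100L))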
4 changes: 3 additions & 1 deletion R-package/R/lgb.Predictor.R
@@ -1,6 +1,8 @@

#' @importFrom methods is
Predictor <- R6Class(
#' @importFrom R6 R6Class
Predictor <- R6::R6Class(

classname = "lgb.Predictor",
cloneable = FALSE,
public = list(
35 changes: 14 additions & 21 deletions R-package/R/lgb.cv.R
@@ -1,4 +1,5 @@
CVBooster <- R6Class(
#' @importFrom R6 R6Class
CVBooster <- R6::R6Class(
classname = "lgb.CVBooster",
cloneable = FALSE,
public = list(
@@ -17,46 +18,39 @@ CVBooster <- R6Class(
)

#' @title Main CV logic for LightGBM
#' @description Cross validation logic used by LightGBM
#' @name lgb.cv
#' @param params List of parameters
#' @param data a \code{lgb.Dataset} object, used for CV
#' @param nrounds number of CV rounds
#' @inheritParams lgb_shared_params
#' @param nfold the original dataset is randomly partitioned into \code{nfold} equal size subsamples.
#' @param label vector of response values. Should be provided only when data is an R-matrix.
#' @param weight vector of response values. If not NULL, will set to dataset
#' @param obj objective function, can be character or custom objective function. Examples include
#' \code{regression}, \code{regression_l1}, \code{huber},
#' \code{binary}, \code{lambdarank}, \code{multiclass}, \code{multiclass}
#' @param boosting boosting type. \code{gbdt}, \code{dart}
#' @param num_leaves number of leaves in one tree. defaults to 127
#' @param max_depth Limit the max depth for tree model. This is used to deal with overfit when #data is small.
#' Tree still grow by leaf-wise.
#' @param num_threads Number of threads for LightGBM. For the best speed, set this to the number of real CPU cores, not the number of threads (most CPU using hyper-threading to generate 2 threads per CPU core).
#' @param eval evaluation function, can be (list of) character or custom eval function
#' @param verbose verbosity for output, if <= 0, also will disable the print of evalutaion during training
#' @param record Boolean, TRUE will record iteration message to \code{booster$record_evals}
#' @param eval_freq evalutaion output frequence, only effect when verbose > 0
#' @param showsd \code{boolean}, whether to show standard deviation of cross validation
#' @param stratified a \code{boolean} indicating whether sampling of folds should be stratified
#' by the values of outcome labels.
#' @param folds \code{list} provides a possibility to use a list of pre-defined CV folds
#' (each element must be a vector of test fold's indices). When folds are supplied,
#' the \code{nfold} and \code{stratified} parameters are ignored.
#' @param init_model path of model file of \code{lgb.Booster} object, will continue train from this model
#' @param colnames feature names, if not null, will use this to overwrite the names in dataset
#' @param categorical_feature list of str or int
#' type int represents index,
#' type str represents feature names
#' @param early_stopping_rounds int
#' Activates early stopping.
#' CV score needs to improve at least every early_stopping_rounds round(s) to continue.
#' Requires at least one metric.
#' If there's more than one, will check all of them.
#' Returns the model with (best_iter + early_stopping_rounds).
#' If early stopping occurs, the model will have 'best_iter' field
#' @param callbacks list of callback functions
#' List of callback functions that are applied at each iteration.
#' @param ... other parameters, see Parameters.rst for more informations
#' @param ... other parameters, see Parameters.rst for more information. A few key parameters:
#' \itemize{
#' \item{boosting}{Boosting type. \code{"gbdt"} or \code{"dart"}}
#' \item{num_leaves}{number of leaves in one tree. defaults to 127}
#' \item{max_depth}{Limit the max depth for tree model. This is used to deal with
#' overfit when #data is small. Tree still grow by leaf-wise.}
#' \item{num_threads}{Number of threads for LightGBM. For the best speed, set this to
#' the number of real CPU cores, not the number of threads (most
#' CPU using hyper-threading to generate 2 threads per CPU core).}
#' }
#'
#' @return a trained model \code{lgb.CVBooster}.
#'
@@ -75,7 +69,6 @@ CVBooster <- R6Class(
#' learning_rate = 1,
#' early_stopping_rounds = 10)
#' }
#' @rdname lgb.train
#' @export
lgb.cv <- function(params = list(),
data,
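A hedged lgb.cv sketch matching the documented interface, with tuning parameters such as num_leaves supplied through params:

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")
    dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
    cv <- lgb.cv(params = list(objective = "binary",
                               metric = "auc",
                               num_leaves = 31L),
                 data = dtrain,
                 nrounds = 10,
                 nfold = 3,
                 stratified = TRUE,
                 early_stopping_rounds = 5)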
5 changes: 3 additions & 2 deletions R-package/R/lgb.model.dt.tree.R
@@ -20,7 +20,7 @@
#' \item \code{leaf_index}: ID of a leaf in a tree (integer)
#' \item \code{leaf_parent}: ID of the parent node for current leaf (integer)
#' \item \code{split_gain}: Split gain of a node
#' \item \code{threshold}: Spliting threshold value of a node
#' \item \code{threshold}: Splitting threshold value of a node
#' \item \code{decision_type}: Decision type of a node
#' \item \code{default_left}: Determine how to handle NA value, TRUE -> Left, FALSE -> Right
#' \item \code{internal_value}: Node value
@@ -47,7 +47,7 @@
#' }
#'
#' @importFrom magrittr %>%
#' @importFrom data.table := data.table
#' @importFrom data.table := data.table rbindlist
#' @importFrom jsonlite fromJSON
#' @export
lgb.model.dt.tree <- function(model, num_iteration = NULL) {
@@ -78,6 +78,7 @@ lgb.model.dt.tree <- function(model, num_iteration = NULL) {

}


#' @importFrom data.table data.table rbindlist
single.tree.parse <- function(lgb_tree) {

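Finally, a hedged sketch of lgb.model.dt.tree(), which flattens a trained booster into the per-node data.table described above:

    library(lightgbm)
    data(agaricus.train, package = "lightgbm")
    dtrain <- lgb.Dataset(agaricus.train$data, label = agaricus.train$label)
    bst <- lgb.train(params = list(objective = "binary"),
                     data = dtrain,
                     nrounds = 3)
    tree_dt <- lgb.model.dt.tree(bst)
    # Columns include split_gain, threshold, decision_type, and so on.
    head(tree_dt)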
