Skip to content

Commit

Permalink
[R-package] discourage use of regex for fixed string comparisons (#5685)
Browse files Browse the repository at this point in the history
  • Loading branch information
jameslamb committed Jan 31, 2023
1 parent 3ae0484 commit 9954bc4
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 28 deletions.
1 change: 1 addition & 0 deletions .ci/lint_r_code.R
Expand Up @@ -42,6 +42,7 @@ LINTERS_TO_USE <- list(
, "implicit_integers" = lintr::implicit_integer_linter()
, "infix_spaces" = lintr::infix_spaces_linter()
, "inner_combine" = lintr::inner_combine_linter()
, "fixed_regex" = lintr::fixed_regex_linter()
, "literal_coercion" = lintr::literal_coercion_linter()
, "long_lines" = lintr::line_length_linter(length = 120L)
, "missing_argument" = lintr::missing_argument_linter()
Expand Down
2 changes: 1 addition & 1 deletion R-package/R/utils.R
Expand Up @@ -25,7 +25,7 @@ lgb.params2str <- function(params) {
stop("params must be a list")
}

names(params) <- gsub("\\.", "_", names(params))
names(params) <- gsub(".", "_", names(params), fixed = TRUE)
param_names <- names(params)
ret <- list()

Expand Down
4 changes: 2 additions & 2 deletions R-package/inst/make-r-def.R
Expand Up @@ -81,9 +81,9 @@ end_of_table <- empty_lines[empty_lines > start_index][1L]

# Read the contents of the table
exported_symbols <- objdump_results[(start_index + 1L):end_of_table]
exported_symbols <- gsub("\t", "", exported_symbols)
exported_symbols <- gsub("\t", "", exported_symbols, fixed = TRUE)
exported_symbols <- gsub(".*\\] ", "", exported_symbols)
exported_symbols <- gsub(" ", "", exported_symbols)
exported_symbols <- gsub(" ", "", exported_symbols, fixed = TRUE)

# Write R.def file
writeLines(
Expand Down
36 changes: 23 additions & 13 deletions R-package/tests/testthat/test_basic.R
Expand Up @@ -618,6 +618,7 @@ test_that("lgb.cv() prefers objective in params to keyword argument", {
model_txt_lines <- strsplit(
x = bst$save_model_to_string()
, split = "\n"
, fixed = TRUE
)[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2"))
Expand Down Expand Up @@ -780,6 +781,7 @@ test_that("lgb.train() prefers objective in params to keyword argument", {
model_txt_lines <- strsplit(
x = bst$save_model_to_string()
, split = "\n"
, fixed = TRUE
)[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2"))
Expand Down Expand Up @@ -2296,8 +2298,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
, verbose = 1L
)
})
expect_true(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
expect_true(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
expect_true(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))

# (verbose = 0) should be WARNING level logs only
lgb_cv_logs <- capture.output({
Expand All @@ -2310,8 +2312,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
, verbose = 0L
)
})
expect_false(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
expect_true(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))

# (verbose = -1) no logs
lgb_cv_logs <- capture.output({
Expand All @@ -2326,8 +2328,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
})
# NOTE: this is not length(lgb_cv_logs) == 0 because lightgbm's
# dependencies might print other messages
expect_false(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
expect_false(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
expect_false(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
})

test_that("lgb.cv() updates params based on keyword arguments", {
Expand Down Expand Up @@ -2918,6 +2920,7 @@ test_that("lightgbm() accepts objective as function argument and under params",
model_txt_lines <- strsplit(
x = bst1$save_model_to_string()
, split = "\n"
, fixed = TRUE
)[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2"))
Expand All @@ -2933,6 +2936,7 @@ test_that("lightgbm() accepts objective as function argument and under params",
model_txt_lines <- strsplit(
x = bst2$save_model_to_string()
, split = "\n"
, fixed = TRUE
)[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2"))
Expand All @@ -2951,6 +2955,7 @@ test_that("lightgbm() prioritizes objective under params over objective as funct
model_txt_lines <- strsplit(
x = bst1$save_model_to_string()
, split = "\n"
, fixed = TRUE
)[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2"))
Expand All @@ -2967,6 +2972,7 @@ test_that("lightgbm() prioritizes objective under params over objective as funct
model_txt_lines <- strsplit(
x = bst2$save_model_to_string()
, split = "\n"
, fixed = TRUE
)[[1L]]
expect_true(any(model_txt_lines == "objective=regression_l1"))
expect_false(any(model_txt_lines == "objective=regression_l2"))
Expand Down Expand Up @@ -3006,6 +3012,7 @@ test_that("lightgbm() defaults to 'regression' objective if objective not otherw
model_txt_lines <- strsplit(
x = bst$save_model_to_string()
, split = "\n"
, fixed = TRUE
)[[1L]]
expect_true(any(model_txt_lines == "objective=regression"))
expect_false(any(model_txt_lines == "objective=regression_l1"))
Expand All @@ -3023,8 +3030,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
model_txt_lines <- strsplit(
x = bst$save_model_to_string()
, split = "\n"
, fixed = TRUE
)[[1L]]
expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))

bst <- lightgbm(
data = train$data
Expand All @@ -3037,8 +3045,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
model_txt_lines <- strsplit(
x = bst$save_model_to_string()
, split = "\n"
, fixed = TRUE
)[[1L]]
expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))

bst <- lightgbm(
data = train$data
Expand All @@ -3052,8 +3061,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
model_txt_lines <- strsplit(
x = bst$save_model_to_string()
, split = "\n"
, fixed = TRUE
)[[1L]]
expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
})

test_that("lightgbm() accepts 'weight' and 'weights'", {
Expand Down Expand Up @@ -3090,19 +3100,19 @@ test_that("lightgbm() accepts 'weight' and 'weights'", {

.assert_has_expected_logs <- function(log_txt, lgb_info, lgb_warn, early_stopping, valid_eval_msg) {
expect_identical(
object = any(grepl("\\[LightGBM\\] \\[Info\\]", log_txt))
object = any(grepl("[LightGBM] [Info]", log_txt, fixed = TRUE))
, expected = lgb_info
)
expect_identical(
object = any(grepl("\\[LightGBM\\] \\[Warning\\]", log_txt))
object = any(grepl("[LightGBM] [Warning]", log_txt, fixed = TRUE))
, expected = lgb_warn
)
expect_identical(
object = any(grepl("Will train until there is no improvement in 5 rounds", log_txt))
object = any(grepl("Will train until there is no improvement in 5 rounds", log_txt, fixed = TRUE))
, expected = early_stopping
)
expect_identical(
object = any(grepl("Did not meet early stopping", log_txt))
object = any(grepl("Did not meet early stopping", log_txt, fixed = TRUE))
, expected = early_stopping
)
expect_identical(
Expand Down
4 changes: 2 additions & 2 deletions R-package/tests/testthat/test_learning_to_rank.R
Expand Up @@ -19,7 +19,7 @@ test_that("learning-to-rank with lgb.train() works as expected", {
, group = rep(150L, 40L)
)
ndcg_at <- "1,2,3"
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",", fixed = TRUE)[[1L]])
params <- list(
objective = "lambdarank"
, metric = "ndcg"
Expand Down Expand Up @@ -81,7 +81,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
, group = rep(150L, 40L)
)
ndcg_at <- "1,2,3"
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",", fixed = TRUE)[[1L]])
params <- list(
objective = "lambdarank"
, metric = "ndcg"
Expand Down
6 changes: 3 additions & 3 deletions R-package/tests/testthat/test_lgb.Booster.R
Expand Up @@ -705,7 +705,7 @@ test_that("Saving a model with different feature importance types works", {
expect_true(lgb.is.Booster(bst))

.feat_importance_from_string <- function(model_string) {
file_lines <- strsplit(model_string, "\n")[[1L]]
file_lines <- strsplit(model_string, "\n", fixed = TRUE)[[1L]]
start_indx <- which(grepl("^feature_importances\\:$", file_lines)) + 1L
blank_line_indices <- which(file_lines == "")
end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
Expand Down Expand Up @@ -771,7 +771,7 @@ test_that("Saving a model with unknown importance type fails", {


.params_from_model_string <- function(model_str) {
file_lines <- strsplit(model_str, "\n")[[1L]]
file_lines <- strsplit(model_str, "\n", fixed = TRUE)[[1L]]
start_indx <- which(grepl("^parameters\\:$", file_lines)) + 1L
blank_line_indices <- which(file_lines == "")
end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
Expand Down Expand Up @@ -1286,7 +1286,7 @@ test_that("Booster's print, show, and summary work correctly", {

.has_expected_content_for_finalized_model <- function(printed_txt) {
expect_true(any(grepl("^LightGBM Model$", printed_txt)))
expect_true(any(grepl("Booster handle is invalid", printed_txt)))
expect_true(any(grepl("Booster handle is invalid", printed_txt, fixed = TRUE)))
}

.check_methods_work <- function(model) {
Expand Down
20 changes: 13 additions & 7 deletions build_r.R
Expand Up @@ -26,8 +26,8 @@ TEMP_SOURCE_DIR <- file.path(TEMP_R_DIR, "src")
for (arg in args) {
if (any(grepl("^\\-j[0-9]+", arg))) { # nolint: non_portable_path
out_list[["make_args"]] <- arg
} else if (any(grepl("=", arg))) {
split_arg <- strsplit(arg, "=")[[1L]]
} else if (any(grepl("=", arg, fixed = TRUE))) {
split_arg <- strsplit(arg, "=", fixed = TRUE)[[1L]]
arg_name <- split_arg[[1L]]
arg_value <- split_arg[[2L]]
out_list[["keyword_args"]][[arg_name]] <- arg_value
Expand Down Expand Up @@ -371,6 +371,7 @@ LGB_VERSION <- gsub(
pattern = "rc"
, replacement = "-"
, x = LGB_VERSION
, fixed = TRUE
)

# DESCRIPTION has placeholders for version
Expand All @@ -381,11 +382,13 @@ description_contents <- gsub(
pattern = "~~VERSION~~"
, replacement = LGB_VERSION
, x = description_contents
, fixed = TRUE
)
description_contents <- gsub(
pattern = "~~DATE~~"
, replacement = as.character(Sys.Date())
, x = description_contents
, fixed = TRUE
)
writeLines(description_contents, DESCRIPTION_FILE)

Expand All @@ -410,6 +413,7 @@ c_api_contents <- gsub(
pattern = "LIGHTGBM_C_EXPORT SEXP "
, replacement = ""
, x = c_api_contents
, fixed = TRUE
)
c_api_symbols <- gsub(
pattern = "\\(.*"
Expand All @@ -435,13 +439,15 @@ if (isTRUE(SKIP_VIGNETTES)) {

# Install the package
version <- gsub(
"Version: ",
"",
grep(
"Version: "
, readLines(con = file.path(TEMP_R_DIR, "DESCRIPTION"))
pattern = "Version: ",
replacement = "",
x = grep(
pattern = "Version: "
, x = readLines(con = file.path(TEMP_R_DIR, "DESCRIPTION"))
, value = TRUE
, fixed = TRUE
)
, fixed = TRUE
)
tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))

Expand Down

0 comments on commit 9954bc4

Please sign in to comment.