[R-package] discourage use of regex for fixed string comparisons (#5685)

microsoft · Jan 31, 2023 · 9954bc4 · 9954bc4
1 parent 3ae0484
commit 9954bc4
Show file tree

Hide file tree

Showing 7 changed files with 45 additions and 28 deletions.
diff --git a/.ci/lint_r_code.R b/.ci/lint_r_code.R
@@ -42,6 +42,7 @@ LINTERS_TO_USE <- list(
     , "implicit_integers"    = lintr::implicit_integer_linter()
     , "infix_spaces"         = lintr::infix_spaces_linter()
     , "inner_combine"        = lintr::inner_combine_linter()
+    , "fixed_regex"          = lintr::fixed_regex_linter()
     , "literal_coercion"     = lintr::literal_coercion_linter()
     , "long_lines"           = lintr::line_length_linter(length = 120L)
     , "missing_argument"     = lintr::missing_argument_linter()

diff --git a/R-package/R/utils.R b/R-package/R/utils.R
@@ -25,7 +25,7 @@ lgb.params2str <- function(params) {
     stop("params must be a list")
   }
 
-  names(params) <- gsub("\\.", "_", names(params))
+  names(params) <- gsub(".", "_", names(params), fixed = TRUE)
   param_names <- names(params)
   ret <- list()
 

diff --git a/R-package/inst/make-r-def.R b/R-package/inst/make-r-def.R
@@ -81,9 +81,9 @@ end_of_table <- empty_lines[empty_lines > start_index][1L]
 
 # Read the contents of the table
 exported_symbols <- objdump_results[(start_index + 1L):end_of_table]
-exported_symbols <- gsub("\t", "", exported_symbols)
+exported_symbols <- gsub("\t", "", exported_symbols, fixed = TRUE)
 exported_symbols <- gsub(".*\\] ", "", exported_symbols)
-exported_symbols <- gsub(" ", "", exported_symbols)
+exported_symbols <- gsub(" ", "", exported_symbols, fixed = TRUE)
 
 # Write R.def file
 writeLines(

diff --git a/R-package/tests/testthat/test_basic.R b/R-package/tests/testthat/test_basic.R
@@ -618,6 +618,7 @@ test_that("lgb.cv() prefers objective in params to keyword argument", {
     model_txt_lines <- strsplit(
       x = bst$save_model_to_string()
       , split = "\n"
+      , fixed = TRUE
     )[[1L]]
     expect_true(any(model_txt_lines == "objective=regression_l1"))
     expect_false(any(model_txt_lines == "objective=regression_l2"))
@@ -780,6 +781,7 @@ test_that("lgb.train() prefers objective in params to keyword argument", {
   model_txt_lines <- strsplit(
     x = bst$save_model_to_string()
     , split = "\n"
+    , fixed = TRUE
   )[[1L]]
   expect_true(any(model_txt_lines == "objective=regression_l1"))
   expect_false(any(model_txt_lines == "objective=regression_l2"))
@@ -2296,8 +2298,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
       , verbose = 1L
     )
   })
-  expect_true(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
-  expect_true(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
+  expect_true(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
+  expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
 
   # (verbose = 0) should be WARNING level logs only
   lgb_cv_logs <- capture.output({
@@ -2310,8 +2312,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
       , verbose = 0L
     )
   })
-  expect_false(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
-  expect_true(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
+  expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
+  expect_true(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
 
   # (verbose = -1) no logs
   lgb_cv_logs <- capture.output({
@@ -2326,8 +2328,8 @@ test_that("lgb.cv() respects changes to logging verbosity", {
   })
   # NOTE: this is not length(lgb_cv_logs) == 0 because lightgbm's
   #       dependencies might print other messages
-  expect_false(any(grepl("\\[LightGBM\\] \\[Info\\]", lgb_cv_logs)))
-  expect_false(any(grepl("\\[LightGBM\\] \\[Warning\\]", lgb_cv_logs)))
+  expect_false(any(grepl("[LightGBM] [Info]", lgb_cv_logs, fixed = TRUE)))
+  expect_false(any(grepl("[LightGBM] [Warning]", lgb_cv_logs, fixed = TRUE)))
 })
 
 test_that("lgb.cv() updates params based on keyword arguments", {
@@ -2918,6 +2920,7 @@ test_that("lightgbm() accepts objective as function argument and under params",
   model_txt_lines <- strsplit(
     x = bst1$save_model_to_string()
     , split = "\n"
+    , fixed = TRUE
   )[[1L]]
   expect_true(any(model_txt_lines == "objective=regression_l1"))
   expect_false(any(model_txt_lines == "objective=regression_l2"))
@@ -2933,6 +2936,7 @@ test_that("lightgbm() accepts objective as function argument and under params",
   model_txt_lines <- strsplit(
     x = bst2$save_model_to_string()
     , split = "\n"
+    , fixed = TRUE
   )[[1L]]
   expect_true(any(model_txt_lines == "objective=regression_l1"))
   expect_false(any(model_txt_lines == "objective=regression_l2"))
@@ -2951,6 +2955,7 @@ test_that("lightgbm() prioritizes objective under params over objective as funct
   model_txt_lines <- strsplit(
     x = bst1$save_model_to_string()
     , split = "\n"
+    , fixed = TRUE
   )[[1L]]
   expect_true(any(model_txt_lines == "objective=regression_l1"))
   expect_false(any(model_txt_lines == "objective=regression_l2"))
@@ -2967,6 +2972,7 @@ test_that("lightgbm() prioritizes objective under params over objective as funct
   model_txt_lines <- strsplit(
     x = bst2$save_model_to_string()
     , split = "\n"
+    , fixed = TRUE
   )[[1L]]
   expect_true(any(model_txt_lines == "objective=regression_l1"))
   expect_false(any(model_txt_lines == "objective=regression_l2"))
@@ -3006,6 +3012,7 @@ test_that("lightgbm() defaults to 'regression' objective if objective not otherw
   model_txt_lines <- strsplit(
     x = bst$save_model_to_string()
     , split = "\n"
+    , fixed = TRUE
   )[[1L]]
   expect_true(any(model_txt_lines == "objective=regression"))
   expect_false(any(model_txt_lines == "objective=regression_l1"))
@@ -3023,8 +3030,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
   model_txt_lines <- strsplit(
     x = bst$save_model_to_string()
     , split = "\n"
+    , fixed = TRUE
   )[[1L]]
-  expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
+  expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
 
   bst <- lightgbm(
     data = train$data
@@ -3037,8 +3045,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
   model_txt_lines <- strsplit(
     x = bst$save_model_to_string()
     , split = "\n"
+    , fixed = TRUE
   )[[1L]]
-  expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
+  expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
 
   bst <- lightgbm(
     data = train$data
@@ -3052,8 +3061,9 @@ test_that("lightgbm() accepts 'num_threads' as either top-level argument or unde
   model_txt_lines <- strsplit(
     x = bst$save_model_to_string()
     , split = "\n"
+    , fixed = TRUE
   )[[1L]]
-  expect_true(any(grepl("\\[num_threads: 1\\]", model_txt_lines)))
+  expect_true(any(grepl("[num_threads: 1]", model_txt_lines, fixed = TRUE)))
 })
 
 test_that("lightgbm() accepts 'weight' and 'weights'", {
@@ -3090,19 +3100,19 @@ test_that("lightgbm() accepts 'weight' and 'weights'", {
 
 .assert_has_expected_logs <- function(log_txt, lgb_info, lgb_warn, early_stopping, valid_eval_msg) {
   expect_identical(
-    object = any(grepl("\\[LightGBM\\] \\[Info\\]", log_txt))
+    object = any(grepl("[LightGBM] [Info]", log_txt, fixed = TRUE))
     , expected = lgb_info
   )
   expect_identical(
-    object = any(grepl("\\[LightGBM\\] \\[Warning\\]", log_txt))
+    object = any(grepl("[LightGBM] [Warning]", log_txt, fixed = TRUE))
     , expected = lgb_warn
   )
   expect_identical(
-    object = any(grepl("Will train until there is no improvement in 5 rounds", log_txt))
+    object = any(grepl("Will train until there is no improvement in 5 rounds", log_txt, fixed = TRUE))
     , expected = early_stopping
   )
   expect_identical(
-    object = any(grepl("Did not meet early stopping", log_txt))
+    object = any(grepl("Did not meet early stopping", log_txt, fixed = TRUE))
     , expected = early_stopping
   )
   expect_identical(

diff --git a/R-package/tests/testthat/test_learning_to_rank.R b/R-package/tests/testthat/test_learning_to_rank.R
@@ -19,7 +19,7 @@ test_that("learning-to-rank with lgb.train() works as expected", {
         , group = rep(150L, 40L)
     )
     ndcg_at <- "1,2,3"
-    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
+    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",", fixed = TRUE)[[1L]])
     params <- list(
         objective = "lambdarank"
         , metric = "ndcg"
@@ -81,7 +81,7 @@ test_that("learning-to-rank with lgb.cv() works as expected", {
         , group = rep(150L, 40L)
     )
     ndcg_at <- "1,2,3"
-    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",")[[1L]])
+    eval_names <- paste0("ndcg@", strsplit(ndcg_at, ",", fixed = TRUE)[[1L]])
     params <- list(
         objective = "lambdarank"
         , metric = "ndcg"

diff --git a/R-package/tests/testthat/test_lgb.Booster.R b/R-package/tests/testthat/test_lgb.Booster.R
@@ -705,7 +705,7 @@ test_that("Saving a model with different feature importance types works", {
     expect_true(lgb.is.Booster(bst))
 
     .feat_importance_from_string <- function(model_string) {
-        file_lines <- strsplit(model_string, "\n")[[1L]]
+        file_lines <- strsplit(model_string, "\n", fixed = TRUE)[[1L]]
         start_indx <- which(grepl("^feature_importances\\:$", file_lines)) + 1L
         blank_line_indices <- which(file_lines == "")
         end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
@@ -771,7 +771,7 @@ test_that("Saving a model with unknown importance type fails", {
 
 
 .params_from_model_string <- function(model_str) {
-    file_lines <- strsplit(model_str, "\n")[[1L]]
+    file_lines <- strsplit(model_str, "\n", fixed = TRUE)[[1L]]
     start_indx <- which(grepl("^parameters\\:$", file_lines)) + 1L
     blank_line_indices <- which(file_lines == "")
     end_indx <- blank_line_indices[blank_line_indices > start_indx][1L] - 1L
@@ -1286,7 +1286,7 @@ test_that("Booster's print, show, and summary work correctly", {
 
     .has_expected_content_for_finalized_model <- function(printed_txt) {
       expect_true(any(grepl("^LightGBM Model$", printed_txt)))
-      expect_true(any(grepl("Booster handle is invalid", printed_txt)))
+      expect_true(any(grepl("Booster handle is invalid", printed_txt, fixed = TRUE)))
     }
 
     .check_methods_work <- function(model) {

diff --git a/build_r.R b/build_r.R
@@ -26,8 +26,8 @@ TEMP_SOURCE_DIR <- file.path(TEMP_R_DIR, "src")
   for (arg in args) {
     if (any(grepl("^\\-j[0-9]+", arg))) {  # nolint: non_portable_path
         out_list[["make_args"]] <- arg
-    } else if (any(grepl("=", arg))) {
-      split_arg <- strsplit(arg, "=")[[1L]]
+    } else if (any(grepl("=", arg, fixed = TRUE))) {
+      split_arg <- strsplit(arg, "=", fixed = TRUE)[[1L]]
       arg_name <- split_arg[[1L]]
       arg_value <- split_arg[[2L]]
       out_list[["keyword_args"]][[arg_name]] <- arg_value
@@ -371,6 +371,7 @@ LGB_VERSION <- gsub(
   pattern = "rc"
   , replacement = "-"
   , x = LGB_VERSION
+  , fixed = TRUE
 )
 
 # DESCRIPTION has placeholders for version
@@ -381,11 +382,13 @@ description_contents <- gsub(
   pattern = "~~VERSION~~"
   , replacement = LGB_VERSION
   , x = description_contents
+  , fixed = TRUE
 )
 description_contents <- gsub(
   pattern = "~~DATE~~"
   , replacement = as.character(Sys.Date())
   , x = description_contents
+  , fixed = TRUE
 )
 writeLines(description_contents, DESCRIPTION_FILE)
 
@@ -410,6 +413,7 @@ c_api_contents <- gsub(
   pattern = "LIGHTGBM_C_EXPORT SEXP "
   , replacement = ""
   , x = c_api_contents
+  , fixed = TRUE
 )
 c_api_symbols <- gsub(
   pattern = "\\(.*"
@@ -435,13 +439,15 @@ if (isTRUE(SKIP_VIGNETTES)) {
 
 # Install the package
 version <- gsub(
-  "Version: ",
-  "",
-  grep(
-    "Version: "
-    , readLines(con = file.path(TEMP_R_DIR, "DESCRIPTION"))
+  pattern = "Version: ",
+  replacement = "",
+  x = grep(
+    pattern = "Version: "
+    , x = readLines(con = file.path(TEMP_R_DIR, "DESCRIPTION"))
     , value = TRUE
+    , fixed = TRUE
   )
+  , fixed = TRUE
 )
 tarball <- file.path(getwd(), sprintf("lightgbm_%s.tar.gz", version))