-
Notifications
You must be signed in to change notification settings - Fork 123
Closed
Labels
bug: an unexpected problem or unintended behavior
Description
The problem
I'm having trouble using dplyr::recode() inside step_mutate() from the recipes package. I am trying to recode text in a character variable. In some instances, these recodes work fine, but in others they produce an error (Error in `vctrs::list_unchop()`). Below I provide a number of examples showing when it works and when it produces an error. My apologies if there is something fundamentally wrong with my code, but I can't detect what that could be.
Reproducible example
library(recipes)
#> Loading required package: dplyr
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
#>
#> Attaching package: 'recipes'
#> The following object is masked from 'package:stats':
#>
#> step
library(modeldata)
# Simplify ames data and convert MS_Zoning to char to match class of data
# I am working with
ames_1 <- ames %>%
select(Sale_Price, MS_Zoning) %>%
mutate(MS_Zoning = as.character(MS_Zoning)) %>%
glimpse()
#> Rows: 2,930
#> Columns: 2
#> $ Sale_Price <int> 215000, 105000, 172000, 244000, 189900, 195500, 213500, 191…
#> $ MS_Zoning <chr> "Residential_Low_Density", "Residential_High_Density", "Res…
# display unique values of MS_Zoning with counts
ames_1 %>%
pull(MS_Zoning) %>%
table()
#> .
#> A_agr C_all
#> 2 25
#> Floating_Village_Residential I_all
#> 139 2
#> Residential_High_Density Residential_Low_Density
#> 27 2273
#> Residential_Medium_Density
#> 462
# recode works in this recipe
rec_ex1 <-
recipe(Sale_Price ~ ., data = ames_1) %>%
step_mutate(MS_Zoning = dplyr::recode(.x = MS_Zoning,
Residential_Low_Density = "r_ld"))
rec_ex1
#> Recipe
#>
#> Inputs:
#>
#> role #variables
#> outcome 1
#> predictor 1
#>
#> Operations:
#>
#> Variable mutation for dplyr::recode(.x = MS_Zoning, Residential_...
# error (with two residential strings to recode)
rec_ex2 <-
recipe(Sale_Price ~ ., data = ames_1) %>%
step_mutate(MS_Zoning = dplyr::recode(.x = MS_Zoning,
Residential_Low_Density = "r_ld",
Residential_Medium_Density = "r_md"))
rec_ex2
#> Recipe
#>
#> Inputs:
#>
#> role #variables
#> outcome 1
#> predictor 1
#>
#> Operations:
#>
#> Variable mutation for
#> Error in `vctrs::list_unchop()`:
#> ! Can't merge the outer name `MS_Zoning` with a vector of length > 1.
#> Please supply a `.name_spec` specification.
#> Backtrace:
#> ▆
#> 1. ├─base::tryCatch(...)
#> 2. │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#> 3. │ ├─base (local) tryCatchOne(...)
#> 4. │ │ └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#> 5. │ └─base (local) tryCatchList(expr, names[-nh], parentenv, handlers[-nh])
#> 6. │ └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#> 7. │ └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#> 8. ├─base::withCallingHandlers(...)
#> 9. ├─base::saveRDS(...)
#> 10. ├─base::do.call(...)
#> 11. ├─base (local) `<fn>`(...)
#> 12. ├─global `<fn>`(input = base::quote("mid-guppy_reprex.R"))
#> 13. │ └─rmarkdown::render(input, quiet = TRUE, envir = globalenv(), encoding = "UTF-8")
#> 14. │ └─knitr::knit(knit_input, knit_output, envir = envir, quiet = quiet)
#> 15. │ └─knitr:::process_file(text, output)
#> 16. │ ├─base::withCallingHandlers(...)
#> 17. │ ├─knitr:::process_group(group)
#> 18. │ └─knitr:::process_group.block(group)
#> 19. │ └─knitr:::call_block(x)
#> 20. │ └─knitr:::block_exec(params)
#> 21. │ └─knitr:::eng_r(options)
#> 22. │ ├─knitr:::in_input_dir(...)
#> 23. │ │ └─knitr:::in_dir(input_dir(), expr)
#> 24. │ └─knitr (local) evaluate(...)
#> 25. │ └─evaluate::evaluate(...)
#> 26. │ └─evaluate:::evaluate_call(...)
#> 27. │ ├─evaluate (local) handle(...)
#> 28. │ │ └─base::try(f, silent = TRUE)
#> 29. │ │ └─base::tryCatch(...)
#> 30. │ │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#> 31. │ │ └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#> 32. │ │ └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#> 33. │ ├─base::withCallingHandlers(...)
#> 34. │ ├─base::withVisible(value_fun(ev$value, ev$visible))
#> 35. │ └─knitr (local) value_fun(ev$value, ev$visible)
#> 36. │ └─knitr (local) fun(x, options = options)
#> 37. │ ├─base::withVisible(knit_print(x, ...))
#> 38. │ ├─knitr::knit_print(x, ...)
#> 39. │ └─knitr:::knit_print.default(x, ...)
#> 40. │ └─evaluate (local) normal_print(x)
#> 41. │ ├─base::print(x)
#> 42. │ └─recipes:::print.recipe(x)
#> 43. │ ├─base::print(x$steps[[i]], form_width = form_width)
#> 44. │ └─recipes:::print.step_mutate(x$steps[[i]], form_width = form_width)
#> 45. │ └─recipes::print_step(x$inputs, x$inputs, x$trained, title, width)
#> 46. │ └─recipes::format_selectors(untr_obj, width = width)
#> 47. │ └─vctrs::list_unchop(x_items, ptype = character())
#> 48. └─rlang::abort(message = message)
#recode works in this recipe (not a specific issue with Residential_Medium_Density)
rec_ex3 <-
recipe(Sale_Price ~ ., data = ames_1) %>%
step_mutate(MS_Zoning = dplyr::recode(.x = MS_Zoning,
Residential_Medium_Density = "r_md"))
rec_ex3
#> Recipe
#>
#> Inputs:
#>
#> role #variables
#> outcome 1
#> predictor 1
#>
#> Operations:
#>
#> Variable mutation for dplyr::recode(.x = MS_Zoning, Residential_...
# recode works in this recipe (not an issue with two recodes)
rec_ex4 <-
recipe(Sale_Price ~ ., data = ames_1) %>%
step_mutate(MS_Zoning = dplyr::recode(.x = MS_Zoning,
C_all = "c",
I_all = "i"))
rec_ex4
#> Recipe
#>
#> Inputs:
#>
#> role #variables
#> outcome 1
#> predictor 1
#>
#> Operations:
#>
#> Variable mutation for dplyr::recode(.x = MS_Zoning, C_all = "c",...
# recode works in this recipe (or with three recodes)
rec_ex5 <-
recipe(Sale_Price ~ ., data = ames_1) %>%
step_mutate(MS_Zoning = dplyr::recode(.x = MS_Zoning,
C_all = "c",
I_all = "i",
A_agr = "a"))
rec_ex5
#> Recipe
#>
#> Inputs:
#>
#> role #variables
#> outcome 1
#> predictor 1
#>
#> Operations:
#>
#> Variable mutation for dplyr::recode(.x = MS_Zoning, C_all = "c",...
# error (with a different residential string added to other strings)
rec_ex6 <-
recipe(Sale_Price ~ ., data = ames_1) %>%
step_mutate(MS_Zoning = dplyr::recode(.x = MS_Zoning,
C_all = "c",
I_all = "i",
Residential_High_Density = "r_hd"))
rec_ex6
#> Recipe
#>
#> Inputs:
#>
#> role #variables
#> outcome 1
#> predictor 1
#>
#> Operations:
#>
#> Variable mutation for
#> Error in `vctrs::list_unchop()`:
#> ! Can't merge the outer name `MS_Zoning` with a vector of length > 1.
#> Please supply a `.name_spec` specification.
#> Backtrace:
#> ▆
#> 1. ├─base::tryCatch(...)
#> 2. │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#> 3. │ ├─base (local) tryCatchOne(...)
#> 4. │ │ └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#> 5. │ └─base (local) tryCatchList(expr, names[-nh], parentenv, handlers[-nh])
#> 6. │ └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#> 7. │ └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#> 8. ├─base::withCallingHandlers(...)
#> 9. ├─base::saveRDS(...)
#> 10. ├─base::do.call(...)
#> 11. ├─base (local) `<fn>`(...)
#> 12. ├─global `<fn>`(input = base::quote("mid-guppy_reprex.R"))
#> 13. │ └─rmarkdown::render(input, quiet = TRUE, envir = globalenv(), encoding = "UTF-8")
#> 14. │ └─knitr::knit(knit_input, knit_output, envir = envir, quiet = quiet)
#> 15. │ └─knitr:::process_file(text, output)
#> 16. │ ├─base::withCallingHandlers(...)
#> 17. │ ├─knitr:::process_group(group)
#> 18. │ └─knitr:::process_group.block(group)
#> 19. │ └─knitr:::call_block(x)
#> 20. │ └─knitr:::block_exec(params)
#> 21. │ └─knitr:::eng_r(options)
#> 22. │ ├─knitr:::in_input_dir(...)
#> 23. │ │ └─knitr:::in_dir(input_dir(), expr)
#> 24. │ └─knitr (local) evaluate(...)
#> 25. │ └─evaluate::evaluate(...)
#> 26. │ └─evaluate:::evaluate_call(...)
#> 27. │ ├─evaluate (local) handle(...)
#> 28. │ │ └─base::try(f, silent = TRUE)
#> 29. │ │ └─base::tryCatch(...)
#> 30. │ │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#> 31. │ │ └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#> 32. │ │ └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#> 33. │ ├─base::withCallingHandlers(...)
#> 34. │ ├─base::withVisible(value_fun(ev$value, ev$visible))
#> 35. │ └─knitr (local) value_fun(ev$value, ev$visible)
#> 36. │ └─knitr (local) fun(x, options = options)
#> 37. │ ├─base::withVisible(knit_print(x, ...))
#> 38. │ ├─knitr::knit_print(x, ...)
#> 39. │ └─knitr:::knit_print.default(x, ...)
#> 40. │ └─evaluate (local) normal_print(x)
#> 41. │ ├─base::print(x)
#> 42. │ └─recipes:::print.recipe(x)
#> 43. │ ├─base::print(x$steps[[i]], form_width = form_width)
#> 44. │ └─recipes:::print.step_mutate(x$steps[[i]], form_width = form_width)
#> 45. │ └─recipes::print_step(x$inputs, x$inputs, x$trained, title, width)
#> 46. │ └─recipes::format_selectors(untr_obj, width = width)
#> 47. │ └─vctrs::list_unchop(x_items, ptype = character())
#> 48. └─rlang::abort(message = message)
# recode works in this recipe (but that string works by itself too)
rec_ex7 <-
recipe(Sale_Price ~ ., data = ames_1) %>%
step_mutate(MS_Zoning = dplyr::recode(.x = MS_Zoning,
Residential_High_Density = "r_hd"))
rec_ex7
#> Recipe
#>
#> Inputs:
#>
#> role #variables
#> outcome 1
#> predictor 1
#>
#> Operations:
#>
#> Variable mutation for dplyr::recode(.x = MS_Zoning, Residential_...
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.2.2 (2022-10-31 ucrt)
#> os Windows 10 x64 (build 22000)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_United States.utf8
#> ctype English_United States.utf8
#> tz America/Chicago
#> date 2023-02-06
#> pandoc 2.19.2 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.1)
#> class 7.3-20 2022-01-16 [2] CRAN (R 4.2.2)
#> cli 3.6.0 2023-01-09 [1] CRAN (R 4.2.2)
#> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.2)
#> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.1)
#> digest 0.6.31 2022-12-11 [1] CRAN (R 4.2.2)
#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.2)
#> evaluate 0.20 2023-01-17 [1] CRAN (R 4.2.1)
#> fansi 1.0.4 2023-01-22 [1] CRAN (R 4.2.2)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.1)
#> fs 1.6.0 2023-01-23 [1] CRAN (R 4.2.2)
#> future 1.31.0 2023-02-01 [1] CRAN (R 4.2.2)
#> future.apply 1.10.0 2022-11-05 [1] CRAN (R 4.2.2)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.1)
#> globals 0.16.2 2022-11-21 [1] CRAN (R 4.2.2)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.1)
#> gower 1.0.1 2022-12-22 [1] CRAN (R 4.2.2)
#> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.1)
#> htmltools 0.5.4 2022-12-07 [1] CRAN (R 4.2.2)
#> ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.1)
#> knitr 1.42 2023-01-25 [1] CRAN (R 4.2.2)
#> lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.2)
#> lava 1.7.1 2023-01-06 [1] CRAN (R 4.2.2)
#> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.2)
#> listenv 0.9.0 2022-12-16 [1] CRAN (R 4.2.2)
#> lubridate 1.9.1 2023-01-24 [1] CRAN (R 4.2.2)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.1)
#> MASS 7.3-58.1 2022-08-03 [2] CRAN (R 4.2.2)
#> Matrix 1.5-3 2022-11-11 [1] CRAN (R 4.2.2)
#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.2.2)
#> nnet 7.3-18 2022-09-28 [2] CRAN (R 4.2.2)
#> parallelly 1.34.0 2023-01-13 [1] CRAN (R 4.2.2)
#> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.1)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.1)
#> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.2.1)
#> purrr 1.0.1 2023-01-10 [1] CRAN (R 4.2.2)
#> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.2.2)
#> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.2.0)
#> R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.2.0)
#> R.utils 2.12.2 2022-11-11 [1] CRAN (R 4.2.2)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.1)
#> Rcpp 1.0.10 2023-01-22 [1] CRAN (R 4.2.2)
#> recipes * 1.0.4 2023-01-11 [1] CRAN (R 4.2.2)
#> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.2)
#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.2)
#> rmarkdown 2.20 2023-01-19 [1] CRAN (R 4.2.2)
#> rpart 4.1.19 2022-10-21 [2] CRAN (R 4.2.2)
#> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.1)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.2)
#> styler 1.9.0 2023-01-15 [1] CRAN (R 4.2.2)
#> survival 3.4-0 2022-08-09 [2] CRAN (R 4.2.2)
#> tibble 3.1.8 2022-07-22 [1] CRAN (R 4.2.1)
#> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.2)
#> timechange 0.2.0 2023-01-11 [1] CRAN (R 4.2.2)
#> timeDate 4022.108 2023-01-07 [1] CRAN (R 4.2.2)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.1)
#> vctrs 0.5.2 2023-01-23 [1] CRAN (R 4.2.2)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.1)
#> xfun 0.36 2022-12-21 [1] CRAN (R 4.2.2)
#> yaml 2.3.6 2022-10-18 [1] CRAN (R 4.2.2)
#>
#> [1] C:/Users/jjcurtin/AppData/Local/R/win-library/4.2
#> [2] C:/Program Files/R/R-4.2.2/library
#>
#> ──────────────────────────────────────────────────────────────────────────────
Created on 2023-02-06 with reprex v2.0.2
Session info
sessioninfo::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#> setting value
#> version R version 4.2.2 (2022-10-31 ucrt)
#> os Windows 10 x64 (build 22000)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_United States.utf8
#> ctype English_United States.utf8
#> tz America/Chicago
#> date 2023-02-06
#> pandoc 2.19.2 @ C:/Program Files/RStudio/resources/app/bin/quarto/bin/tools/ (via rmarkdown)
#>
#> ─ Packages ───────────────────────────────────────────────────────────────────
#> package * version date (UTC) lib source
#> assertthat 0.2.1 2019-03-21 [1] CRAN (R 4.2.1)
#> class 7.3-20 2022-01-16 [2] CRAN (R 4.2.2)
#> cli 3.6.0 2023-01-09 [1] CRAN (R 4.2.2)
#> codetools 0.2-18 2020-11-04 [2] CRAN (R 4.2.2)
#> DBI 1.1.3 2022-06-18 [1] CRAN (R 4.2.1)
#> digest 0.6.31 2022-12-11 [1] CRAN (R 4.2.2)
#> dplyr * 1.0.10 2022-09-01 [1] CRAN (R 4.2.2)
#> evaluate 0.20 2023-01-17 [1] CRAN (R 4.2.1)
#> fansi 1.0.4 2023-01-22 [1] CRAN (R 4.2.2)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.2.1)
#> fs 1.6.0 2023-01-23 [1] CRAN (R 4.2.2)
#> future 1.31.0 2023-02-01 [1] CRAN (R 4.2.2)
#> future.apply 1.10.0 2022-11-05 [1] CRAN (R 4.2.2)
#> generics 0.1.3 2022-07-05 [1] CRAN (R 4.2.1)
#> globals 0.16.2 2022-11-21 [1] CRAN (R 4.2.2)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.2.1)
#> gower 1.0.1 2022-12-22 [1] CRAN (R 4.2.2)
#> hardhat 1.2.0 2022-06-30 [1] CRAN (R 4.2.1)
#> htmltools 0.5.4 2022-12-07 [1] CRAN (R 4.2.2)
#> ipred 0.9-13 2022-06-02 [1] CRAN (R 4.2.1)
#> knitr 1.42 2023-01-25 [1] CRAN (R 4.2.2)
#> lattice 0.20-45 2021-09-22 [2] CRAN (R 4.2.2)
#> lava 1.7.1 2023-01-06 [1] CRAN (R 4.2.2)
#> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.2.2)
#> listenv 0.9.0 2022-12-16 [1] CRAN (R 4.2.2)
#> lubridate 1.9.1 2023-01-24 [1] CRAN (R 4.2.2)
#> magrittr 2.0.3 2022-03-30 [1] CRAN (R 4.2.1)
#> MASS 7.3-58.1 2022-08-03 [2] CRAN (R 4.2.2)
#> Matrix 1.5-3 2022-11-11 [1] CRAN (R 4.2.2)
#> modeldata * 1.1.0 2023-01-25 [1] CRAN (R 4.2.2)
#> nnet 7.3-18 2022-09-28 [2] CRAN (R 4.2.2)
#> parallelly 1.34.0 2023-01-13 [1] CRAN (R 4.2.2)
#> pillar 1.8.1 2022-08-19 [1] CRAN (R 4.2.1)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.2.1)
#> prodlim 2019.11.13 2019-11-17 [1] CRAN (R 4.2.1)
#> purrr 1.0.1 2023-01-10 [1] CRAN (R 4.2.2)
#> R.cache 0.16.0 2022-07-21 [1] CRAN (R 4.2.2)
#> R.methodsS3 1.8.2 2022-06-13 [1] CRAN (R 4.2.0)
#> R.oo 1.25.0 2022-06-12 [1] CRAN (R 4.2.0)
#> R.utils 2.12.2 2022-11-11 [1] CRAN (R 4.2.2)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.2.1)
#> Rcpp 1.0.10 2023-01-22 [1] CRAN (R 4.2.2)
#> recipes * 1.0.4 2023-01-11 [1] CRAN (R 4.2.2)
#> reprex 2.0.2 2022-08-17 [1] CRAN (R 4.2.2)
#> rlang 1.0.6 2022-09-24 [1] CRAN (R 4.2.2)
#> rmarkdown 2.20 2023-01-19 [1] CRAN (R 4.2.2)
#> rpart 4.1.19 2022-10-21 [2] CRAN (R 4.2.2)
#> rstudioapi 0.14 2022-08-22 [1] CRAN (R 4.2.1)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.2.2)
#> styler 1.9.0 2023-01-15 [1] CRAN (R 4.2.2)
#> survival 3.4-0 2022-08-09 [2] CRAN (R 4.2.2)
#> tibble 3.1.8 2022-07-22 [1] CRAN (R 4.2.1)
#> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.2.2)
#> timechange 0.2.0 2023-01-11 [1] CRAN (R 4.2.2)
#> timeDate 4022.108 2023-01-07 [1] CRAN (R 4.2.2)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.2.1)
#> vctrs 0.5.2 2023-01-23 [1] CRAN (R 4.2.2)
#> withr 2.5.0 2022-03-03 [1] CRAN (R 4.2.1)
#> xfun 0.36 2022-12-21 [1] CRAN (R 4.2.2)
#> yaml 2.3.6 2022-10-18 [1] CRAN (R 4.2.2)
#>
#> [1] C:/Users/jjcurtin/AppData/Local/R/win-library/4.2
#> [2] C:/Program Files/R/R-4.2.2/library
#>
#> ──────────────────────────────────────────────────────────────────────────────
Reactions are currently unavailable
Metadata
Metadata
Assignees
Labels
bug: an unexpected problem or unintended behavior