-
Notifications
You must be signed in to change notification settings - Fork 122
Description
The problem
There seems to be an issue with the step_interact() function when creating multiple interactions in which two of the variable names are longer than a certain number of characters (12 to 14?). The failure occurs when the interaction between the two long-named variables is specified as the first interaction, but not when it is specified as the second interaction.
Reproducible example
I originally ran into the problem when using two variables with 14 characters in their names: the printed recipe shows "Interactions with: ..." instead of the interaction terms, and calling prep() then fails with an error in rlang::f_rhs().
library(recipes)
#> Loading required package: dplyr
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
#>
#> Attaching package: 'recipes'
#> The following object is masked from 'package:stats':
#>
#> step
df <- data.frame(
a = 1:10,
bbbbbbbbbbbbbb = 1:10, # 14
cccccccccccccc = 1:10, # 14
d = 1:10
)
# error
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbbbbb'):starts_with('d')) %>%
prep(training = df)
#> Warning: Interaction specification failed for: ~.... No interactions will be
#> created.
#> Error in `step_interact()`:
#> Caused by error in `rlang::f_rhs()`:
#> ! `x` must be a formula
Recipe output without prep:
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbbbbb'):starts_with('d'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: ...
Switching the order in which the interactions are specified allows it to work.
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbbbb"):starts_with("d") +
#> starts_with("bbbbbbbbbbbbbb"):starts_with("cccccccccccccc")
I then tried to find a pattern behind the error, but have been unable to discern one from my attempts.
The following are the additional variations I tried while looking for a pattern. The ones that worked are marked with # yes, and the ones that printed "Interactions with: ..." are marked with # no:
Using matches() instead of starts_with() works.
# yes
recipe(df) %>%
step_interact(~matches('bbbbbbbbbbbbbb'):matches('cccccccccccccc') + matches('bbbbbbbbbbbbbb'):matches('d'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: matches("bbbbbbbbbbbbbb"):matches("cccccccccccccc") +
#> matches("bbbbbbbbbbbbbb"):matches("d")
Using 14 characters in the third variable name as well does not work in either order.
df <- data.frame(
a = 1:10,
bbbbbbbbbbbbbb = 1:10, # 14
cccccccccccccc = 1:10, # 14
dddddddddddddd = 1:10 # 14
)
# no
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbbbbb'):starts_with('dddddddddddddd'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: ...
# no
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('dddddddddddddd') + starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: ...
Using 13 characters in two of the variable names, the result depends on the order of the interactions.
df <- data.frame(
a = 1:10,
bbbbbbbbbbbbb = 1:10, # 13
ccccccccccccc = 1:10, # 13
d = 1:10
)
# no
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbbb'):starts_with('ccccccccccccc') + starts_with('bbbbbbbbbbbbb'):starts_with('d'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: ...
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbbbb'):starts_with('ccccccccccccc'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbbb"):starts_with("d") +
#> starts_with("bbbbbbbbbbbbb"):starts_with("ccccccccccccc")
Other combinations of the number of characters in the variable names:
df <- data.frame(
a = 1:10,
bbbbbbbbbbbb = 1:10, # 12
cccccccccccc = 1:10, # 12
d = 1:10
)
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbb'):starts_with('cccccccccccc') + starts_with('bbbbbbbbbbbb'):starts_with('d'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbb"):starts_with("cccccccccccc") +
#> starts_with("bbbbbbbbbbbb"):starts_with("d")
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbbb'):starts_with('cccccccccccc'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbb"):starts_with("d") +
#> starts_with("bbbbbbbbbbbb"):starts_with("cccccccccccc")
df <- data.frame(
a = 1:10,
bbbbbbbbbbbbb = 1:10, # 13
cccccccccccccc = 1:10, # 14
d = 1:10
)
# no
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbbbb'):starts_with('d'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: ...
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbbbb'):starts_with('cccccccccccccc'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbbb"):starts_with("d") +
#> starts_with("bbbbbbbbbbbbb"):starts_with("cccccccccccccc")
df <- data.frame(
a = 1:10,
bbbbbbbbbbbb = 1:10, # 12
cccccccccccccc = 1:10, # 14
d = 1:10
)
# no
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbbb'):starts_with('d'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: ...
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbbb'):starts_with('cccccccccccccc'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbb"):starts_with("d") +
#> starts_with("bbbbbbbbbbbb"):starts_with("cccccccccccccc")
df <- data.frame(
a = 1:10,
bbbbbbbbbbb = 1:10, # 11
cccccccccccccc = 1:10, # 14
d = 1:10
)
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbb'):starts_with('d'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("cccccccccccccc") +
#> starts_with("bbbbbbbbbbb"):starts_with("d")
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbb'):starts_with('cccccccccccccc'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("d") +
#> starts_with("bbbbbbbbbbb"):starts_with("cccccccccccccc")
df <- data.frame(
a = 1:10,
bbbbbbbbbbb = 1:10, # 11
ccccccccccccc = 1:10, # 13
d = 1:10
)
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbb'):starts_with('ccccccccccccc') + starts_with('bbbbbbbbbbb'):starts_with('d'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("ccccccccccccc") +
#> starts_with("bbbbbbbbbbb"):starts_with("d")
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbb'):starts_with('ccccccccccccc'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("d") +
#> starts_with("bbbbbbbbbbb"):starts_with("ccccccccccccc")
df <- data.frame(
a = 1:10,
bbbbbbbbbbb = 1:10, # 11
cccccccccccc = 1:10, # 12
d = 1:10
)
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbb'):starts_with('cccccccccccc') + starts_with('bbbbbbbbbbb'):starts_with('d'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("cccccccccccc") +
#> starts_with("bbbbbbbbbbb"):starts_with("d")
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbb'):starts_with('cccccccccccc'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("d") +
#> starts_with("bbbbbbbbbbb"):starts_with("cccccccccccc")
df <- data.frame(
a = 1:10,
bbbbbbbbbbb = 1:10, # 11
ccccccccccc = 1:10, # 11
d = 1:10
)
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbb'):starts_with('ccccccccccc') + starts_with('bbbbbbbbbbb'):starts_with('d'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("ccccccccccc") +
#> starts_with("bbbbbbbbbbb"):starts_with("d")
# yes
recipe(df) %>%
step_interact(~starts_with('bbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbb'):starts_with('ccccccccccc'))
#>
#> -- Recipe ----------------------------------------------------------------------
#>
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#>
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("d") +
#> starts_with("bbbbbbbbbbb"):starts_with("ccccccccccc")
Created on 2023-10-05 by the reprex package (v2.0.1)
Session info
sessioninfo::session_info()
#> - Session info ---------------------------------------------------------------
#> setting value
#> version R version 4.1.2 (2021-11-01)
#> os Windows 10 x64 (build 19045)
#> system x86_64, mingw32
#> ui RTerm
#> language (EN)
#> collate English_United States.1252
#> ctype English_United States.1252
#> tz America/Denver
#> date 2023-10-05
#>
#> - Packages -------------------------------------------------------------------
#> package * version date (UTC) lib source
#> class 7.3-19 2021-05-03 [1] CRAN (R 4.1.2)
#> cli 3.6.1 2023-03-23 [1] CRAN (R 4.1.3)
#> codetools 0.2-18 2020-11-04 [1] CRAN (R 4.1.2)
#> data.table 1.14.2 2021-09-27 [1] CRAN (R 4.1.2)
#> digest 0.6.29 2021-12-01 [1] CRAN (R 4.1.2)
#> dplyr * 1.1.3 2023-09-03 [1] CRAN (R 4.1.2)
#> evaluate 0.14 2019-05-28 [1] CRAN (R 4.1.2)
#> fansi 0.5.0 2021-05-25 [1] CRAN (R 4.1.2)
#> fastmap 1.1.0 2021-01-25 [1] CRAN (R 4.1.2)
#> fs 1.5.2 2021-12-08 [1] CRAN (R 4.1.2)
#> future 1.33.0 2023-07-01 [1] CRAN (R 4.1.2)
#> future.apply 1.11.0 2023-05-21 [1] CRAN (R 4.1.2)
#> generics 0.1.2 2022-01-31 [1] CRAN (R 4.1.3)
#> globals 0.16.2 2022-11-21 [1] CRAN (R 4.1.3)
#> glue 1.6.2 2022-02-24 [1] CRAN (R 4.1.3)
#> gower 1.0.1 2022-12-22 [1] CRAN (R 4.1.3)
#> hardhat 1.3.0 2023-03-30 [1] CRAN (R 4.1.3)
#> highr 0.9 2021-04-16 [1] CRAN (R 4.1.2)
#> htmltools 0.5.2 2021-08-25 [1] CRAN (R 4.1.2)
#> ipred 0.9-14 2023-03-09 [1] CRAN (R 4.1.3)
#> knitr 1.36 2021-09-29 [1] CRAN (R 4.1.2)
#> lattice 0.20-45 2021-09-22 [1] CRAN (R 4.1.2)
#> lava 1.7.2.1 2023-02-27 [1] CRAN (R 4.1.3)
#> lifecycle 1.0.3 2022-10-07 [1] CRAN (R 4.1.3)
#> listenv 0.9.0 2022-12-16 [1] CRAN (R 4.1.3)
#> lubridate 1.8.0 2021-10-07 [1] CRAN (R 4.1.2)
#> magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.1.2)
#> MASS 7.3-54 2021-05-03 [1] CRAN (R 4.1.2)
#> Matrix 1.3-4 2021-06-01 [1] CRAN (R 4.1.2)
#> nnet 7.3-16 2021-05-03 [1] CRAN (R 4.1.2)
#> parallelly 1.36.0 2023-05-26 [1] CRAN (R 4.1.2)
#> pillar 1.9.0 2023-03-22 [1] CRAN (R 4.1.3)
#> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.1.2)
#> prodlim 2023.03.31 2023-04-02 [1] CRAN (R 4.1.3)
#> purrr 1.0.2 2023-08-10 [1] CRAN (R 4.1.2)
#> R.cache 0.15.0 2021-04-30 [1] CRAN (R 4.1.3)
#> R.methodsS3 1.8.1 2020-08-26 [1] CRAN (R 4.1.1)
#> R.oo 1.24.0 2020-08-26 [1] CRAN (R 4.1.1)
#> R.utils 2.11.0 2021-09-26 [1] CRAN (R 4.1.3)
#> R6 2.5.1 2021-08-19 [1] CRAN (R 4.1.2)
#> Rcpp 1.0.7 2021-07-07 [1] CRAN (R 4.1.2)
#> recipes * 1.0.8 2023-08-25 [1] CRAN (R 4.1.2)
#> reprex 2.0.1 2021-08-05 [1] CRAN (R 4.1.2)
#> rlang 1.1.1 2023-04-28 [1] CRAN (R 4.1.2)
#> rmarkdown 2.11 2021-09-14 [1] CRAN (R 4.1.2)
#> rpart 4.1-15 2019-04-12 [1] CRAN (R 4.1.2)
#> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.1.2)
#> sessioninfo 1.2.2 2021-12-06 [1] CRAN (R 4.1.2)
#> stringi 1.7.6 2021-11-29 [1] CRAN (R 4.1.2)
#> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.1.2)
#> styler 1.7.0 2022-03-13 [1] CRAN (R 4.1.3)
#> survival 3.2-13 2021-08-24 [1] CRAN (R 4.1.2)
#> tibble 3.2.1 2023-03-20 [1] CRAN (R 4.1.3)
#> tidyselect 1.2.0 2022-10-10 [1] CRAN (R 4.1.3)
#> timeDate 4022.108 2023-01-07 [1] CRAN (R 4.1.3)
#> utf8 1.2.2 2021-07-24 [1] CRAN (R 4.1.2)
#> vctrs 0.6.3 2023-06-14 [1] CRAN (R 4.1.2)
#> withr 2.4.3 2021-11-30 [1] CRAN (R 4.1.2)
#> xfun 0.29 2021-12-14 [1] CRAN (R 4.1.2)
#> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.1.1)
#>
#>
#> ------------------------------------------------------------------------------