Skip to content

Error in starts_with and step_interact related to length of variable name and order #1231

@ray-p144

Description

@ray-p144

The problem

There seems to be an issue with the step_interact() function when creating multiple interactions in which two of the variable names are longer than a certain number of characters (somewhere between 12 and 14?). The failure occurs when the interaction involving the two long names is specified first, but not when it is specified second.

Reproducible example

I originally ran into the problem when using two variables with 14-character names: the printed recipe shows "Interactions with: ..." instead of the interaction terms, and calling prep() then fails with an error in rlang::f_rhs().

library(recipes)
#> Loading required package: dplyr
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
#> 
#> Attaching package: 'recipes'
#> The following object is masked from 'package:stats':
#> 
#>     step

df <- data.frame(
    a = 1:10,
    bbbbbbbbbbbbbb = 1:10, # 14
    cccccccccccccc = 1:10, # 14
    d = 1:10
)

# error
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbbbbb'):starts_with('d')) %>%
    prep(training = df)
#> Warning: Interaction specification failed for: ~.... No interactions will be
#> created.
#> Error in `step_interact()`:
#> Caused by error in `rlang::f_rhs()`:
#> ! `x` must be a formula

Recipe output without prep:

recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbbbbb'):starts_with('d'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: ...

Switching the order of specifying the interaction allows it to work.

# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbbbb"):starts_with("d") +
#>   starts_with("bbbbbbbbbbbbbb"):starts_with("cccccccccccccc")

I then proceeded to try to find a pattern for the error, but have been unable to discern anything from my attempt.

The following are additional variations I tried while looking for a pattern. The ones that worked are marked # yes, and the ones that printed "Interactions with: ..." are marked # no:

Using matches() instead of starts_with() works.

# yes
recipe(df) %>%
    step_interact(~matches('bbbbbbbbbbbbbb'):matches('cccccccccccccc') + matches('bbbbbbbbbbbbbb'):matches('d'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: matches("bbbbbbbbbbbbbb"):matches("cccccccccccccc") +
#>   matches("bbbbbbbbbbbbbb"):matches("d")

Using 14 characters in the third variable name as well does not work in either order.

df <- data.frame(
    a = 1:10,
    bbbbbbbbbbbbbb = 1:10, # 14
    cccccccccccccc = 1:10, # 14
    dddddddddddddd = 1:10  # 14
)
# no
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbbbbb'):starts_with('dddddddddddddd'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: ...

# no
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbbbb'):starts_with('dddddddddddddd') + starts_with('bbbbbbbbbbbbbb'):starts_with('cccccccccccccc'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: ...

With 13 characters in two of the variable names, whether it works depends on the order of the interactions.

df <- data.frame(
    a = 1:10,
    bbbbbbbbbbbbb = 1:10, # 13
    ccccccccccccc = 1:10, # 13
    d = 1:10
)
# no
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbbb'):starts_with('ccccccccccccc') + starts_with('bbbbbbbbbbbbb'):starts_with('d'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: ...

# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbbbb'):starts_with('ccccccccccccc'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbbb"):starts_with("d") +
#>   starts_with("bbbbbbbbbbbbb"):starts_with("ccccccccccccc")

Other combinations of variable-name lengths:

df <- data.frame(
    a = 1:10,
    bbbbbbbbbbbb = 1:10, # 12
    cccccccccccc = 1:10, # 12
    d = 1:10
)
# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbb'):starts_with('cccccccccccc') + starts_with('bbbbbbbbbbbb'):starts_with('d'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbb"):starts_with("cccccccccccc") +
#>   starts_with("bbbbbbbbbbbb"):starts_with("d")

# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbbb'):starts_with('cccccccccccc'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbb"):starts_with("d") +
#>   starts_with("bbbbbbbbbbbb"):starts_with("cccccccccccc")

df <- data.frame(
    a = 1:10,
    bbbbbbbbbbbbb = 1:10, # 13
    cccccccccccccc = 1:10, # 14
    d = 1:10
)
# no
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbbbb'):starts_with('d'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: ...

# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbbbb'):starts_with('cccccccccccccc'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbbb"):starts_with("d") +
#>   starts_with("bbbbbbbbbbbbb"):starts_with("cccccccccccccc")

df <- data.frame(
    a = 1:10,
    bbbbbbbbbbbb = 1:10, # 12
    cccccccccccccc = 1:10, # 14
    d = 1:10
)
# no
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbbb'):starts_with('d'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: ...

# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbbb'):starts_with('cccccccccccccc'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbbb"):starts_with("d") +
#>   starts_with("bbbbbbbbbbbb"):starts_with("cccccccccccccc")

df <- data.frame(
    a = 1:10,
    bbbbbbbbbbb = 1:10, # 11
    cccccccccccccc = 1:10, # 14
    d = 1:10
)
# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbb'):starts_with('cccccccccccccc') + starts_with('bbbbbbbbbbb'):starts_with('d'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("cccccccccccccc") +
#>   starts_with("bbbbbbbbbbb"):starts_with("d")

# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbb'):starts_with('cccccccccccccc'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("d") +
#>   starts_with("bbbbbbbbbbb"):starts_with("cccccccccccccc")


df <- data.frame(
    a = 1:10,
    bbbbbbbbbbb = 1:10, # 11
    ccccccccccccc = 1:10, # 13
    d = 1:10
)
# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbb'):starts_with('ccccccccccccc') + starts_with('bbbbbbbbbbb'):starts_with('d'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("ccccccccccccc") +
#>   starts_with("bbbbbbbbbbb"):starts_with("d")

# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbb'):starts_with('ccccccccccccc'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("d") +
#>   starts_with("bbbbbbbbbbb"):starts_with("ccccccccccccc")

df <- data.frame(
    a = 1:10,
    bbbbbbbbbbb = 1:10, # 11
    cccccccccccc = 1:10, # 12
    d = 1:10
)
# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbb'):starts_with('cccccccccccc') + starts_with('bbbbbbbbbbb'):starts_with('d'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("cccccccccccc") +
#>   starts_with("bbbbbbbbbbb"):starts_with("d")

# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbb'):starts_with('cccccccccccc'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("d") +
#>   starts_with("bbbbbbbbbbb"):starts_with("cccccccccccc")

df <- data.frame(
    a = 1:10,
    bbbbbbbbbbb = 1:10, # 11
    ccccccccccc = 1:10, # 11
    d = 1:10
)
# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbb'):starts_with('ccccccccccc') + starts_with('bbbbbbbbbbb'):starts_with('d'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("ccccccccccc") +
#>   starts_with("bbbbbbbbbbb"):starts_with("d")

# yes
recipe(df) %>%
    step_interact(~starts_with('bbbbbbbbbbb'):starts_with('d') + starts_with('bbbbbbbbbbb'):starts_with('ccccccccccc'))
#> 
#> -- Recipe ----------------------------------------------------------------------
#> 
#> -- Inputs
#> Number of variables by role
#> undeclared role: 4
#> 
#> -- Operations
#> * Interactions with: starts_with("bbbbbbbbbbb"):starts_with("d") +
#>   starts_with("bbbbbbbbbbb"):starts_with("ccccccccccc")

Created on 2023-10-05 by the reprex package (v2.0.1)

Session info
sessioninfo::session_info()
#> - Session info ---------------------------------------------------------------
#>  setting  value
#>  version  R version 4.1.2 (2021-11-01)
#>  os       Windows 10 x64 (build 19045)
#>  system   x86_64, mingw32
#>  ui       RTerm
#>  language (EN)
#>  collate  English_United States.1252
#>  ctype    English_United States.1252
#>  tz       America/Denver
#>  date     2023-10-05
#> 
#> - Packages -------------------------------------------------------------------
#>  package      * version    date (UTC) lib source
#>  class          7.3-19     2021-05-03 [1] CRAN (R 4.1.2)
#>  cli            3.6.1      2023-03-23 [1] CRAN (R 4.1.3)
#>  codetools      0.2-18     2020-11-04 [1] CRAN (R 4.1.2)
#>  data.table     1.14.2     2021-09-27 [1] CRAN (R 4.1.2)
#>  digest         0.6.29     2021-12-01 [1] CRAN (R 4.1.2)
#>  dplyr        * 1.1.3      2023-09-03 [1] CRAN (R 4.1.2)
#>  evaluate       0.14       2019-05-28 [1] CRAN (R 4.1.2)
#>  fansi          0.5.0      2021-05-25 [1] CRAN (R 4.1.2)
#>  fastmap        1.1.0      2021-01-25 [1] CRAN (R 4.1.2)
#>  fs             1.5.2      2021-12-08 [1] CRAN (R 4.1.2)
#>  future         1.33.0     2023-07-01 [1] CRAN (R 4.1.2)
#>  future.apply   1.11.0     2023-05-21 [1] CRAN (R 4.1.2)
#>  generics       0.1.2      2022-01-31 [1] CRAN (R 4.1.3)
#>  globals        0.16.2     2022-11-21 [1] CRAN (R 4.1.3)
#>  glue           1.6.2      2022-02-24 [1] CRAN (R 4.1.3)
#>  gower          1.0.1      2022-12-22 [1] CRAN (R 4.1.3)
#>  hardhat        1.3.0      2023-03-30 [1] CRAN (R 4.1.3)
#>  highr          0.9        2021-04-16 [1] CRAN (R 4.1.2)
#>  htmltools      0.5.2      2021-08-25 [1] CRAN (R 4.1.2)
#>  ipred          0.9-14     2023-03-09 [1] CRAN (R 4.1.3)
#>  knitr          1.36       2021-09-29 [1] CRAN (R 4.1.2)
#>  lattice        0.20-45    2021-09-22 [1] CRAN (R 4.1.2)
#>  lava           1.7.2.1    2023-02-27 [1] CRAN (R 4.1.3)
#>  lifecycle      1.0.3      2022-10-07 [1] CRAN (R 4.1.3)
#>  listenv        0.9.0      2022-12-16 [1] CRAN (R 4.1.3)
#>  lubridate      1.8.0      2021-10-07 [1] CRAN (R 4.1.2)
#>  magrittr       2.0.1      2020-11-17 [1] CRAN (R 4.1.2)
#>  MASS           7.3-54     2021-05-03 [1] CRAN (R 4.1.2)
#>  Matrix         1.3-4      2021-06-01 [1] CRAN (R 4.1.2)
#>  nnet           7.3-16     2021-05-03 [1] CRAN (R 4.1.2)
#>  parallelly     1.36.0     2023-05-26 [1] CRAN (R 4.1.2)
#>  pillar         1.9.0      2023-03-22 [1] CRAN (R 4.1.3)
#>  pkgconfig      2.0.3      2019-09-22 [1] CRAN (R 4.1.2)
#>  prodlim        2023.03.31 2023-04-02 [1] CRAN (R 4.1.3)
#>  purrr          1.0.2      2023-08-10 [1] CRAN (R 4.1.2)
#>  R.cache        0.15.0     2021-04-30 [1] CRAN (R 4.1.3)
#>  R.methodsS3    1.8.1      2020-08-26 [1] CRAN (R 4.1.1)
#>  R.oo           1.24.0     2020-08-26 [1] CRAN (R 4.1.1)
#>  R.utils        2.11.0     2021-09-26 [1] CRAN (R 4.1.3)
#>  R6             2.5.1      2021-08-19 [1] CRAN (R 4.1.2)
#>  Rcpp           1.0.7      2021-07-07 [1] CRAN (R 4.1.2)
#>  recipes      * 1.0.8      2023-08-25 [1] CRAN (R 4.1.2)
#>  reprex         2.0.1      2021-08-05 [1] CRAN (R 4.1.2)
#>  rlang          1.1.1      2023-04-28 [1] CRAN (R 4.1.2)
#>  rmarkdown      2.11       2021-09-14 [1] CRAN (R 4.1.2)
#>  rpart          4.1-15     2019-04-12 [1] CRAN (R 4.1.2)
#>  rstudioapi     0.13       2020-11-12 [1] CRAN (R 4.1.2)
#>  sessioninfo    1.2.2      2021-12-06 [1] CRAN (R 4.1.2)
#>  stringi        1.7.6      2021-11-29 [1] CRAN (R 4.1.2)
#>  stringr        1.4.0      2019-02-10 [1] CRAN (R 4.1.2)
#>  styler         1.7.0      2022-03-13 [1] CRAN (R 4.1.3)
#>  survival       3.2-13     2021-08-24 [1] CRAN (R 4.1.2)
#>  tibble         3.2.1      2023-03-20 [1] CRAN (R 4.1.3)
#>  tidyselect     1.2.0      2022-10-10 [1] CRAN (R 4.1.3)
#>  timeDate       4022.108   2023-01-07 [1] CRAN (R 4.1.3)
#>  utf8           1.2.2      2021-07-24 [1] CRAN (R 4.1.2)
#>  vctrs          0.6.3      2023-06-14 [1] CRAN (R 4.1.2)
#>  withr          2.4.3      2021-11-30 [1] CRAN (R 4.1.2)
#>  xfun           0.29       2021-12-14 [1] CRAN (R 4.1.2)
#>  yaml           2.2.1      2020-02-01 [1] CRAN (R 4.1.1)
#> 
#> 
#> ------------------------------------------------------------------------------

Metadata

Metadata

Assignees

Labels

bug: an unexpected problem or unintended behavior

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions