error in cubist translation #57

topepo · 2019-07-16T00:43:58Z

Model object generated using Cubist_0.2.2

# tidypredict provides tidypredict_fit(), the call that fails further down.
library(tidypredict)
# sessioninfo is attached so session_info() can report the environment.
library(sessioninfo)
# Set the output width option to 100 characters per line.
options(width = 100)

# Literal reconstruction of a fitted model object of class "cubist", so the
# failure can be reproduced without refitting the model.
# The model has one committee and one rule with no conditions
# (`rules="1"`, `conds="0"` in the `model` text), hence `splits` is NULL.
mod <-
  structure(
    list(
      # Training cases serialized as text, one comma-separated case per line.
      data = "15\\.5,8,318.0,150,2.76,3.520,16.87,0,0,3,2\n15\\.5,8,318.0,150,2.76,3.520,16.87,0,0,3,2\n27\\.3,4,79.0,66,4.08,1.935,18.90,1,1,4,1\n21\\.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1\n21,6,160.0,110,3.90,2.875,17.02,0,1,4,4\n21,6,160.0,110,3.90,2.620,16.46,0,1,4,4\n15\\.8,8,351.0,264,4.22,3.170,14.50,0,1,5,4\n14\\.7,8,440.0,230,3.23,5.345,17.42,0,0,3,4\n10\\.4,8,460.0,215,3.00,5.424,17.82,0,0,3,4\n15\\.2,8,275.8,180,3.07,3.780,18.00,0,0,3,3\n14\\.7,8,440.0,230,3.23,5.345,17.42,0,0,3,4\n18\\.1,6,225.0,105,2.76,3.460,20.22,1,0,3,1\n22\\.8,4,140.8,95,3.92,3.150,22.90,1,0,4,2\n21,6,160.0,110,3.90,2.620,16.46,0,1,4,4\n15,8,301.0,335,3.54,3.570,14.60,0,1,5,8\n21\\.4,4,121.0,109,4.11,2.780,18.60,1,1,4,2\n18\\.1,6,225.0,105,2.76,3.460,20.22,1,0,3,1\n10\\.4,8,460.0,215,3.00,5.424,17.82,0,0,3,4\n13\\.3,8,350.0,245,3.73,3.840,15.41,0,0,3,4\n14\\.7,8,440.0,230,3.23,5.345,17.42,0,0,3,4\n15\\.8,8,351.0,264,4.22,3.170,14.50,0,1,5,4\n26,4,120.3,91,4.43,2.140,16.70,0,1,5,2\n16\\.4,8,275.8,180,3.07,4.070,17.40,0,0,3,3\n22\\.8,4,108.0,93,3.85,2.320,18.61,1,1,4,1\n21\\.4,4,121.0,109,4.11,2.780,18.60,1,1,4,2\n19\\.7,6,145.0,175,3.62,2.770,15.50,0,1,5,6\n15\\.8,8,351.0,264,4.22,3.170,14.50,0,1,5,4\n15\\.2,8,304.0,150,3.15,3.435,17.30,0,0,3,2\n26,4,120.3,91,4.43,2.140,16.70,0,1,5,2\n17\\.3,8,275.8,180,3.07,3.730,17.60,0,0,3,3\n21\\.4,4,121.0,109,4.11,2.780,18.60,1,1,4,2\n22\\.8,4,108.0,93,3.85,2.320,18.61,1,1,4,1",
      # Attribute declarations: `outcome` is the target; all columns are
      # declared continuous.
      names = "| Generated using R version 3.6.0 (2019-04-26)\n| on Mon Jul 15 20:36:50 2019\noutcome.\n\noutcome: continuous.\ncyl: continuous.\ndisp: continuous.\nhp: continuous.\ndrat: continuous.\nwt: continuous.\nqsec: continuous.\nvs: continuous.\nam: continuous.\ngear: continuous.\ncarb: continuous.\n",
      caseWeights = FALSE,
      # Serialized model text: per-attribute summary statistics followed by a
      # single rule (`rules="1"`) with zero conditions (`conds="0"`) and the
      # linear model coefficients for `disp` and `drat`.
      model = "id=\"Cubist 2.07 GPL Edition 2019-07-15\"\nprec=\"1\" globalmean=\"18.37188\" extrap=\"1\" insts=\"0\" ceiling=\"44.2\" floor=\"0\"\natt=\"outcome\" mean=\"18.37\" sd=\"4.326829\" min=\"10.4\" max=\"27.3\"\natt=\"cyl\" mean=\"6.4\" sd=\"1.740181\" min=\"4\" max=\"8\"\natt=\"disp\" mean=\"252.61\" sd=\"121.1964\" min=\"79\" max=\"460\"\natt=\"hp\" mean=\"161.3\" sd=\"69.12569\" min=\"66\" max=\"335\"\natt=\"drat\" mean=\"3.572\" sd=\"0.5478398\" min=\"2.76\" max=\"4.43\"\natt=\"wt\" mean=\"3.4132\" sd=\"1.007245\" min=\"1.935\" max=\"5.424\"\natt=\"qsec\" mean=\"17.468\" sd=\"1.836215\" min=\"14.5\" max=\"22.9\"\natt=\"vs\" mean=\"0.3\" sd=\"0.4709301\" min=\"0\" max=\"1\"\natt=\"am\" mean=\"0.5\" sd=\"0.5080015\" min=\"0\" max=\"1\"\natt=\"gear\" mean=\"3.7\" sd=\"0.8032203\" min=\"3\" max=\"5\"\natt=\"carb\" mean=\"2.9\" sd=\"1.596052\" min=\"1\" max=\"8\"\nentries=\"1\"\nrules=\"1\"\nconds=\"0\" cover=\"32\" mean=\"18.37\" loval=\"10.4\" hival=\"27.3\" esterr=\"1.46\"\ncoeff=\"20.52\" att=\"disp\" coeff=\"-0.0294\" att=\"drat\" coeff=\"1.33\"\n",
      # Captured console output of the Cubist run that produced this model.
      output = "\nCubist [Release 2.07 GPL Edition]  Mon Jul 15 20:36:50 2019\n---------------------------------\n\n    Target attribute `outcome'\n\nRead 32 cases (11 attributes) from undefined.data\n\nModel:\n\n  Rule 1: [32 cases, mean 18.37, range 10.4 to 27.3, est err 1.46]\n\n\toutcome = 20.52 - 0.0294 disp + 1.33 drat\n\n\nEvaluation on training data (32 cases):\n\n    Average  |error|               0.71\n    Relative |error|               0.19\n    Correlation coefficient        0.94\n\n\n\tAttribute usage:\n\t  Conds  Model\n\n\t         100%    disp\n\t         100%    drat\n\n\nTime: 0.0 secs\n",
      control = list(
        unbiased = FALSE,
        rules = 100,
        extrapolation = 1,
        sample = 0,
        label = "outcome",
        seed = 27L
      ),
      committees = 1,
      maxd = 2.3,
      dims = c(32L, 10L),
      # No split information is stored for this model; the error reported
      # below is raised while subsetting `splits`.
      splits = NULL,
      # Attribute usage table: only `disp` and `drat` are used (in the linear
      # model); no variable is used in a rule condition.
      usage = structure(
        list(
          Conditions = c(0, 0, 0, 0, 0, 0, 0, 0, 0, 0),
          Model = c(100,
                    100, 0, 0, 0, 0, 0, 0, 0, 0),
          Variable = c(
            "disp",
            "drat",
            "cyl",
            "hp",
            "wt",
            "qsec",
            "vs",
            "am",
            "gear",
            "carb"
          )
        ),
        row.names = c(NA,-10L),
        class = "data.frame"
      ),
      call = as.call(quote(
        cubist.default(
          x = dat[, names(dat) != ".outcome", drop = FALSE],
          y = dat$.outcome,
          committees = 1,
          control = ctrl
        )
      )),
      # One-row data frame of coefficients for committee 1, rule 1; predictors
      # that do not appear in the linear model are NA.
      coefficients = structure(
        list(
          `(Intercept)` = 20.52,
          cyl = NA_real_,
          disp = -0.0294,
          hp = NA_real_,
          drat = 1.33,
          wt = NA_real_,
          qsec = NA_real_,
          vs = NA_real_,
          am = NA_real_,
          gear = NA_real_,
          carb = NA_real_,
          committee = "1",
          rule = "1"
        ),
        row.names = "(Intercept)",
        # Leftover attribute, presumably from a stats::reshape() call made
        # when the coefficient table was built -- TODO confirm.
        reshapeWide = list(
          v.names = "value",
          timevar = "var",
          idvar = "tmp",
          times = structure(
            c(1L,
              4L, 5L, 8L, 6L, 11L, 9L, 10L, 2L, 7L, 3L),
            .Label = c(
              "(Intercept)",
              "am",
              "carb",
              "cyl",
              "disp",
              "drat",
              "gear",
              "hp",
              "qsec",
              "vs",
              "wt"
            ),
            class = "factor"
          ),
          varying = structure(
            c(
              "value.(Intercept)",
              "value.cyl",
              "value.disp",
              "value.hp",
              "value.drat",
              "value.wt",
              "value.qsec",
              "value.vs",
              "value.am",
              "value.gear",
              "value.carb"
            ),
            .Dim = c(1L, 11L)
          )
        ),
        class = "data.frame"
      ),
      vars = list(
        all = c(
          "cyl",
          "disp",
          "hp",
          "drat",
          "wt",
          "qsec",
          "vs",
          "am",
          "gear",
          "carb"
        ),
        # NOTE(review): `used` lists "committee" and "rule" and omits "disp",
        # although the model uses `disp` and `drat`; kept exactly as captured
        # -- verify whether this is expected.
        used = c("drat", "committee",
                 "rule")
      )
    ),
    class = "cubist"
  )

# Translating the single-rule model fails: the error is raised while
# subsetting `splits`, which is NULL in `mod`.
tidypredict::tidypredict_fit(mod)
#> Error in splits[splits$rule == .x & splits$committee == comm, ]: incorrect number of dimensions

# Report the R version, platform, and package versions used for this reprex.
session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 3.6.0 (2019-04-26)
#>  os       macOS High Sierra 10.13.6   
#>  system   x86_64, darwin15.6.0        
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_US.UTF-8                 
#>  ctype    en_US.UTF-8                 
#>  tz       America/New_York            
#>  date     2019-07-15                  
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────────────────────────
#>  package     * version    date       lib source                            
#>  assertthat    0.2.1      2019-03-21 [1] CRAN (R 3.6.0)                    
#>  cli           1.1.0      2019-03-19 [1] CRAN (R 3.6.0)                    
#>  crayon        1.3.4      2017-09-16 [1] CRAN (R 3.6.0)                    
#>  digest        0.6.20     2019-07-04 [1] CRAN (R 3.6.0)                    
#>  dplyr         0.8.3      2019-07-04 [1] CRAN (R 3.6.0)                    
#>  evaluate      0.14       2019-05-28 [1] CRAN (R 3.6.0)                    
#>  generics      0.0.2      2018-11-29 [1] CRAN (R 3.6.0)                    
#>  glue          1.3.1      2019-03-12 [1] CRAN (R 3.6.0)                    
#>  highr         0.8        2019-03-20 [1] CRAN (R 3.6.0)                    
#>  htmltools     0.3.6      2017-04-28 [1] CRAN (R 3.6.0)                    
#>  knitr         1.23       2019-05-18 [1] CRAN (R 3.6.0)                    
#>  magrittr      1.5        2014-11-22 [1] CRAN (R 3.6.0)                    
#>  pillar        1.4.2      2019-06-29 [1] CRAN (R 3.6.0)                    
#>  pkgconfig     2.0.2      2018-08-16 [1] CRAN (R 3.6.0)                    
#>  purrr         0.3.2      2019-03-15 [1] CRAN (R 3.6.0)                    
#>  R6            2.4.0      2019-02-14 [1] CRAN (R 3.6.0)                    
#>  Rcpp          1.0.1      2019-03-17 [1] CRAN (R 3.6.0)                    
#>  rlang         0.4.0.9000 2019-07-09 [1] Github (r-lib/rlang@a3d7e47)      
#>  rmarkdown     1.14       2019-07-12 [1] CRAN (R 3.6.0)                    
#>  sessioninfo * 1.1.1.9000 2019-03-26 [1] Github (r-lib/sessioninfo@dfb3ea8)
#>  stringi       1.4.3      2019-03-12 [1] CRAN (R 3.6.0)                    
#>  stringr       1.4.0      2019-02-10 [1] CRAN (R 3.6.0)                    
#>  tibble        2.1.3      2019-06-06 [1] CRAN (R 3.6.0)                    
#>  tidypredict * 0.4.2      2019-07-15 [1] local                             
#>  tidyselect    0.2.5      2018-10-11 [1] CRAN (R 3.6.0)                    
#>  withr         2.1.2      2018-03-15 [1] CRAN (R 3.6.0)                    
#>  xfun          0.8        2019-06-25 [1] CRAN (R 3.6.0)                    
#>  yaml          2.2.0      2018-07-25 [1] CRAN (R 3.6.0)                    
#> 
#> [1] /Library/Frameworks/R.framework/Versions/3.6/Resources/library

<sup>Created on 2019-07-15 by the reprex package (v0.2.1)</sup>

The text was updated successfully, but these errors were encountered:

topepo · 2019-07-18T11:17:41Z

The underlying issue is that the model consists of a single rule with no conditions:

Model:

  Rule 1: [32 cases, mean 18.37, range 10.4 to 27.3, est err 1.46]

	outcome = 20.52 - 0.0294 disp + 1.33 drat

so there is no information on the splitting variables (`model$splits` is `NULL`), and the translation fails when it tries to subset `splits`.

github-actions · 2021-10-13T00:35:49Z

This issue has been automatically locked. If you believe you have found a related problem, please file a new issue (with a reprex: https://reprex.tidyverse.org) and link to this issue.

topepo mentioned this issue Jan 7, 2020

tidy_rules (attempt 2) topepo/Cubist#22

Closed

topepo added the "bug" label (an unexpected problem or unintended behavior) on Apr 3, 2020

topepo mentioned this issue Apr 3, 2020

[bug report] cubist difference between predict and tidypredict_to_column #67

Open

edgararuiz mentioned this issue Sep 27, 2021

Adds ability to handle single committee and single rule Cubist models #100

Merged

edgararuiz closed this as completed in #100 Sep 28, 2021

github-actions bot locked and limited conversation to collaborators Oct 13, 2021

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

error in cubist translation #57

error in cubist translation #57

topepo commented Jul 16, 2019

topepo commented Jul 18, 2019

github-actions bot commented Oct 13, 2021

error in cubist translation #57

error in cubist translation #57

Comments

topepo commented Jul 16, 2019

topepo commented Jul 18, 2019

github-actions bot commented Oct 13, 2021