Skip to content

read_delim_chunked functions do not return column specifications #1143

@svraka

Description

@svraka

The read_delim functions return the column specification used by readr as an attribute which can be accessed using spec(). However, this is not the case with the read_delim_chunked functions. The specifications are printed but cannot be accessed.

library(readr)
read_csv_chunked(readr_example("mtcars.csv"),
                 callback = ListCallback$new(function(x, pos) spec(x)),
                 chunk_size = 20)
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   mpg = col_double(),
#>   cyl = col_double(),
#>   disp = col_double(),
#>   hp = col_double(),
#>   drat = col_double(),
#>   wt = col_double(),
#>   qsec = col_double(),
#>   vs = col_double(),
#>   am = col_double(),
#>   gear = col_double(),
#>   carb = col_double()
#> )
#> [[1]]
#> NULL
#> 
#> [[2]]
#> NULL

Long specifications are truncated by default, so workarounds such as options(readr.num_columns = Inf) are needed to obtain the full spec. Meanwhile, other attributes, such as problems(), can be accessed:

read_csv_chunked(readr_example("mtcars.csv"),
                 callback = ListCallback$new(function(x, pos) problems(x)),
                 chunk_size = 20,
                 col_types = paste0(rep("i", 11), collapse = ""))
#> [[1]]
#> # A tibble: 85 x 4
#>      row   col expected               actual
#>    <int> <int> <chr>                  <chr> 
#>  1     1     5 no trailing characters 3.9   
#>  2     1     6 no trailing characters 2.62  
#>  3     1     7 no trailing characters 16.46 
#>  4     2     5 no trailing characters 3.9   
#>  5     2     6 no trailing characters 2.875 
#>  6     2     7 no trailing characters 17.02 
#>  7     3     1 no trailing characters 22.8  
#>  8     3     5 no trailing characters 3.85  
#>  9     3     6 no trailing characters 2.32  
#> 10     3     7 no trailing characters 18.61 
#> # … with 75 more rows
#> 
#> [[2]]
#> # A tibble: 49 x 4
#>      row   col expected               actual
#>    <int> <int> <chr>                  <chr> 
#>  1    21     1 no trailing characters 21.5  
#>  2    21     3 no trailing characters 120.1 
#>  3    21     5 no trailing characters 3.7   
#>  4    21     6 no trailing characters 2.465 
#>  5    21     7 no trailing characters 20.01 
#>  6    22     1 no trailing characters 15.5  
#>  7    22     5 no trailing characters 2.76  
#>  8    22     6 no trailing characters 3.52  
#>  9    22     7 no trailing characters 16.87 
#> 10    23     1 no trailing characters 15.2  
#> # … with 39 more rows

Chunks have a spec_tbl_df class, which suggests to me that maybe the col spec should also be accessible?

read_csv_chunked(readr_example("mtcars.csv"),
                 callback = ListCallback$new(function(x, pos) class(x)),
                 chunk_size = 20)
#> 
#> ── Column specification ────────────────────────────────────────────────────────
#> cols(
#>   mpg = col_double(),
#>   cyl = col_double(),
#>   disp = col_double(),
#>   hp = col_double(),
#>   drat = col_double(),
#>   wt = col_double(),
#>   qsec = col_double(),
#>   vs = col_double(),
#>   am = col_double(),
#>   gear = col_double(),
#>   carb = col_double()
#> )
#> [[1]]
#> [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame" 
#> 
#> [[2]]
#> [1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame"

Created on 2020-11-02 by the reprex package (v0.3.0)

Session info
devtools::session_info()
#> ─ Session info ───────────────────────────────────────────────────────────────
#>  setting  value                       
#>  version  R version 4.0.3 (2020-10-10)
#>  os       Ubuntu 20.04.1 LTS          
#>  system   x86_64, linux-gnu           
#>  ui       X11                         
#>  language (EN)                        
#>  collate  en_US.UTF-8                 
#>  ctype    en_US.UTF-8                 
#>  tz       Etc/UTC                     
#>  date     2020-11-02                  
#> 
#> ─ Packages ───────────────────────────────────────────────────────────────────
#>  package     * version date       lib source        
#>  assertthat    0.2.1   2019-03-21 [1] RSPM (R 4.0.0)
#>  backports     1.1.10  2020-09-15 [1] RSPM (R 4.0.2)
#>  callr         3.5.1   2020-10-13 [1] RSPM (R 4.0.2)
#>  cli           2.1.0   2020-10-12 [1] RSPM (R 4.0.2)
#>  crayon        1.3.4   2017-09-16 [1] RSPM (R 4.0.0)
#>  desc          1.2.0   2018-05-01 [1] RSPM (R 4.0.0)
#>  devtools      2.3.2   2020-09-18 [1] RSPM (R 4.0.2)
#>  digest        0.6.27  2020-10-24 [1] RSPM (R 4.0.3)
#>  ellipsis      0.3.1   2020-05-15 [1] RSPM (R 4.0.0)
#>  evaluate      0.14    2019-05-28 [1] RSPM (R 4.0.0)
#>  fansi         0.4.1   2020-01-08 [1] RSPM (R 4.0.0)
#>  fs            1.5.0   2020-07-31 [1] RSPM (R 4.0.2)
#>  glue          1.4.2   2020-08-27 [1] RSPM (R 4.0.2)
#>  highr         0.8     2019-03-20 [1] RSPM (R 4.0.0)
#>  hms           0.5.3   2020-01-08 [1] RSPM (R 4.0.0)
#>  htmltools     0.5.0   2020-06-16 [1] RSPM (R 4.0.1)
#>  knitr         1.30    2020-09-22 [1] CRAN (R 4.0.2)
#>  lifecycle     0.2.0   2020-03-06 [1] RSPM (R 4.0.0)
#>  magrittr      1.5     2014-11-22 [1] RSPM (R 4.0.0)
#>  memoise       1.1.0   2017-04-21 [1] RSPM (R 4.0.0)
#>  pillar        1.4.6   2020-07-10 [1] RSPM (R 4.0.2)
#>  pkgbuild      1.1.0   2020-07-13 [1] RSPM (R 4.0.2)
#>  pkgconfig     2.0.3   2019-09-22 [1] RSPM (R 4.0.0)
#>  pkgload       1.1.0   2020-05-29 [1] RSPM (R 4.0.0)
#>  prettyunits   1.1.1   2020-01-24 [1] RSPM (R 4.0.0)
#>  processx      3.4.4   2020-09-03 [1] RSPM (R 4.0.2)
#>  ps            1.4.0   2020-10-07 [1] RSPM (R 4.0.2)
#>  R6            2.5.0   2020-10-28 [1] RSPM (R 4.0.3)
#>  readr       * 1.4.0   2020-10-05 [1] RSPM (R 4.0.2)
#>  remotes       2.2.0   2020-07-21 [1] RSPM (R 4.0.2)
#>  rlang         0.4.8   2020-10-08 [1] RSPM (R 4.0.2)
#>  rmarkdown     2.5     2020-10-21 [1] RSPM (R 4.0.3)
#>  rprojroot     1.3-2   2018-01-03 [1] RSPM (R 4.0.0)
#>  rstudioapi    0.11    2020-02-07 [1] RSPM (R 4.0.0)
#>  sessioninfo   1.1.1   2018-11-05 [1] RSPM (R 4.0.0)
#>  stringi       1.5.3   2020-09-09 [1] RSPM (R 4.0.2)
#>  stringr       1.4.0   2019-02-10 [1] RSPM (R 4.0.0)
#>  testthat      3.0.0   2020-10-31 [1] CRAN (R 4.0.3)
#>  tibble        3.0.4   2020-10-12 [1] RSPM (R 4.0.2)
#>  usethis       1.6.3   2020-09-17 [1] RSPM (R 4.0.2)
#>  utf8          1.1.4   2018-05-24 [1] RSPM (R 4.0.0)
#>  vctrs         0.3.4   2020-08-29 [1] RSPM (R 4.0.2)
#>  withr         2.3.0   2020-09-22 [1] CRAN (R 4.0.2)
#>  xfun          0.19    2020-10-30 [1] CRAN (R 4.0.3)
#>  yaml          2.2.1   2020-02-01 [1] RSPM (R 4.0.0)
#> 
#> [1] /usr/local/lib/R/site-library
#> [2] /usr/lib/R/site-library
#> [3] /usr/lib/R/library

Metadata

Metadata

Assignees

No one assigned

    Labels

    feature: a feature request or enhancement

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions