Use readr (#10)

Use readr. This adds a heavy dependency but substantially increases safety.
maurolepore · Jan 8, 2019 · 4f118e9 · 4f118e9
1 parent 1f65d46
commit 4f118e9
Show file tree

Hide file tree

Showing 23 changed files with 189 additions and 392 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -17,7 +17,8 @@ BugReports: https://github.com/maurolepore/tor/issues
 Imports: 
     fs,
     rlang,
-    tibble
+    tibble,
+    readr
 Suggests: 
     covr,
     spelling,

diff --git a/NEWS.md b/NEWS.md
@@ -2,6 +2,9 @@
 
 ## Minor
 
+* `list_any()`, `list_csv()`, `list_tsv()`, `load_csv()`, and `load_tsv()` now use __readr__.
+  * This makes reading data faster and safer.
+
 * Each dataframe output is now converted to a tibble. 
     * Users no longer need to call `tibble::as_tibble()`.
 

diff --git a/R/format_path.R b/R/format_path.R
@@ -10,11 +10,11 @@
 #'
 #' @examples
 #' format_path(c("file1", "file2"), "csv")
-#'
+#' 
 #' (dfs <- list_csv(tor_example("csv")))
-#'
+#' 
 #' format_path(names(dfs), "csv")
-#'
+#' 
 #' format_path(names(dfs), "csv", base = "home", prefix = "this-")
 #' @family helpers
 #' @export

diff --git a/R/list_any.R b/R/list_any.R
@@ -8,23 +8,23 @@
 #'
 #' @examples
 #' tor_example()
-#'
+#' 
 #' path <- tor_example("csv")
 #' dir(path)
-#'
+#' 
 #' list_any(path, read.csv)
-#'
+#' 
 #' list_any(path, ~ read.csv(.x, stringsAsFactors = FALSE))
-#'
+#' 
 #' (path_mixed <- tor_example("mixed"))
 #' dir(path_mixed)
-#'
+#' 
 #' list_any(
 #'   path_mixed, ~ get(load(.x)),
 #'   regexp = "[.]csv$",
 #'   invert = TRUE
 #' )
-#'
+#' 
 #' list_any(
 #'   path_mixed, ~ get(load(.x)),
 #'   "[.]Rdata$",
@@ -46,9 +46,7 @@ list_any <- function(path = ".",
   )
 
   if (length(files) == 0) {
-    abort(
-      sprintf("Can't find files matching '%s' in:\n '%s'", regexp, path)
-    )
+    abort(sprintf("Can't find files matching '%s' in:\n '%s'", regexp, path))
   }
 
   file_names <- fs::path_ext_remove(fs::path_file(files))

diff --git a/R/list_csv.R b/R/list_csv.R
@@ -3,58 +3,40 @@
 #' These functions wrap the most common special cases of [list_any()].
 #'
 #' @inheritParams list_any
-#' @inheritParams utils::read.table
-#' @param ... Arguments passed to [utils::read.table()].
+#' @inheritParams readr::read_delim
+#' @param ... Arguments passed to `readr::read_csv()` or `readr::read_tsv()`.
 #'
 #' @return A list.
 #'
 #' @examples
 #' (rds <- tor_example("rds"))
 #' dir(rds)
-#'
+#' 
 #' list_rds(rds)
-#'
+#' 
 #' (tsv <- tor_example("tsv"))
 #' dir(tsv)
-#'
+#' 
 #' list_tsv(tsv)
-#'
+#' 
 #' (mixed <- tor_example("mixed"))
 #' dir(mixed)
-#'
+#' 
 #' list_rdata(mixed)
-#'
+#' 
 #' list_csv(mixed)
-#'
+#' 
 #' list_rdata(mixed, regexp = "[.]RData", ignore.case = FALSE)
 #' @family general functions to import data
 #' @export
 list_csv <- function(path = ".",
                      regexp = "[.]csv$",
                      ignore.case = TRUE,
                      invert = FALSE,
-                     header = TRUE,
-                     sep = ",",
-                     quote = "\"",
-                     dec = ".",
-                     fill = TRUE,
-                     comment.char = "",
-                     stringsAsFactors = FALSE,
-                     na.strings = c("", "NA"),
                      ...) {
   list_any(
     path,
-    function(x) utils::read.csv(
-        file = x,
-        header = header,
-        sep = sep,
-        quote = quote,
-        dec = dec,
-        fill = fill,
-        comment.char = comment.char,
-        stringsAsFactors = stringsAsFactors,
-        na.strings = na.strings
-      ),
+    readr::read_csv,
     regexp = regexp,
     ignore.case = ignore.case,
     invert = invert,
@@ -68,28 +50,10 @@ list_tsv <- function(path = ".",
                      regexp = "[.]tsv$",
                      ignore.case = TRUE,
                      invert = FALSE,
-                     header = TRUE,
-                     sep = "\t",
-                     quote = "\"",
-                     dec = ".",
-                     fill = TRUE,
-                     comment.char = "",
-                     stringsAsFactors = FALSE,
-                     na.strings = c("", "NA"),
                      ...) {
   list_any(
     path,
-    function(x) utils::read.csv(
-        file = x,
-        header = header,
-        sep = sep,
-        quote = quote,
-        dec = dec,
-        fill = fill,
-        comment.char = comment.char,
-        stringsAsFactors = stringsAsFactors,
-        na.strings = na.strings
-      ),
+    .f = readr::read_tsv,
     regexp = regexp,
     ignore.case = ignore.case,
     invert = invert,

diff --git a/R/load_csv.R b/R/load_csv.R
@@ -8,15 +8,15 @@
 #' @examples
 #' (path_csv <- tor_example("csv"))
 #' dir(path_csv)
-#'
+#' 
 #' load_csv(path_csv)
 #' # Each dataframe is now available in the global environment
 #' csv1
 #' csv2
-#'
+#' 
 #' (path_mixed <- tor_example("mixed"))
 #' dir(path_mixed)
-#'
+#' 
 #' load_rdata(path_mixed)
 #' # Each dataframe is now available in the global environment
 #' lower_rdata
@@ -27,30 +27,11 @@ load_csv <- function(path = ".",
                      regexp = "[.]csv$",
                      ignore.case = TRUE,
                      invert = FALSE,
-                     header = TRUE,
-                     sep = ",",
-                     quote = "\"",
-                     dec = ".",
-                     fill = TRUE,
-                     comment.char = "",
-                     stringsAsFactors = FALSE,
-                     na.strings = c("", "NA"),
                      envir = .GlobalEnv,
                      ...) {
   lst <- list_any(
     path,
-    # TODO: Use function(x) to more obviously show what's going on
-    function(x) utils::read.csv(
-        file = x,
-        header = header,
-        sep = sep,
-        quote = quote,
-        dec = dec,
-        fill = fill,
-        comment.char = comment.char,
-        stringsAsFactors = stringsAsFactors,
-        na.strings = na.strings
-      ),
+    readr::read_csv,
     regexp = regexp,
     ignore.case = ignore.case,
     invert = invert,
@@ -67,29 +48,11 @@ load_tsv <- function(path = ".",
                      regexp = "[.]tsv$",
                      ignore.case = TRUE,
                      invert = FALSE,
-                     header = TRUE,
-                     sep = "\t",
-                     quote = "\"",
-                     dec = ".",
-                     fill = TRUE,
-                     comment.char = "",
-                     stringsAsFactors = FALSE,
-                     na.strings = c("", "NA"),
                      envir = .GlobalEnv,
                      ...) {
   lst <- list_any(
     path,
-    function(x) utils::read.csv(
-        file = x,
-        header = header,
-        sep = sep,
-        quote = quote,
-        dec = dec,
-        fill = fill,
-        comment.char = comment.char,
-        stringsAsFactors = stringsAsFactors,
-        na.strings = na.strings
-      ),
+    readr::read_tsv,
     regexp = regexp,
     ignore.case = ignore.case,
     invert = invert,

diff --git a/R/tor_example.R b/R/tor_example.R
@@ -9,9 +9,9 @@
 #'
 #' @examples
 #' tor_example()
-#'
+#' 
 #' tor_example("csv")
-#'
+#' 
 #' dir(tor_example("csv"))
 #' @family helpers
 #' @export

diff --git a/README.md b/README.md
@@ -33,12 +33,12 @@ devtools::install_github("maurolepore/tor")
 
 ``` r
 library(tidyverse)
-#> -- Attaching packages --------------------------------------------- tidyverse 1.2.1 --
+#> -- Attaching packages -------------------------------------------- tidyverse 1.2.1 --
 #> v ggplot2 3.1.0     v purrr   0.2.5
 #> v tibble  2.0.0     v dplyr   0.7.8
 #> v tidyr   0.8.2     v stringr 1.3.1
 #> v readr   1.3.1     v forcats 0.3.0
-#> -- Conflicts ------------------------------------------------ tidyverse_conflicts() --
+#> -- Conflicts ----------------------------------------------- tidyverse_conflicts() --
 #> x dplyr::filter() masks stats::filter()
 #> x dplyr::lag()    masks stats::lag()
 library(fs)
@@ -61,10 +61,18 @@ dir()
 #> [19] "tor.Rproj"
 
 list_csv()
+#> Parsed with column specification:
+#> cols(
+#>   x = col_double()
+#> )
+#> Parsed with column specification:
+#> cols(
+#>   y = col_character()
+#> )
 #> $csv1
 #> # A tibble: 2 x 1
 #>       x
-#>   <int>
+#>   <dbl>
 #> 1     1
 #> 2     2
 #> 
@@ -375,6 +383,14 @@ dir(pattern = "[.]csv$")
 #> [1] "csv1.csv" "csv2.csv"
 
 dfms <- list_csv()
+#> Parsed with column specification:
+#> cols(
+#>   x = col_double()
+#> )
+#> Parsed with column specification:
+#> cols(
+#>   y = col_character()
+#> )
 
 format_path(names(dfms), "csv")
 #> [1] "./csv1.csv" "./csv2.csv"
@@ -396,10 +412,18 @@ map_chr(dfms, ~ format_path(names(.), "csv", ".", "this-"))
 #> "./this-x.csv" "./this-y.csv"
 
 (dfs <- list_csv())
+#> Parsed with column specification:
+#> cols(
+#>   x = col_double()
+#> )
+#> Parsed with column specification:
+#> cols(
+#>   y = col_character()
+#> )
 #> $csv1
 #> # A tibble: 2 x 1
 #>       x
-#>   <int>
+#>   <dbl>
 #> 1     1
 #> 2     2
 #>