From 4cd18d3b65cd6139e5cfc3602dffed6f75a1c645 Mon Sep 17 00:00:00 2001 From: Mauro Lepore Date: Thu, 3 Jan 2019 13:32:02 -0500 Subject: [PATCH] Refactor (#1) * Refine doc. * Refactor list_rds(), list_rdata(), list_csv() and list_tsv(): * New argument `regexp` * New argument `invert` * New argument `ignore.case` * Reorder arguments for consistency. --- R/list_rds.R | 57 ++++--- R/rds_list.R | 0 README.Rmd | 18 +-- README.md | 42 ++--- man/list_rds.Rd | 19 ++- .../{test-read_with.R => test-list_any.R} | 0 tests/testthat/test-list_rds.R | 146 ++++++++++++++++++ tests/testthat/test-rds_list.R | 56 ------- 8 files changed, 222 insertions(+), 116 deletions(-) create mode 100644 R/rds_list.R rename tests/testthat/{test-read_with.R => test-list_any.R} (100%) create mode 100644 tests/testthat/test-list_rds.R delete mode 100644 tests/testthat/test-rds_list.R diff --git a/R/list_rds.R b/R/list_rds.R index 0faa38b..2071301 100644 --- a/R/list_rds.R +++ b/R/list_rds.R @@ -28,24 +28,40 @@ #' dir(tsv) #' #' list_tsv(tsv) -list_rds <- function(path = ".") { - list_any(path, base::readRDS, regexp = "[.]rds$", ignore.case = TRUE) +list_rds <- function(path = ".", + regexp = "[.]rds$", + ignore.case = TRUE, + invert = FALSE) { + list_any( + path, + base::readRDS, + regexp = regexp, + ignore.case = ignore.case, + invert = invert + ) } #' @rdname list_rds #' @export -list_rdata <- function(path = ".") { +list_rdata <- function(path = ".", + regexp = "[.]rdata$|[.]rda$", + ignore.case = TRUE, + invert = FALSE) { list_any( path, ~get(load(.x)), - regexp = "[.]rdata$|[.]rda$", - ignore.case = TRUE + regexp = regexp, + ignore.case = ignore.case, + invert = invert ) } #' @rdname list_rds #' @export list_csv <- function(path = ".", + regexp = "[.]csv$", + ignore.case = TRUE, + invert = FALSE, header = TRUE, sep = ",", quote = "\"", @@ -68,8 +84,9 @@ list_csv <- function(path = ".", stringsAsFactors = stringsAsFactors, na.strings = na.strings ), - regexp = "[.]csv$", - ignore.case = TRUE, + regexp = regexp, + ignore.case = ignore.case, + invert = invert, ... ) } @@ -77,15 +94,18 @@ list_csv <- function(path = ".", #' @rdname list_rds #' @export list_tsv <- function(path = ".", - header = TRUE, - sep = "\t", - quote = "\"", - dec = ".", - fill = TRUE, - comment.char = "", - stringsAsFactors = FALSE, - na.strings = c("", "NA"), - ...) { + regexp = "[.]tsv$", + ignore.case = TRUE, + invert = FALSE, + header = TRUE, + sep = "\t", + quote = "\"", + dec = ".", + fill = TRUE, + comment.char = "", + stringsAsFactors = FALSE, + na.strings = c("", "NA"), + ...) { list_any( path, ~utils::read.csv( @@ -99,8 +119,9 @@ list_tsv <- function(path = ".", stringsAsFactors = stringsAsFactors, na.strings = na.strings ), - regexp = "[.]tsv$", - ignore.case = TRUE, + regexp = regexp, + ignore.case = ignore.case, + invert = invert, ... ) } diff --git a/R/rds_list.R b/R/rds_list.R new file mode 100644 index 0000000..e69de29 diff --git a/README.Rmd b/README.Rmd index 389e03e..687c6ee 100644 --- a/README.Rmd +++ b/README.Rmd @@ -22,7 +22,7 @@ knitr::opts_chunk$set( [![Coverage status](https://coveralls.io/repos/github/maurolepore/tor/badge.svg)](https://coveralls.io/r/maurolepore/tor?branch=master) [![CRAN status](https://www.r-pkg.org/badges/version/tor)](https://cran.r-project.org/package=tor) -The goal of __tor__ is to import multiple files of any kind into R. It does nothing you can't do with functions from base R (or [__fs__](https://fs.r-lib.org/) plus [__purrr__](https://purrr.tidyverse.org/ plus some reader package)) but it provides a shortcut to save your time and brain power for more important tasks. __tor__ is flexible and small, and works with tools from the [tidyverse](https://www.tidyverse.org/). +The goal of __tor__ (_to-R_) is to import multiple files of any kind into R. It does nothing you can't do with functions from base R (or [__fs__](https://fs.r-lib.org/) plus [__purrr__](https://purrr.tidyverse.org/) plus some reader package) but it provides a shortcut to save your time and brain power for more important tasks. __tor__ is flexible and small, and works well with tools from the [tidyverse](https://www.tidyverse.org/). ## Installation @@ -34,7 +34,7 @@ devtools::install_github("maurolepore/tor") ## Example ```{r} -library(magrittr) +library(purrr) library(fs) library(tor) ``` @@ -71,7 +71,7 @@ list_csv(path_mixed) list_rdata(path_mixed) ``` -`list_any()` is the most flexible. You supply the function to read with. +`list_any()` is the most flexible function. You supply the function to read with. ```{r} (path_csv <- tor_example("csv")) @@ -94,7 +94,7 @@ path_rdata %>% list_any(~get(load(.x))) ``` -Pass additional arguments via `...` or inside the lambda function (as `lapply()`). +Pass additional arguments via `...` or inside the lambda function. ```{r} list_any(path_csv, read.csv, stringsAsFactors = FALSE) @@ -117,9 +117,7 @@ path_mixed %>% ### Writing data -__tor__ does not write data. Compared to reading, writing data is a little easier because you have all the tools from R to choose what to write, how and where. - -Yet __tor__ helps you in a small, important way. Because creating the paths to write files may interrupt your workflow, __torr__ provides a helper to do just that. +__tor__ does not write data but includes a helper to create the paths to output files. ```{r} dir(pattern = "[.]csv$") @@ -128,14 +126,12 @@ dfms <- list_csv() format_path(names(dfms), "csv") -format_path(names(dfms), "csv", "base", "prefix-") +format_path(names(dfms), "csv", base = "home", prefix = "this-") ``` -Combine it with [__purrr__](https://purrr.tidyverse.org/. +Combine it with [__purrr__](https://purrr.tidyverse.org/). ```{r} -library(purrr) - imap_chr(dfms, ~ format_path(.y, "csv")) # Same diff --git a/README.md b/README.md index 79e2041..d190241 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,13 @@ status](https://coveralls.io/repos/github/maurolepore/tor/badge.svg)](https://co [![CRAN status](https://www.r-pkg.org/badges/version/tor)](https://cran.r-project.org/package=tor) -The goal of **tor** is to import multiple files of any kind into R. It -does nothing you can’t do with functions from base R (or +The goal of **tor** (*to-R*) is to import multiple files of any kind +into R. It does nothing you can’t do with functions from base R (or [**fs**](https://fs.r-lib.org/) plus -[**purrr**](https://purrr.tidyverse.org/%20plus%20some%20reader%20package)) -but it provides a shortcut to save your time and brain power for more -important tasks. **tor** is flexible and small, and works with tools -from the [tidyverse](https://www.tidyverse.org/). +[**purrr**](https://purrr.tidyverse.org/) plus some reader package) but +it provides a shortcut to save your time and brain power for more +important tasks. **tor** is flexible and small, and works well with +tools from the [tidyverse](https://www.tidyverse.org/). ## Installation @@ -29,7 +29,7 @@ devtools::install_github("maurolepore/tor") ## Example ``` r -library(magrittr) +library(purrr) library(fs) library(tor) ``` @@ -127,7 +127,8 @@ list_rdata(path_mixed) #> 2 b ``` -`list_any()` is the most flexible. You supply the function to read with. +`list_any()` is the most flexible function. You supply the function to +read with. ``` r (path_csv <- tor_example("csv")) @@ -182,8 +183,7 @@ path_rdata %>% #> 2 b ``` -Pass additional arguments via `...` or inside the lambda function (as -`lapply()`). +Pass additional arguments via `...` or inside the lambda function. ``` r list_any(path_csv, read.csv, stringsAsFactors = FALSE) @@ -250,13 +250,8 @@ path_mixed %>% ### Writing data -**tor** does not write data. Compared to reading, writing data is a -little easier because you have all the tools from R to choose what to -write, how and where. - -Yet **tor** helps you in a small, important way. Because creating the -paths to write files may interrupt your workflow, **torr** provides a -helper to do just that. +**tor** does not write data but includes a helper to create the paths to +output files. ``` r dir(pattern = "[.]csv$") @@ -267,20 +262,13 @@ dfms <- list_csv() format_path(names(dfms), "csv") #> [1] "./csv1.csv" "./csv2.csv" -format_path(names(dfms), "csv", "base", "prefix-") -#> [1] "base/prefix-csv1.csv" "base/prefix-csv2.csv" +format_path(names(dfms), "csv", base = "home", prefix = "this-") +#> [1] "home/this-csv1.csv" "home/this-csv2.csv" ``` -Combine it with \[**purrr**\](. +Combine it with [**purrr**](https://purrr.tidyverse.org/). ``` r -library(purrr) -#> -#> Attaching package: 'purrr' -#> The following object is masked from 'package:magrittr': -#> -#> set_names - imap_chr(dfms, ~ format_path(.y, "csv")) #> csv1 csv2 #> "./csv1.csv" "./csv2.csv" diff --git a/man/list_rds.Rd b/man/list_rds.Rd index a46c555..ea3feac 100644 --- a/man/list_rds.Rd +++ b/man/list_rds.Rd @@ -7,21 +7,32 @@ \alias{list_tsv} \title{Read multiple (common) files from a directory into a list.} \usage{ -list_rds(path = ".") +list_rds(path = ".", regexp = "[.]rds$", ignore.case = TRUE, + invert = FALSE) -list_rdata(path = ".") +list_rdata(path = ".", regexp = "[.]rdata$|[.]rda$", + ignore.case = TRUE, invert = FALSE) -list_csv(path = ".", header = TRUE, sep = ",", quote = "\\"", +list_csv(path = ".", regexp = "[.]csv$", ignore.case = TRUE, + invert = FALSE, header = TRUE, sep = ",", quote = "\\"", dec = ".", fill = TRUE, comment.char = "", stringsAsFactors = FALSE, na.strings = c("", "NA"), ...) -list_tsv(path = ".", header = TRUE, sep = "\\t", quote = "\\"", +list_tsv(path = ".", regexp = "[.]tsv$", ignore.case = TRUE, + invert = FALSE, header = TRUE, sep = "\\t", quote = "\\"", dec = ".", fill = TRUE, comment.char = "", stringsAsFactors = FALSE, na.strings = c("", "NA"), ...) } \arguments{ \item{path}{A character vector of one or more paths.} +\item{regexp}{A regular expression (e.g. \code{[.]csv$}) passed on to \code{\link[=grep]{grep()}} to filter paths.} + +\item{ignore.case}{if \code{FALSE}, the pattern matching is \emph{case + sensitive} and if \code{TRUE}, case is ignored during matching.} + +\item{invert}{If \code{TRUE} return files which do \emph{not} match} + \item{header}{a logical value indicating whether the file contains the names of the variables as its first line. If missing, the value is determined from the file format: \code{header} is set to \code{TRUE} diff --git a/tests/testthat/test-read_with.R b/tests/testthat/test-list_any.R similarity index 100% rename from tests/testthat/test-read_with.R rename to tests/testthat/test-list_any.R diff --git a/tests/testthat/test-list_rds.R b/tests/testthat/test-list_rds.R new file mode 100644 index 0000000..14ae01a --- /dev/null +++ b/tests/testthat/test-list_rds.R @@ -0,0 +1,146 @@ +context("list_rds") + +test_that("list_rds lists .rds files", { + expect_named( + list_rds(tor_example("rds")), + c("file1", "file2") + ) +}) + +test_that("list_rds defaults to read from working directory", { + expect_named(list_rds(), "rds") +}) + +test_that("list_rds reads specific .rds files", { + expect_named( + list_rds(tor_example("rds"), regexp = "file1"), + c("file1") + ) +}) + +test_that("list_rds allows inverting a `regexp` pattern", { + expect_named( + list_rds(tor_example("rds"), regexp = "file1", invert = TRUE), + c("file2") + ) +}) + +context("list_rdata") + +test_that("list_rdata lists .rdata, .Rdata, and .rda", { + expect_named( + list_rdata(tor_example("mixed")), + c("lower_rdata", "rda", "upper_rdata") + ) +}) + +test_that("list_rdata reads specific .rdata files (sensitive to `regexp`)", { + expect_named( + list_rdata(tor_example("mixed"), regexp = "lower_rdata"), + "lower_rdata" + ) +}) + +test_that("list_rdata is sensitive to `ignore.case`", { + expect_named( + list_rdata( + tor_example("mixed"), + regexp = "[.]RData$", + ignore.case = FALSE, + ), + "upper_rdata" + ) +}) + +test_that("list_rdata defaults to read from working directory", { + expect_named(list_rdata(), "rdata") +}) + +context("list_csv") + +test_that("list_csv can read .csv specifically in a mixed directory", { + expect_named( + list_csv(tor_example("mixed")), + "csv" + ) +}) + +test_that("list_csv defaults to `stringsAsFactors = FALSE`", { + expect_is(list_csv(tor_example("mixed"))[[1]]$y, "character") +}) + +test_that("list_csv defaults to read from working directory", { + expect_true(any("csv" %in% names(list_csv()))) +}) + +test_that("list_csv is sensitive to `regexp`, `invert, and `ignore.case`", { + expect_named( + list_csv( + tor_example("csv"), + regexp = "[.]CSV$", + ignore.case = TRUE, + ), + c("file1", "file2") + ) + + expect_named( + list_csv( + tor_example("csv"), + regexp = "[.]CSV$", + ignore.case = FALSE, + invert = TRUE + ), + c("file1", "file2") + ) + + expect_error( + list_csv( + tor_example("csv"), + regexp = "[.]CSV$", + ignore.case = FALSE, + ) + ) +}) + +context("list_tsv") + +test_that("list_tsv lists .tsv files", { + skip_if_not_installed("readr") + + readr <- readr::read_tsv(fs::dir_ls(tor_example("tsv"))[[1]]) + tor <- list_tsv(tor_example("tsv"))[[1]] + expect_equivalent(readr, tor) +}) + +test_that("list_tsv defaults to read from working directory", { + expect_named(list_tsv(), "tsv") +}) + +test_that("list_tsv is sensitive to `regexp`, `invert, and `ignore.case`", { + expect_named( + list_csv( + tor_example("tsv"), + regexp = "[.]TSV$", + ignore.case = TRUE, + ), + c("tsv1", "tsv2") + ) + + expect_named( + list_csv( + tor_example("tsv"), + regexp = "[.]TSV$", + ignore.case = FALSE, + invert = TRUE + ), + c("tsv1", "tsv2") + ) + + expect_error( + list_csv( + tor_example("tsv"), + regexp = "[.]TSV$", + ignore.case = FALSE, + ) + ) +}) diff --git a/tests/testthat/test-rds_list.R b/tests/testthat/test-rds_list.R deleted file mode 100644 index 51fdf15..0000000 --- a/tests/testthat/test-rds_list.R +++ /dev/null @@ -1,56 +0,0 @@ -context("list_rds") - -test_that("list_rds lists .rds files", { - expect_named( - list_rds(tor_example("rds")), - c("file1", "file2") - ) -}) - -test_that("list_rds defaults to read from working directory", { - expect_named(list_rds(), "rds") -}) - -context("list_rdata") - -test_that("list_rdata lists .rdata, .Rdata, and .rda", { - expect_named( - list_rdata(tor_example("mixed")), - c("lower_rdata", "rda", "upper_rdata") - ) -}) - -test_that("list_rdata defaults to read from working directory", { - expect_named(list_rdata(), "rdata") -}) - -context("list_csv") - -test_that("list_csv can read .csv specifically in a mixed directory", { - expect_named( - list_csv(tor_example("mixed")), - "csv" - ) -}) - -test_that("list_csv defaults to `stringsAsFactors = FALSE`", { - expect_is(list_csv(tor_example("mixed"))[[1]]$y, "character") -}) - -test_that("list_csv defaults to read from working directory", { - expect_true(any("csv" %in% names(list_csv()))) -}) - -context("list_tsv") - -test_that("list_tsv lists .tsv files", { - skip_if_not_installed("readr") - - readr <- readr::read_tsv(fs::dir_ls(tor_example("tsv"))[[1]]) - tor <- list_tsv(tor_example("tsv"))[[1]] - expect_equivalent(readr, tor) -}) - -test_that("list_tsv defaults to read from working directory", { - expect_named(list_tsv(), "tsv") -})