diff --git a/DESCRIPTION b/DESCRIPTION index 100715a0..700585ad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: skimr Title: Compact and Flexible Summaries of Data -Version: 2.1.1 +Version: 2.1.2 Authors@R: c(person(given = "Elin", family = "Waring", @@ -83,8 +83,16 @@ Authors@R: person(given = "Daniel", family = "Possenriede", role = "ctb", - email = "possenriede@gmail.com")) -Description: A simple to use summary function that can be used with pipes + email = "possenriede@gmail.com"), + person(given = "David", + family = "Zimmermann", + role = "ctb", + email = "david_j_zimmermann@hotmail.com"), + person(given = "Kyle", + family = "Butts", + role = "ctb", + email = "buttskyle96@gmail.com")) +Description: A simple to use summary function that can be used with pipes and displays nicely in the console. The default summary statistics may be modified by the user as can the default formatting. Support for data frames and vectors is included, and users can implement their own @@ -117,6 +125,7 @@ Imports: Suggests: covr, extrafont, + lubridate, rmarkdown, sf, testthat (>= 2.0.0) @@ -125,7 +134,7 @@ VignetteBuilder: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.1 Collate: 'deprecated.R' 'dplyr.R' diff --git a/NAMESPACE b/NAMESPACE index ff68fe7b..f87e9143 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ S3method(get_skimmers,AsIs) S3method(get_skimmers,Date) S3method(get_skimmers,POSIXct) +S3method(get_skimmers,Timespan) S3method(get_skimmers,character) S3method(get_skimmers,complex) S3method(get_skimmers,default) diff --git a/NEWS.md b/NEWS.md index 406d9292..ce57d229 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,14 @@ +# skimr 2.1.2 + +### MINOR IMPROVEMENTS + +* Add support for lubridate Timespan objects. +* Improvements to Supporting Additional Objects vignette. + +### BUG FIXES + +* Update package to work with new version of `knitr`. 
+ # skimr 2.1.1 (2020-04-15) ### MINOR IMPROVEMENTS diff --git a/R/get_skimmers.R b/R/get_skimmers.R index 22775711..dc0ec59a 100644 --- a/R/get_skimmers.R +++ b/R/get_skimmers.R @@ -167,6 +167,13 @@ get_skimmers.difftime <- function(column) { modify_default_skimmers("Date", new_skim_type = "difftime") } +#' @describeIn get_skimmers Summary functions for `Timespan` columns: [min()], +#' [max()], [median()] and [n_unique()]. +#' @export +get_skimmers.Timespan <- function(column) { + modify_default_skimmers("difftime", new_skim_type = "Timespan") +} + #' @describeIn get_skimmers Summary functions for `ts` columns: [min()], #' [max()], [median()] and [n_unique()]. #' @export diff --git a/R/stats.R b/R/stats.R index 0e2438f3..ddc8f711 100644 --- a/R/stats.R +++ b/R/stats.R @@ -39,7 +39,7 @@ complete_rate <- function(x) { #' values using s+ regex. #' @export n_whitespace <- function(x) { - whitespace <- grepl("^\\s+", x) + whitespace <- grepl("^\\s+$", x) sum(whitespace) } diff --git a/codemeta.json b/codemeta.json index d3149b44..42698601 100644 --- a/codemeta.json +++ b/codemeta.json @@ -5,19 +5,18 @@ ], "@type": "SoftwareSourceCode", "identifier": "skimr", - "description": "A simple to use summary function that can be used\n with pipes and displays nicely in the console. The default summary\n statistics may be modified by the user as can the default formatting.\n Support for data frames and vectors is included, and users can\n implement their own skim methods for specific object types as\n described in a vignette. Default summaries include support for inline\n spark graphs. Instructions for managing these on specific operating\n systems are given in the \"Using skimr\" vignette and the README.", + "description": "A simple to use summary function that can be used with pipes\n and displays nicely in the console. The default summary statistics may\n be modified by the user as can the default formatting. 
Support for\n data frames and vectors is included, and users can implement their own\n skim methods for specific object types as described in a vignette.\n Default summaries include support for inline spark graphs.\n Instructions for managing these on specific operating systems are\n given in the \"Using skimr\" vignette and the README.", "name": "skimr: Compact and Flexible Summaries of Data", "codeRepository": "https://github.com/ropensci/skimr", "issueTracker": "https://github.com/ropensci/skimr/issues", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "2.1", + "version": "2.1.2", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", - "version": "3.6.1", "url": "https://r-project.org" }, - "runtimePlatform": "R version 3.6.1 (2019-07-05)", + "runtimePlatform": "R version 4.0.1 (2020-06-06)", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -145,6 +144,17 @@ "givenName": "Daniel", "familyName": "Possenriede", "email": "possenriede@gmail.com" + }, + { + "@type": "Person", + "givenName": "David", + "familyName": "Zimmermann", + "email": "david_j_zimmermann@hotmail.com" + }, + { + "@type": "Person", + "givenName": "Kyle", + "familyName": "Butts" } ], "copyrightHolder": [ @@ -186,6 +196,18 @@ }, "sameAs": "https://CRAN.R-project.org/package=extrafont" }, + { + "@type": "SoftwareApplication", + "identifier": "lubridate", + "name": "lubridate", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=lubridate" + }, { "@type": "SoftwareApplication", "identifier": "rmarkdown", @@ -415,15 +437,12 @@ ], "releaseNotes": "https://github.com/ropensci/skimr/blob/master/NEWS.md", "readme": "https://github.com/ropensci/skimr/blob/master/README.md", - "fileSize": "1028.792KB", - "contIntegration": [ - "https://travis-ci.org/ropenscilabs/skimr", - 
"https://codecov.io/gh/ropenscilabs/skimr" - ], + "fileSize": "364473.922KB", + "contIntegration": ["https://travis-ci.org/ropensci/skimr", "https://ci.appveyor.com/project/michaelquinn32/skimr", "https://codecov.io/gh/ropensci/skimr"], "review": { "@type": "Review", - "url": "https://github.com/ropensci/onboarding/issues/175", - "provider": "http://ropensci.org" + "url": "https://github.com/ropensci/software-review/issues/175", + "provider": "https://ropensci.org" }, "keywords": [ "unconf17", diff --git a/man/get_skimmers.Rd b/man/get_skimmers.Rd index af77f241..4d745029 100644 --- a/man/get_skimmers.Rd +++ b/man/get_skimmers.Rd @@ -11,6 +11,7 @@ \alias{get_skimmers.Date} \alias{get_skimmers.POSIXct} \alias{get_skimmers.difftime} +\alias{get_skimmers.Timespan} \alias{get_skimmers.ts} \alias{get_skimmers.list} \alias{get_skimmers.AsIs} @@ -37,6 +38,8 @@ get_skimmers(column) \method{get_skimmers}{difftime}(column) +\method{get_skimmers}{Timespan}(column) + \method{get_skimmers}{ts}(column) \method{get_skimmers}{list}(column) @@ -107,6 +110,9 @@ default for unknown columns: \code{\link[=min_char]{min_char()}}, \code{\link[=m \item \code{difftime}: Summary functions for \code{difftime} columns: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}. +\item \code{Timespan}: Summary functions for \code{Timespan} columns: \code{\link[=min]{min()}}, +\code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}. + \item \code{ts}: Summary functions for \code{ts} columns: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}. diff --git a/man/mutate.skim_df.Rd b/man/mutate.skim_df.Rd index d148e58d..f0bc62c3 100644 --- a/man/mutate.skim_df.Rd +++ b/man/mutate.skim_df.Rd @@ -16,9 +16,9 @@ be the name of a new variable, and the value will be its corresponding value. 
Use \code{NULL} value in \code{mutate} to drop a variable. New variables overwrite existing variables of the same name. -The arguments in \code{...} are automatically quoted with \code{\link[rlang:quo]{rlang::quo()}} and +The arguments in \code{...} are automatically quoted with \code{\link[rlang:nse-defuse]{rlang::quo()}} and evaluated with \code{\link[rlang:eval_tidy]{rlang::eval_tidy()}} in the context of the data frame. They -support unquoting \code{\link[rlang:quasiquotation]{rlang::quasiquotation}} and splicing. See +support unquoting \code{\link[rlang:nse-force]{rlang::quasiquotation}} and splicing. See \code{vignette("programming", package = "dplyr")} for an introduction to these concepts.} } diff --git a/man/print.Rd b/man/print.Rd index f06cc434..cdf30e30 100644 --- a/man/print.Rd +++ b/man/print.Rd @@ -106,8 +106,8 @@ package. You can control printing behavior using the same global options. Printing a \code{skim_df} requires specific columns that might be dropped when -using \code{\link[dplyr:select]{dplyr::select()}} or \code{\link[dplyr:summarize]{dplyr::summarize()}} on a \code{skim_df}. In those -cases, this method falls back to \code{\link[tibble:print.tbl]{tibble::print.tbl()}}. +using \code{\link[dplyr:select]{dplyr::select()}} or \code{\link[dplyr:summarise]{dplyr::summarize()}} on a \code{skim_df}. In those +cases, this method falls back to \code{\link[tibble:formatting]{tibble::print.tbl()}}. } \section{Controlling metadata behavior}{ @@ -122,7 +122,7 @@ empty you may need to run the following \code{options(crayon.enabled = FALSE)}. } \seealso{ -\code{\link[tibble:trunc_mat]{tibble::trunc_mat()}} For a list of global options for customizing +\code{\link[tibble:formatting]{tibble::trunc_mat()}} For a list of global options for customizing print formatting. \code{\link[crayon:has_color]{crayon::has_color()}} for the variety of issues that affect tibble's color support. 
} diff --git a/man/reexports.Rd b/man/reexports.Rd index a41446f2..53fd354b 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -18,8 +18,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ - \item{magrittr}{\code{\link[magrittr]{\%>\%}}} + \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}} - \item{tidyselect}{\code{\link[tidyselect]{contains}}, \code{\link[tidyselect]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect]{matches}}, \code{\link[tidyselect]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}} + \item{tidyselect}{\code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}} }} diff --git a/man/sfl.Rd b/man/sfl.Rd index 4383b8e7..a84b82b3 100644 --- a/man/sfl.Rd +++ b/man/sfl.Rd @@ -34,7 +34,7 @@ returned by \code{dplyr::funs()} and a list of skimming functions to drop. \description{ This constructor is used to create a named list of functions. It also you also pass \code{NULL} to identify a skimming function that you wish to remove. -Only functions that return a single value, working with \code{\link[dplyr:summarize]{dplyr::summarize()}}, +Only functions that return a single value, working with \code{\link[dplyr:summarise]{dplyr::summarize()}}, can be used within \code{sfl}. 
} \details{ diff --git a/tests/testthat/print/knit_print-old_knitr.txt b/tests/testthat/print/knit_print-old_knitr.txt new file mode 100644 index 00000000..b9356562 --- /dev/null +++ b/tests/testthat/print/knit_print-old_knitr.txt @@ -0,0 +1,31 @@ + +| | | +|:------------------------|:----| +|Name |iris | +|Number of rows |150 | +|Number of columns |5 | +|_______________________ | | +|Column type frequency: | | +|factor |1 | +|numeric |4 | +|________________________ | | +|Group variables |None | + + +**Variable type: factor** + +|skim_variable | n_missing| complete_rate|ordered | n_unique|top_counts | +|:-------------|---------:|-------------:|:-------|--------:|:-------------------------| +|Species | 0| 1|FALSE | 3|set: 50, ver: 50, vir: 50 | + + +**Variable type: numeric** + +|skim_variable | n_missing| complete_rate| mean| sd| p0| p25| p50| p75| p100|hist | +|:-------------|---------:|-------------:|----:|----:|---:|---:|----:|---:|----:|:-----| +|Sepal.Length | 0| 1| 5.84| 0.83| 4.3| 5.1| 5.80| 6.4| 7.9|▆▇▇▅▂ | +|Sepal.Width | 0| 1| 3.06| 0.44| 2.0| 2.8| 3.00| 3.3| 4.4|▁▆▇▂▁ | +|Petal.Length | 0| 1| 3.76| 1.77| 1.0| 1.6| 4.35| 5.1| 6.9|▇▁▆▇▂ | +|Petal.Width | 0| 1| 1.20| 0.76| 0.1| 0.3| 1.30| 1.8| 2.5|▇▁▇▅▃ | + + diff --git a/tests/testthat/print/knit_print-summary-old_knitr.txt b/tests/testthat/print/knit_print-summary-old_knitr.txt new file mode 100644 index 00000000..f7e7159d --- /dev/null +++ b/tests/testthat/print/knit_print-summary-old_knitr.txt @@ -0,0 +1,12 @@ + +| | | +|:------------------------|:----| +|Name |iris | +|Number of rows |150 | +|Number of columns |5 | +|_______________________ | | +|Column type frequency: | | +|factor |1 | +|numeric |4 | +|________________________ | | +|Group variables |None | diff --git a/tests/testthat/print/knit_print-summary.txt b/tests/testthat/print/knit_print-summary.txt index ae880bf8..b86b9271 100644 --- a/tests/testthat/print/knit_print-summary.txt +++ b/tests/testthat/print/knit_print-summary.txt @@ -1,3 +1,5 
@@ +Table: Data summary + | | | |:------------------------|:----| |Name |iris | diff --git a/tests/testthat/print/knit_print.txt b/tests/testthat/print/knit_print.txt index b9356562..41f38251 100644 --- a/tests/testthat/print/knit_print.txt +++ b/tests/testthat/print/knit_print.txt @@ -1,4 +1,6 @@ +Table: Data summary + | | | |:------------------------|:----| |Name |iris | diff --git a/tests/testthat/test-get_skimmers.R b/tests/testthat/test-get_skimmers.R index 1789b77e..7af06830 100644 --- a/tests/testthat/test-get_skimmers.R +++ b/tests/testthat/test-get_skimmers.R @@ -17,7 +17,7 @@ test_that("get_default_skimmer_names() has a correct list of defaults", { names(defaults), c( "AsIs", "character", "complex", "Date", "difftime", "factor", "list", - "logical", "numeric", "POSIXct", "ts" + "logical", "numeric", "POSIXct", "Timespan", "ts" ) ) expect_identical(defaults$AsIs, c("n_unique", "min_length", "max_length")) @@ -27,6 +27,7 @@ test_that("get_default_skimmer_names() has a correct list of defaults", { expect_identical(defaults$complex, c("mean")) expect_identical(defaults$Date, c("min", "max", "median", "n_unique")) expect_identical(defaults$difftime, c("min", "max", "median", "n_unique")) + expect_identical(defaults$Timespan, c("min", "max", "median", "n_unique")) expect_identical(defaults$factor, c("ordered", "n_unique", "top_counts")) expect_identical(defaults$list, c("n_unique", "min_length", "max_length")) expect_identical(defaults$logical, c("mean", "count")) diff --git a/tests/testthat/test-skim.R b/tests/testthat/test-skim.R index 842e5d86..d477a28e 100644 --- a/tests/testthat/test-skim.R +++ b/tests/testthat/test-skim.R @@ -629,6 +629,47 @@ test_that("skim returns expected response for difftime vectors", { expect_identical(input$difftime.n_unique, 9L) }) +test_that("skim returns expected response for lubridate Timespan vectors", { + dt <- tibble::tibble(x = lubridate::duration(1)) + input <- skim(dt) + + # dimensions + expect_n_rows(input, 1) + 
expect_n_columns(input, 8) + + # classes + expect_is(input, "skim_df") + expect_is(input, "tbl_df") + expect_is(input, "tbl") + expect_is(input, "data.frame") + expect_named(input, c( + "skim_type", "skim_variable", "n_missing", "complete_rate", + "Timespan.min", "Timespan.max", "Timespan.median", + "Timespan.n_unique" + )) + + # attributes + attrs <- attributes(input) + expect_equal(attrs$data_rows, 1) + expect_equal(attrs$data_cols, 1) + expect_equal(attrs$df_name, "`dt`") + expect_equal( + attrs$skimmers_used, + list(Timespan = c("min", "max", "median", "n_unique")) + ) + + # values + expect_identical(input$skim_variable, "x") + expect_identical(input$skim_type, "Timespan") + expect_identical(input$n_missing, 0L) + expect_equal(input$complete_rate, 1, tolerance = .001) + expect_identical(input$Timespan.min, 1) + expect_identical(input$Timespan.max, 1) + expect_identical(input$Timespan.median, lubridate::duration(1)) + expect_identical(input$Timespan.n_unique, 1L) +}) + + test_that("skim handles objects with multiple classes", { dat <- seq(as.Date("2011-07-01"), by = 1, len = 10) dat[2] <- NA diff --git a/tests/testthat/test-skim_print.R b/tests/testthat/test-skim_print.R index 3fb327b0..c0dd7eac 100644 --- a/tests/testthat/test-skim_print.R +++ b/tests/testthat/test-skim_print.R @@ -30,14 +30,23 @@ test_that("knit_print produces expected results", { input <- knit_print(skimmed) expect_is(input, "knit_asis") expect_length(input, 1) - expect_matches_file(input, "print/knit_print.txt") + if (packageVersion("knitr") <= "1.28") { + expect_matches_file(input, "print/knit_print-old_knitr.txt") + } else { + expect_matches_file(input, "print/knit_print.txt") + } }) test_that("knit_print works with skim summaries", { - skimmed <- skim(iris) - summarized <- summary(skimmed) - input <- knitr::knit_print(summarized) + + skimmed <- skim(iris) + summarized <- summary(skimmed) + input <- knitr::knit_print(summarized) + if (packageVersion("knitr") <= "1.28") { + 
expect_matches_file(input, "print/knit_print-summary-old_knitr.txt") + } else { expect_matches_file(input, "print/knit_print-summary.txt") + } }) test_that("knit_print appropriately falls back to tibble printing", { diff --git a/tests/testthat/test-stats.R b/tests/testthat/test-stats.R index eaa13091..15f0b301 100644 --- a/tests/testthat/test-stats.R +++ b/tests/testthat/test-stats.R @@ -44,9 +44,9 @@ test_that("complete_rate is calculated correctly.", { }) test_that("n_whitespace is calculated correctly.", { - data <- c("a", "b", "c", NA, " ") + data <- c("a", "b", "c", NA, " ", " a", " ", " a") input <- n_whitespace(data) - expect_identical(input, 1L) + expect_identical(input, 2L) }) test_that("inline histogram is calculated correctly.", { diff --git a/vignettes/Supporting_additional_objects.Rmd b/vignettes/Supporting_additional_objects.Rmd index bb1868ef..b9abab0c 100644 --- a/vignettes/Supporting_additional_objects.Rmd +++ b/vignettes/Supporting_additional_objects.Rmd @@ -24,11 +24,9 @@ involves two required elements and one optional element. - if needed, define any custom statistics If you are adding skim support to a package you will also need to add `skimr` -to the list of imports. Note that in this vignette the actual analysis will -not be run because that would require importing the `sf` package just for this -example. However to run it on your own you can install `sf` and then run the -following code. Note that code in this vignette was not evaluated when -rendering the vignette in order to avoid forcing installation of sf. +to the list of imports. Note that to run the code in this vignette you will +need to install the `sf` package. We suggest not doing that, and instead +substitute whatever package you are working with. 
```{r} library(skimr) @@ -39,6 +37,8 @@ nc <- st_read(system.file("shape/nc.shp", package = "sf")) ```{r} class(nc) + +class(nc$geometry) ``` Unlike the example of having a new type of data in a column of a simple data @@ -65,11 +65,13 @@ back to treating the type as a character, which isn't necessarily helpful. In this case, you're best off adding your data type with `skim_with()`. Before we begin, we'll be using the following custom summary statistic -throughout. It's a naive example, but covers the requirements of what we need. +throughout. The function gets the geometry's CRS and combines it into a string. ```{r} -funny_sf <- function(x) { - length(x) + 1 +get_crs <- function(column) { + crs <- sf::st_crs(column) + + paste0("epsg: ", crs[["epsg"]], " proj4string: '", crs[["proj4string"]], "'") } ``` @@ -92,71 +94,41 @@ default `skimr` percentiles are returned by using `quantile()` five times. Next, we create a custom skimming function. To do this, we need to think about -the many specific classes of data in the `sf` package. The following example -will build support for `sfc_MULTIPOLYGON`, but note that we'll have to -eventually think about `sfc_LINESTRING`, `sfc_POLYGON`, `sfc_MULTIPOINT` and -others if we want to fully support `sf`. +the many specific classes of data in the `sf` package. From above, you can see +the geometry column has two classes: first, the specific geometry type (e.g. +`sfc_MULTIPOLYGON`, `sfc_LINESTRING`, `sfc_POLYGON`, `sfc_MULTIPOINT`) and, second, +the general `sfc` class. `skimr` will try to find an `sfl()` helper function for the +classes in the order they appear in `class(.)` (for more detail on S3 classes, see +[*Advanced R*](https://adv-r.hadley.nz/s3.html)). The following example will +build support for `sfc`, which encompasses all `sf` objects: `sfc_MULTIPOLYGON`, +`sfc_LINESTRING`, `sfc_POLYGON`, `sfc_MULTIPOINT`. If we want custom `skim_with()` +functions we can write `sfl()` helper functions for the geometry type. 
+ ```{r} skim_sf <- skim_with( - sfc_MULTIPOLYGON = sfl( + sfc = sfl( n_unique = n_unique, valid = ~ sum(sf::st_is_valid(.)), - funny = funny_sf + crs = get_crs ) ) ``` The example above creates a new *function*, and you can call that function on -a specific column with `sfc_MULTIPOLYGON` data to get the appropriate summary -statistics. +a specific column with `sfc` data to get the appropriate summary +statistics. The `skim_with()` factory also uses the default skimmers for things +like factors, characters, and numerics. Therefore our `skim_sf()` is like the regular +`skim()` function with the added ability to summarize `sfc` columns. ```{r} skim_sf(nc$geometry) ``` -Creating a function that is a method of the skim_by_type generic -for the data type allows skimming of an entire data frame that contains some -columns of that type. - -```{r} -skim_by_type.sfc_MULTIPOLYGON <- function(mangled, columns, data) { - skimmed <- dplyr::summarize_at(data, columns, mangled$funs) - build_results(skimmed, columns, NULL) -} -``` - -```{r} -skim_sf(nc) -``` - - -Sharing these functions within a separate package requires an export. -The simplest way to do this is with Roxygen. - -```{r} -#' Skimming functions for `sfc_MULTIPOLYGON` objects. -#' @export -skim_sf <- skim_with( - sfc_MULTIPOLYGON = sfl( - missing = n_missing, - n = length, - n_unique = n_unique, - valid = ~ sum(sf::st_is_valid(.)), - funny = funny_sf - ) -) - -#' A skim_by_type function for `sfc_MULTIPOLYGON` objects. -#' @export -skim_by_type.sfc_MULTIPOLYGON <- function(mangled, columns, data) { - skimmed <- dplyr::summarize_at(data, columns, mangled$funs) - skimr::build_results(skimmed, columns, NULL) -} -``` -While this works within any package, there is an even better approach in this -case. To take full advantage of `skimr`, we'll dig a bit into its API. +While this works for any data type and you can also include it within any +package (assuming your users load `skimr`), there is an even better approach in +this case. 
To take full advantage of `skimr`, we'll dig a bit into its API. ## Adding new methods @@ -165,21 +137,25 @@ find default summary functions for each class. This is based on the S3 class system. You can learn more about it in [*Advanced R*](https://adv-r.hadley.nz/s3.html). +This requires that you add `skimr` to your list of dependencies. + To export a new set of defaults for a data type, create a method for the generic function `get_skimmers`. Each of those methods returns an `sfl`, a `skimr` function list. This is the same list-like data structure used in the `skim_with()` example above. But note! There is one key difference. When adding -a generic we also want to identify the `skim_type` in the `sfl`. +a generic we also want to identify the `skim_type` in the `sfl`. You will +probably want to use `skimr::get_skimmers.sfc()` but that will not work in a +vignette. ```{r} #' @importFrom skimr get_skimmers #' @export -get_skimmers.sfc_MULTIPOLYGON <- function(column) { +get_skimmers.sfc <- function(column) { sfl( - skim_type = "sfc_MULTIPOLYGON", + skim_type = "sfc", n_unique = n_unique, valid = ~ sum(sf::st_is_valid(.)), - funny = funny_sf + crs = get_crs ) } ``` @@ -190,32 +166,27 @@ The same strategy follows for other data types. * return an `sfl` * make sure that the `skim_type` is there -```{r} -#' @export -get_skimmers.sfc_POINT <- function(column) { - sfl( - skim_type = "sfc_POINT", - n_unique = n_unique, - valid = ~ sum(sf::st_is_valid(.)) - ) -} -``` - -Users of your package should load `skimr` to get the `skim()` function. Once +Users of your package should load `skimr` to get the `skim()` function +(although you could import and reexport it). Once loaded, a call to `get_default_skimmer_names()` will return defaults for your -data types as well! +data types as well! ```{r} get_default_skimmer_names() ``` +They will then be able to use `skim()` directly. + +```{r} +skim(nc) ``` + ## Conclusion -This is a very simple example. 
For a package such as sf the custom statistics +This is a very simple example. For a package such as `sf` the custom statistics will likely be much more complex. The flexibility of `skimr` allows you to manage that. -Thanks to Jakub Nowosad, Tiernan Martin, Edzer Pebesma and Michael Sumner for -inspiring and helping with the development of this code. +Thanks to Jakub Nowosad, Tiernan Martin, Edzer Pebesma, Michael Sumner, and +Kyle Butts for inspiring and helping with the development of this code.