From 8f73e3967453efb9c83521a2debb988d66626594 Mon Sep 17 00:00:00 2001 From: kylebutts Date: Sun, 1 Mar 2020 22:55:43 -0700 Subject: [PATCH 01/21] Update "Supporting Additional Objects" vignette Few changes: 1. Since sf objects have class = "sfc_GEOMETRYTYPE" "sfc", we can define sfl for "sfc" that will be called if there does not exist an sfl for "sfc_GEOMETRYTYPE". 2. I made the example a bit more relevant for sf objects, namely summarizing the CRS (projection) being used. I do think that sf objects should be used as an included sfl, but that could be because I spend a lot of time working with spatial objects ;). 3. There is mention of skim_by_type.sfc_MULTIPOLYGON, which I don't think needs to be in here unless I misunderstand skimr's code. The "skim_with" factory should already use the base sfl's for numeric, factors, characters, etc. and as you can see the skim_sf() works automatically without the skim_by_type function. --- vignettes/Supporting_additional_objects.Rmd | 76 +++++++++------------ 1 file changed, 32 insertions(+), 44 deletions(-) diff --git a/vignettes/Supporting_additional_objects.Rmd b/vignettes/Supporting_additional_objects.Rmd index bb1868ef..4711bac0 100644 --- a/vignettes/Supporting_additional_objects.Rmd +++ b/vignettes/Supporting_additional_objects.Rmd @@ -39,6 +39,8 @@ nc <- st_read(system.file("shape/nc.shp", package = "sf")) ```{r} class(nc) + +class(nc$geometry) ``` Unlike the example of having a new type of data in a column of a simple data @@ -46,10 +48,11 @@ frame in the "Using skimr" vignette, this is a different type of object with special attributes. In this object there is also a column of a class that does not have default -skimmers. By default, skimr falls back to use the sfl for character variables. +skimmers. By default, skimr falls back to use the sfl for character variables. ```{r} skim(nc$geometry) + ``` @@ -65,11 +68,13 @@ back to treating the type as a character, which isn't necessarily helpful. 
In this case, you're best off adding your data type with `skim_with()`. Before we begin, we'll be using the following custom summary statistic -throughout. It's a naive example, but covers the requirements of what we need. +throughout. The function gets the geometry's crs and combines it into a string. ```{r} -funny_sf <- function(x) { - length(x) + 1 +get_crs <- function(column){ + crs <- sf::st_crs(column) + + paste0("epsg: ", crs[["epsg"]], " proj4string: '", crs[["proj4string"]], "'") } ``` @@ -92,67 +97,52 @@ default `skimr` percentiles are returned by using `quantile()` five times. Next, we create a custom skimming function. To do this, we need to think about -the many specific classes of data in the `sf` package. The following example -will build support for `sfc_MULTIPOLYGON`, but note that we'll have to -eventually think about `sfc_LINESTRING`, `sfc_POLYGON`, `sfc_MULTIPOINT` and -others if we want to fully support `sf`. +the many specific classes of data in the `sf` package. From above, you can see +the geometry column has two classes: 1st the specific geometry type (e.g. +`sfc_MULTIPOLYGON` `sfc_LINESTRING`, `sfc_POLYGON`, `sfc_MULTIPOINT`) and 2nd the +general sfc class. Skimr will try to find a sfl() helper function for the classes +in the order they appear in class(.) (see S3 classes for more detail +[*Advanced R*](https://adv-r.hadley.nz/s3.html)). The following example will build +support for `sfc`, which encompasses all `sf` objects: `sfc_MULTIPOLYGON` +`sfc_LINESTRING`, `sfc_POLYGON`, `sfc_MULTIPOINT`. If we want custom skim_with +functions we can write sfl() helper functions for the geometry type. + ```{r} skim_sf <- skim_with( - sfc_MULTIPOLYGON = sfl( + sfc = sfl( n_unique = n_unique, valid = ~ sum(sf::st_is_valid(.)), - funny = funny_sf + crs= get_crs ) ) ``` The example above creates a new *function*, and you can call that function on -a specific column with `sfc_MULTIPOLYGON` data to get the appropriate summary -statistics. 
- -```{r} -skim_sf(nc$geometry) -``` - -Creating a function that is a method of the skim_by_type generic -for the data type allows skimming of an entire data frame that contains some -columns of that type. - -```{r} -skim_by_type.sfc_MULTIPOLYGON <- function(mangled, columns, data) { - skimmed <- dplyr::summarize_at(data, columns, mangled$funs) - build_results(skimmed, columns, NULL) -} -``` +a specific column with `sfc` data to get the appropriate summary +statistics. The `skim_with` factory also uses the default skimrs for things +like factors, characters, numerics, etc. Therefore our `skim_sf` is like the regular +`skim` function with the added ability to summarize `sfc` columns. ```{r} skim_sf(nc) ``` - Sharing these functions within a separate package requires an export. The simplest way to do this is with Roxygen. ```{r} -#' Skimming functions for `sfc_MULTIPOLYGON` objects. +#' Skimming functions for `sfc` objects. #' @export skim_sf <- skim_with( - sfc_MULTIPOLYGON = sfl( + sfc = sfl( missing = n_missing, n = length, n_unique = n_unique, valid = ~ sum(sf::st_is_valid(.)), - funny = funny_sf + crs = get_crs ) ) - -#' A skim_by_type function for `sfc_MULTIPOLYGON` objects. -#' @export -skim_by_type.sfc_MULTIPOLYGON <- function(mangled, columns, data) { - skimmed <- dplyr::summarize_at(data, columns, mangled$funs) - skimr::build_results(skimmed, columns, NULL) -} ``` While this works within any package, there is an even better approach in this @@ -174,12 +164,12 @@ a generic we also want to identify the `skim_type` in the `sfl`. ```{r} #' @importFrom skimr get_skimmers #' @export -get_skimmers.sfc_MULTIPOLYGON <- function(column) { +get_skimmers.sfc<- function(column) { sfl( - skim_type = "sfc_MULTIPOLYGON", + skim_type = "sfc", n_unique = n_unique, valid = ~ sum(sf::st_is_valid(.)), - funny = funny_sf + crs = get_crs ) } ``` @@ -209,8 +199,6 @@ data types as well! get_default_skimmer_names() ``` -``` - ## Conclusion This is a very simple example. 
For a package such as sf the custom statistics @@ -218,4 +206,4 @@ will likely be much more complex. The flexibility of `skimr` allows you to manage that. Thanks to Jakub Nowosad, Tiernan Martin, Edzer Pebesma and Michael Sumner for -inspiring and helping with the development of this code. +inspiring and helping with the development of this code. From af4397d5d91b8b55867ed3098461e7483b66211f Mon Sep 17 00:00:00 2001 From: kylebutts Date: Mon, 9 Mar 2020 10:39:03 -0600 Subject: [PATCH 02/21] Fixed styling for Supporting_additional_objects.Rmd --- vignettes/Supporting_additional_objects.Rmd | 38 +++++++++++++++++---- 1 file changed, 32 insertions(+), 6 deletions(-) diff --git a/vignettes/Supporting_additional_objects.Rmd b/vignettes/Supporting_additional_objects.Rmd index 4711bac0..09e4cfc5 100644 --- a/vignettes/Supporting_additional_objects.Rmd +++ b/vignettes/Supporting_additional_objects.Rmd @@ -52,7 +52,6 @@ skimmers. By default, skimr falls back to use the sfl for character variables. ```{r} skim(nc$geometry) - ``` @@ -71,9 +70,9 @@ Before we begin, we'll be using the following custom summary statistic throughout. The function gets the geometry's crs and combines it into a string. ```{r} -get_crs <- function(column){ +get_crs <- function(column) { crs <- sf::st_crs(column) - + paste0("epsg: ", crs[["epsg"]], " proj4string: '", crs[["proj4string"]], "'") } ``` @@ -101,7 +100,7 @@ the many specific classes of data in the `sf` package. From above, you can see the geometry column has two classes: 1st the specific geometry type (e.g. `sfc_MULTIPOLYGON` `sfc_LINESTRING`, `sfc_POLYGON`, `sfc_MULTIPOINT`) and 2nd the general sfc class. Skimr will try to find a sfl() helper function for the classes -in the order they appear in class(.) (see S3 classes for more detail +in the order they appear in `class(.)` (see S3 classes for more detail [*Advanced R*](https://adv-r.hadley.nz/s3.html)). 
The following example will build support for `sfc`, which encompasses all `sf` objects: `sfc_MULTIPOLYGON` `sfc_LINESTRING`, `sfc_POLYGON`, `sfc_MULTIPOINT`. If we want custom skim_with @@ -113,7 +112,7 @@ skim_sf <- skim_with( sfc = sfl( n_unique = n_unique, valid = ~ sum(sf::st_is_valid(.)), - crs= get_crs + crs = get_crs ) ) ``` @@ -128,6 +127,24 @@ like factors, characters, numerics, etc. Therefore our `skim_sf` is like the reg skim_sf(nc) ``` +** This seems unnecessary, but maybe I'm misunderstanding something. I think the skim_with function already creates + +Creating a function that is a method of the skim_by_type generic +for the data type allows skimming of an entire data frame that contains some +columns of that type. + +```{r} +skim_by_type.sfc <- function(mangled, columns, data) { + skimmed <- dplyr::summarize_at(data, columns, mangled$funs) + build_results(skimmed, columns, NULL) +} +``` + +```{r} +skim_sf(nc) +``` + + Sharing these functions within a separate package requires an export. The simplest way to do this is with Roxygen. @@ -143,6 +160,15 @@ skim_sf <- skim_with( crs = get_crs ) ) + +### This also seems unnecessary + +#' A skim_by_type function for `sfc_MULTIPOLYGON` objects. +#' @export +skim_by_type.sfc <- function(mangled, columns, data) { + skimmed <- dplyr::summarize_at(data, columns, mangled$funs) + skimr::build_results(skimmed, columns, NULL) +} ``` While this works within any package, there is an even better approach in this @@ -164,7 +190,7 @@ a generic we also want to identify the `skim_type` in the `sfl`. 
```{r} #' @importFrom skimr get_skimmers #' @export -get_skimmers.sfc<- function(column) { +get_skimmers.sfc <- function(column) { sfl( skim_type = "sfc", n_unique = n_unique, From 1d24787a75ca7dd5c520c1285790a3f874fd0228 Mon Sep 17 00:00:00 2001 From: kylebutts Date: Mon, 9 Mar 2020 10:40:25 -0600 Subject: [PATCH 03/21] Update Supporting_additional_objects.Rmd --- vignettes/Supporting_additional_objects.Rmd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vignettes/Supporting_additional_objects.Rmd b/vignettes/Supporting_additional_objects.Rmd index 09e4cfc5..ac1b555f 100644 --- a/vignettes/Supporting_additional_objects.Rmd +++ b/vignettes/Supporting_additional_objects.Rmd @@ -231,5 +231,5 @@ This is a very simple example. For a package such as sf the custom statistics will likely be much more complex. The flexibility of `skimr` allows you to manage that. -Thanks to Jakub Nowosad, Tiernan Martin, Edzer Pebesma and Michael Sumner for +Thanks to Jakub Nowosad, Tiernan Martin, Edzer Pebesma, Michael Sumner, and Kyle Butts for inspiring and helping with the development of this code. From 774f70d3042673b0ed4a8d53567523547457a84e Mon Sep 17 00:00:00 2001 From: David Date: Sun, 10 May 2020 12:53:16 +0200 Subject: [PATCH 04/21] fix whitespace count and add test --- R/stats.R | 2 +- tests/testthat/test-stats.R | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/R/stats.R b/R/stats.R index 0e2438f3..ddc8f711 100644 --- a/R/stats.R +++ b/R/stats.R @@ -39,7 +39,7 @@ complete_rate <- function(x) { #' values using s+ regex. 
#' @export n_whitespace <- function(x) { - whitespace <- grepl("^\\s+", x) + whitespace <- grepl("^\\s+$", x) sum(whitespace) } diff --git a/tests/testthat/test-stats.R b/tests/testthat/test-stats.R index eaa13091..15f0b301 100644 --- a/tests/testthat/test-stats.R +++ b/tests/testthat/test-stats.R @@ -44,9 +44,9 @@ test_that("complete_rate is calculated correctly.", { }) test_that("n_whitespace is calculated correctly.", { - data <- c("a", "b", "c", NA, " ") + data <- c("a", "b", "c", NA, " ", " a", " ", " a") input <- n_whitespace(data) - expect_identical(input, 1L) + expect_identical(input, 2L) }) test_that("inline histogram is calculated correctly.", { From 72d131a69cdd2cd2d2dadf8094254bf50f8d2b3c Mon Sep 17 00:00:00 2001 From: dzimmermann Date: Wed, 13 May 2020 11:14:00 +0200 Subject: [PATCH 05/21] added David as ctb --- DESCRIPTION | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index 100715a0..bc19b214 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -83,7 +83,11 @@ Authors@R: person(given = "Daniel", family = "Possenriede", role = "ctb", - email = "possenriede@gmail.com")) + email = "possenriede@gmail.com"), + person(given = "David", + family = "Zimmermann", + role = "ctb", + email = "david_j_zimmermann@hotmail.com")) Description: A simple to use summary function that can be used with pipes and displays nicely in the console. The default summary statistics may be modified by the user as can the default formatting. 
Support for From d2eb842d489986709123c20a072690b4c7738a79 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Mon, 22 Jun 2020 15:03:47 -0500 Subject: [PATCH 06/21] The next version of knitr::kable() will add the table caption if provided --- DESCRIPTION | 1 + 1 file changed, 1 insertion(+) diff --git a/DESCRIPTION b/DESCRIPTION index 100715a0..93287601 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -126,6 +126,7 @@ Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.0.2 +Remotes: yihui/knitr Collate: 'deprecated.R' 'dplyr.R' From c89b165b5d0948e1fa3ebfa675403962285c493f Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Mon, 22 Jun 2020 15:06:28 -0500 Subject: [PATCH 07/21] Update knit_print.txt --- tests/testthat/print/knit_print.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testthat/print/knit_print.txt b/tests/testthat/print/knit_print.txt index b9356562..41f38251 100644 --- a/tests/testthat/print/knit_print.txt +++ b/tests/testthat/print/knit_print.txt @@ -1,4 +1,6 @@ +Table: Data summary + | | | |:------------------------|:----| |Name |iris | From 5e3de24259971b5f830229a5bd32ad64d5738384 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Mon, 22 Jun 2020 15:06:49 -0500 Subject: [PATCH 08/21] Update knit_print-summary.txt --- tests/testthat/print/knit_print-summary.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/testthat/print/knit_print-summary.txt b/tests/testthat/print/knit_print-summary.txt index ae880bf8..b86b9271 100644 --- a/tests/testthat/print/knit_print-summary.txt +++ b/tests/testthat/print/knit_print-summary.txt @@ -1,3 +1,5 @@ +Table: Data summary + | | | |:------------------------|:----| |Name |iris | From ee755d3b2d87addd2a9a7070e248c6eb75447617 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Mon, 22 Jun 2020 15:12:23 -0500 Subject: [PATCH 09/21] Update appveyor.yml --- appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/appveyor.yml b/appveyor.yml index 0ceaaf33..1c6c5fbb 100644 --- 
a/appveyor.yml +++ b/appveyor.yml @@ -19,6 +19,7 @@ build_script: - travis-tool.sh install_github r-lib/devtools - travis-tool.sh install_deps - travis-tool.sh install_github tidyverse/dplyr + - Rscript -e 'update.packages(ask = FALSE, checkBuilt = TRUE)' environment: global: From 63073445b9899a9687ed07b5bbfb9b24f12b9dba Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Mon, 22 Jun 2020 15:24:01 -0500 Subject: [PATCH 10/21] install xfun explicitly --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 1c6c5fbb..5015cd9f 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -16,10 +16,10 @@ install: # Adapt as necessary starting from here build_script: + - Rscript -e 'install.packages("xfun", repos = "https://cloud.r-project.org")' - travis-tool.sh install_github r-lib/devtools - travis-tool.sh install_deps - travis-tool.sh install_github tidyverse/dplyr - - Rscript -e 'update.packages(ask = FALSE, checkBuilt = TRUE)' environment: global: From ddfe1ec48d05d2b9628884ad283efe3f22528326 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Mon, 22 Jun 2020 15:28:47 -0500 Subject: [PATCH 11/21] Update appveyor.yml --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 5015cd9f..4f8d6817 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -16,7 +16,7 @@ install: # Adapt as necessary starting from here build_script: - - Rscript -e 'install.packages("xfun", repos = "https://cloud.r-project.org")' + - Rscript -e "install.packages('xfun', repos = 'https://cloud.r-project.org')" - travis-tool.sh install_github r-lib/devtools - travis-tool.sh install_deps - travis-tool.sh install_github tidyverse/dplyr From 1eeb104d7e5e57e45971e290439186a2cf799832 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Tue, 23 Jun 2020 09:53:37 -0500 Subject: [PATCH 12/21] knitr 1.29 is on CRAN now --- DESCRIPTION | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/DESCRIPTION 
b/DESCRIPTION index 93287601..a2e5a84c 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -102,7 +102,7 @@ Imports: cli, crayon, dplyr (>= 0.8.0), - knitr (>= 1.2), + knitr (>= 1.29), magrittr (>= 1.5), purrr, repr, @@ -126,7 +126,6 @@ Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) RoxygenNote: 7.0.2 -Remotes: yihui/knitr Collate: 'deprecated.R' 'dplyr.R' From 6d2190e0363d86c5547a49776b4e7c0a5b0035a7 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Tue, 23 Jun 2020 09:54:13 -0500 Subject: [PATCH 13/21] knitr 1.29 depends on the latest version of xfun, so no longer need to install it separately --- appveyor.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 4f8d6817..0ceaaf33 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -16,7 +16,6 @@ install: # Adapt as necessary starting from here build_script: - - Rscript -e "install.packages('xfun', repos = 'https://cloud.r-project.org')" - travis-tool.sh install_github r-lib/devtools - travis-tool.sh install_deps - travis-tool.sh install_github tidyverse/dplyr From da92226ea664b262426e38a23580c73fe1026f37 Mon Sep 17 00:00:00 2001 From: Yihui Xie Date: Wed, 24 Jun 2020 23:40:39 -0500 Subject: [PATCH 14/21] bump version --- DESCRIPTION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/DESCRIPTION b/DESCRIPTION index a2e5a84c..5bc451c6 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: skimr Title: Compact and Flexible Summaries of Data -Version: 2.1.1 +Version: 2.1.2 Authors@R: c(person(given = "Elin", family = "Waring", From 23c0ff5933ee23cf42f1c929788fc866df9a3af6 Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Fri, 3 Jul 2020 12:48:00 -0400 Subject: [PATCH 15/21] Update tests to allow older versions of knitr and add news --- DESCRIPTION | 4 +-- NEWS.md | 6 ++++ codemeta.json | 6 ++-- tests/testthat/print/knit_print-old_knitr.txt | 31 +++++++++++++++++++ .../print/knit_print-summary-old_knitr.txt | 12 +++++++ tests/testthat/test-skim_print.R | 17 
+++++++--- 6 files changed, 67 insertions(+), 9 deletions(-) create mode 100644 tests/testthat/print/knit_print-old_knitr.txt create mode 100644 tests/testthat/print/knit_print-summary-old_knitr.txt diff --git a/DESCRIPTION b/DESCRIPTION index bd170e57..83d34160 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -106,7 +106,7 @@ Imports: cli, crayon, dplyr (>= 0.8.0), - knitr (>= 1.29), + knitr (>= 1.2), magrittr (>= 1.5), purrr, repr, @@ -129,7 +129,7 @@ VignetteBuilder: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.0 Collate: 'deprecated.R' 'dplyr.R' diff --git a/NEWS.md b/NEWS.md index 406d9292..e1629f37 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,3 +1,9 @@ +# skimr 2.1.2 + +### BUG FIXES + +* Update package to work with new version of `knitr`. + # skimr 2.1.1 (2020-04-15) ### MINOR IMPROVEMENTS diff --git a/codemeta.json b/codemeta.json index d3149b44..44aba69c 100644 --- a/codemeta.json +++ b/codemeta.json @@ -5,12 +5,12 @@ ], "@type": "SoftwareSourceCode", "identifier": "skimr", - "description": "A simple to use summary function that can be used\n with pipes and displays nicely in the console. The default summary\n statistics may be modified by the user as can the default formatting.\n Support for data frames and vectors is included, and users can\n implement their own skim methods for specific object types as\n described in a vignette. Default summaries include support for inline\n spark graphs. Instructions for managing these on specific operating\n systems are given in the \"Using skimr\" vignette and the README.", + "description": "A simple to use summary function that can be used with pipes\n and displays nicely in the console. The default summary statistics may\n be modified by the user as can the default formatting. 
Support for\n data frames and vectors is included, and users can implement their own\n skim methods for specific object types as described in a vignette.\n Default summaries include support for inline spark graphs.\n Instructions for managing these on specific operating systems are\n given in the \"Using skimr\" vignette and the README.", "name": "skimr: Compact and Flexible Summaries of Data", "codeRepository": "https://github.com/ropensci/skimr", "issueTracker": "https://github.com/ropensci/skimr/issues", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "2.1", + "version": "2.1.1", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", @@ -415,7 +415,7 @@ ], "releaseNotes": "https://github.com/ropensci/skimr/blob/master/NEWS.md", "readme": "https://github.com/ropensci/skimr/blob/master/README.md", - "fileSize": "1028.792KB", + "fileSize": "1102.508KB", "contIntegration": [ "https://travis-ci.org/ropenscilabs/skimr", "https://codecov.io/gh/ropenscilabs/skimr" diff --git a/tests/testthat/print/knit_print-old_knitr.txt b/tests/testthat/print/knit_print-old_knitr.txt new file mode 100644 index 00000000..b9356562 --- /dev/null +++ b/tests/testthat/print/knit_print-old_knitr.txt @@ -0,0 +1,31 @@ + +| | | +|:------------------------|:----| +|Name |iris | +|Number of rows |150 | +|Number of columns |5 | +|_______________________ | | +|Column type frequency: | | +|factor |1 | +|numeric |4 | +|________________________ | | +|Group variables |None | + + +**Variable type: factor** + +|skim_variable | n_missing| complete_rate|ordered | n_unique|top_counts | +|:-------------|---------:|-------------:|:-------|--------:|:-------------------------| +|Species | 0| 1|FALSE | 3|set: 50, ver: 50, vir: 50 | + + +**Variable type: numeric** + +|skim_variable | n_missing| complete_rate| mean| sd| p0| p25| p50| p75| p100|hist | +|:-------------|---------:|-------------:|----:|----:|---:|---:|----:|---:|----:|:-----| +|Sepal.Length | 0| 1| 5.84| 0.83| 4.3| 5.1| 
5.80| 6.4| 7.9|▆▇▇▅▂ | +|Sepal.Width | 0| 1| 3.06| 0.44| 2.0| 2.8| 3.00| 3.3| 4.4|▁▆▇▂▁ | +|Petal.Length | 0| 1| 3.76| 1.77| 1.0| 1.6| 4.35| 5.1| 6.9|▇▁▆▇▂ | +|Petal.Width | 0| 1| 1.20| 0.76| 0.1| 0.3| 1.30| 1.8| 2.5|▇▁▇▅▃ | + + diff --git a/tests/testthat/print/knit_print-summary-old_knitr.txt b/tests/testthat/print/knit_print-summary-old_knitr.txt new file mode 100644 index 00000000..f7e7159d --- /dev/null +++ b/tests/testthat/print/knit_print-summary-old_knitr.txt @@ -0,0 +1,12 @@ + +| | | +|:------------------------|:----| +|Name |iris | +|Number of rows |150 | +|Number of columns |5 | +|_______________________ | | +|Column type frequency: | | +|factor |1 | +|numeric |4 | +|________________________ | | +|Group variables |None | diff --git a/tests/testthat/test-skim_print.R b/tests/testthat/test-skim_print.R index 3fb327b0..c0dd7eac 100644 --- a/tests/testthat/test-skim_print.R +++ b/tests/testthat/test-skim_print.R @@ -30,14 +30,23 @@ test_that("knit_print produces expected results", { input <- knit_print(skimmed) expect_is(input, "knit_asis") expect_length(input, 1) - expect_matches_file(input, "print/knit_print.txt") + if (packageVersion("knitr") <= "1.28") { + expect_matches_file(input, "print/knit_print-old_knitr.txt") + } else { + expect_matches_file(input, "print/knit_print.txt") + } }) test_that("knit_print works with skim summaries", { - skimmed <- skim(iris) - summarized <- summary(skimmed) - input <- knitr::knit_print(summarized) + + skimmed <- skim(iris) + summarized <- summary(skimmed) + input <- knitr::knit_print(summarized) + if (packageVersion("knitr") <= "1.28") { + expect_matches_file(input, "print/knit_print-summary-old_knitr.txt") + } else { expect_matches_file(input, "print/knit_print-summary.txt") + } }) test_that("knit_print appropriately falls back to tibble printing", { From aceec0122224cbe5cb919da5d481038ed99184cc Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Fri, 3 Jul 2020 13:44:14 -0400 Subject: [PATCH 16/21] Add support for 
lubridate Timespan class because it is S4 and does not fall back in the way skimr expects. --- DESCRIPTION | 3 ++- NAMESPACE | 1 + R/get_skimmers.R | 7 +++++ man/get_skimmers.Rd | 6 +++++ tests/testthat/test-get_skimmers.R | 3 ++- tests/testthat/test-skim.R | 41 ++++++++++++++++++++++++++++++ 6 files changed, 59 insertions(+), 2 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index bd170e57..f3fbf632 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -121,6 +121,7 @@ Imports: Suggests: covr, extrafont, + lubridate, rmarkdown, sf, testthat (>= 2.0.0) @@ -129,7 +130,7 @@ VignetteBuilder: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.0.2 +RoxygenNote: 7.1.0 Collate: 'deprecated.R' 'dplyr.R' diff --git a/NAMESPACE b/NAMESPACE index ff68fe7b..f87e9143 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -3,6 +3,7 @@ S3method(get_skimmers,AsIs) S3method(get_skimmers,Date) S3method(get_skimmers,POSIXct) +S3method(get_skimmers,Timespan) S3method(get_skimmers,character) S3method(get_skimmers,complex) S3method(get_skimmers,default) diff --git a/R/get_skimmers.R b/R/get_skimmers.R index 22775711..dc0ec59a 100644 --- a/R/get_skimmers.R +++ b/R/get_skimmers.R @@ -167,6 +167,13 @@ get_skimmers.difftime <- function(column) { modify_default_skimmers("Date", new_skim_type = "difftime") } +#' @describeIn get_skimmers Summary functions for `Timespan` columns: [min()], +#' [max()], [median()] and [n_unique()]. +#' @export +get_skimmers.Timespan <- function(column) { + modify_default_skimmers("difftime", new_skim_type = "Timespan") +} + #' @describeIn get_skimmers Summary functions for `ts` columns: [min()], #' [max()], [median()] and [n_unique()]. 
#' @export diff --git a/man/get_skimmers.Rd b/man/get_skimmers.Rd index af77f241..4d745029 100644 --- a/man/get_skimmers.Rd +++ b/man/get_skimmers.Rd @@ -11,6 +11,7 @@ \alias{get_skimmers.Date} \alias{get_skimmers.POSIXct} \alias{get_skimmers.difftime} +\alias{get_skimmers.Timespan} \alias{get_skimmers.ts} \alias{get_skimmers.list} \alias{get_skimmers.AsIs} @@ -37,6 +38,8 @@ get_skimmers(column) \method{get_skimmers}{difftime}(column) +\method{get_skimmers}{Timespan}(column) + \method{get_skimmers}{ts}(column) \method{get_skimmers}{list}(column) @@ -107,6 +110,9 @@ default for unknown columns: \code{\link[=min_char]{min_char()}}, \code{\link[=m \item \code{difftime}: Summary functions for \code{difftime} columns: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}. +\item \code{Timespan}: Summary functions for \code{Timespan} columns: \code{\link[=min]{min()}}, +\code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}. + \item \code{ts}: Summary functions for \code{ts} columns: \code{\link[=min]{min()}}, \code{\link[=max]{max()}}, \code{\link[=median]{median()}} and \code{\link[=n_unique]{n_unique()}}. 
diff --git a/tests/testthat/test-get_skimmers.R b/tests/testthat/test-get_skimmers.R index 1789b77e..7af06830 100644 --- a/tests/testthat/test-get_skimmers.R +++ b/tests/testthat/test-get_skimmers.R @@ -17,7 +17,7 @@ test_that("get_default_skimmer_names() has a correct list of defaults", { names(defaults), c( "AsIs", "character", "complex", "Date", "difftime", "factor", "list", - "logical", "numeric", "POSIXct", "ts" + "logical", "numeric", "POSIXct", "Timespan", "ts" ) ) expect_identical(defaults$AsIs, c("n_unique", "min_length", "max_length")) @@ -27,6 +27,7 @@ test_that("get_default_skimmer_names() has a correct list of defaults", { expect_identical(defaults$complex, c("mean")) expect_identical(defaults$Date, c("min", "max", "median", "n_unique")) expect_identical(defaults$difftime, c("min", "max", "median", "n_unique")) + expect_identical(defaults$Timespan, c("min", "max", "median", "n_unique")) expect_identical(defaults$factor, c("ordered", "n_unique", "top_counts")) expect_identical(defaults$list, c("n_unique", "min_length", "max_length")) expect_identical(defaults$logical, c("mean", "count")) diff --git a/tests/testthat/test-skim.R b/tests/testthat/test-skim.R index 842e5d86..d477a28e 100644 --- a/tests/testthat/test-skim.R +++ b/tests/testthat/test-skim.R @@ -629,6 +629,47 @@ test_that("skim returns expected response for difftime vectors", { expect_identical(input$difftime.n_unique, 9L) }) +test_that("skim returns expected response for lubridate Timespan vectors", { + dt <- tibble::tibble(x = lubridate::duration(1)) + input <- skim(dt) + + # dimensions + expect_n_rows(input, 1) + expect_n_columns(input, 8) + + # classes + expect_is(input, "skim_df") + expect_is(input, "tbl_df") + expect_is(input, "tbl") + expect_is(input, "data.frame") + expect_named(input, c( + "skim_type", "skim_variable", "n_missing", "complete_rate", + "Timespan.min", "Timespan.max", "Timespan.median", + "Timespan.n_unique" + )) + + # attributes + attrs <- attributes(input) + 
expect_equal(attrs$data_rows, 1) + expect_equal(attrs$data_cols, 1) + expect_equal(attrs$df_name, "`dt`") + expect_equal( + attrs$skimmers_used, + list(Timespan = c("min", "max", "median", "n_unique")) + ) + + # values + expect_identical(input$skim_variable, "x") + expect_identical(input$skim_type, "Timespan") + expect_identical(input$n_missing, 0L) + expect_equal(input$complete_rate, 1, tolerance = .001) + expect_identical(input$Timespan.min, 1) + expect_identical(input$Timespan.max, 1) + expect_identical(input$Timespan.median, lubridate::duration(1)) + expect_identical(input$Timespan.n_unique, 1L) +}) + + test_that("skim handles objects with multiple classes", { dat <- seq(as.Date("2011-07-01"), by = 1, len = 10) dat[2] <- NA From 016fa985a11f014e457119a274da6fad0998f9e3 Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Fri, 3 Jul 2020 23:02:29 -0400 Subject: [PATCH 17/21] update codemeta.json --- codemeta.json | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/codemeta.json b/codemeta.json index d3149b44..4a733515 100644 --- a/codemeta.json +++ b/codemeta.json @@ -5,19 +5,18 @@ ], "@type": "SoftwareSourceCode", "identifier": "skimr", - "description": "A simple to use summary function that can be used\n with pipes and displays nicely in the console. The default summary\n statistics may be modified by the user as can the default formatting.\n Support for data frames and vectors is included, and users can\n implement their own skim methods for specific object types as\n described in a vignette. Default summaries include support for inline\n spark graphs. Instructions for managing these on specific operating\n systems are given in the \"Using skimr\" vignette and the README.", + "description": "A simple to use summary function that can be used with pipes\n and displays nicely in the console. The default summary statistics may\n be modified by the user as can the default formatting. 
Support for\n data frames and vectors is included, and users can implement their own\n skim methods for specific object types as described in a vignette.\n Default summaries include support for inline spark graphs.\n Instructions for managing these on specific operating systems are\n given in the \"Using skimr\" vignette and the README.", "name": "skimr: Compact and Flexible Summaries of Data", "codeRepository": "https://github.com/ropensci/skimr", "issueTracker": "https://github.com/ropensci/skimr/issues", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "2.1", + "version": "2.1.2", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R", - "version": "3.6.1", "url": "https://r-project.org" }, - "runtimePlatform": "R version 3.6.1 (2019-07-05)", + "runtimePlatform": "R version 4.0.1 (2020-06-06)", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -145,6 +144,12 @@ "givenName": "Daniel", "familyName": "Possenriede", "email": "possenriede@gmail.com" + }, + { + "@type": "Person", + "givenName": "David", + "familyName": "Zimmermann", + "email": "david_j_zimmermann@hotmail.com" } ], "copyrightHolder": [ @@ -186,6 +191,18 @@ }, "sameAs": "https://CRAN.R-project.org/package=extrafont" }, + { + "@type": "SoftwareApplication", + "identifier": "lubridate", + "name": "lubridate", + "provider": { + "@id": "https://cran.r-project.org", + "@type": "Organization", + "name": "Comprehensive R Archive Network (CRAN)", + "url": "https://cran.r-project.org" + }, + "sameAs": "https://CRAN.R-project.org/package=lubridate" + }, { "@type": "SoftwareApplication", "identifier": "rmarkdown", @@ -272,7 +289,7 @@ "@type": "SoftwareApplication", "identifier": "knitr", "name": "knitr", - "version": ">= 1.2", + "version": ">= 1.29", "provider": { "@id": "https://cran.r-project.org", "@type": "Organization", @@ -415,15 +432,12 @@ ], "releaseNotes": "https://github.com/ropensci/skimr/blob/master/NEWS.md", "readme": 
"https://github.com/ropensci/skimr/blob/master/README.md", - "fileSize": "1028.792KB", - "contIntegration": [ - "https://travis-ci.org/ropenscilabs/skimr", - "https://codecov.io/gh/ropenscilabs/skimr" - ], + "fileSize": "364528.594KB", + "contIntegration": ["https://travis-ci.org/ropensci/skimr", "https://ci.appveyor.com/project/michaelquinn32/skimr", "https://codecov.io/gh/ropensci/skimr"], "review": { "@type": "Review", - "url": "https://github.com/ropensci/onboarding/issues/175", - "provider": "http://ropensci.org" + "url": "https://github.com/ropensci/software-review/issues/175", + "provider": "https://ropensci.org" }, "keywords": [ "unconf17", From 528e1097353bebf3bdbfed387cd48dc2ae817665 Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Fri, 3 Jul 2020 23:11:28 -0400 Subject: [PATCH 18/21] Update news --- NEWS.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/NEWS.md b/NEWS.md index e1629f37..a9a9f687 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,9 @@ # skimr 2.1.2 +### MINOR IMPROVEMENTS + +* Add support for lubridate Timespan objects. + ### BUG FIXES * Update package to work with new version of `knitr`. From 35eea298d25b2dbae8bda8856a2056412388f83e Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Sat, 4 Jul 2020 19:32:17 -0400 Subject: [PATCH 19/21] Update cross links. 
--- DESCRIPTION | 2 +- codemeta.json | 2 +- man/mutate.skim_df.Rd | 4 ++-- man/print.Rd | 6 +++--- man/reexports.Rd | 4 ++-- man/sfl.Rd | 2 +- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index cf45d188..2b6e2cf2 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -130,7 +130,7 @@ VignetteBuilder: Encoding: UTF-8 LazyData: true Roxygen: list(markdown = TRUE) -RoxygenNote: 7.1.0 +RoxygenNote: 7.1.1 Collate: 'deprecated.R' 'dplyr.R' diff --git a/codemeta.json b/codemeta.json index 134e9416..3302f22c 100644 --- a/codemeta.json +++ b/codemeta.json @@ -432,7 +432,7 @@ ], "releaseNotes": "https://github.com/ropensci/skimr/blob/master/NEWS.md", "readme": "https://github.com/ropensci/skimr/blob/master/README.md", - "fileSize": "364531.118KB", + "fileSize": "364473.989KB", "contIntegration": ["https://travis-ci.org/ropensci/skimr", "https://ci.appveyor.com/project/michaelquinn32/skimr", "https://codecov.io/gh/ropensci/skimr"], "review": { "@type": "Review", diff --git a/man/mutate.skim_df.Rd b/man/mutate.skim_df.Rd index d148e58d..f0bc62c3 100644 --- a/man/mutate.skim_df.Rd +++ b/man/mutate.skim_df.Rd @@ -16,9 +16,9 @@ be the name of a new variable, and the value will be its corresponding value. Use \code{NULL} value in \code{mutate} to drop a variable. New variables overwrite existing variables of the same name. -The arguments in \code{...} are automatically quoted with \code{\link[rlang:quo]{rlang::quo()}} and +The arguments in \code{...} are automatically quoted with \code{\link[rlang:nse-defuse]{rlang::quo()}} and evaluated with \code{\link[rlang:eval_tidy]{rlang::eval_tidy()}} in the context of the data frame. They -support unquoting \code{\link[rlang:quasiquotation]{rlang::quasiquotation}} and splicing. See +support unquoting \code{\link[rlang:nse-force]{rlang::quasiquotation}} and splicing. 
See \code{vignette("programming", package = "dplyr")} for an introduction to these concepts.} } diff --git a/man/print.Rd b/man/print.Rd index f06cc434..cdf30e30 100644 --- a/man/print.Rd +++ b/man/print.Rd @@ -106,8 +106,8 @@ package. You can control printing behavior using the same global options. Printing a \code{skim_df} requires specific columns that might be dropped when -using \code{\link[dplyr:select]{dplyr::select()}} or \code{\link[dplyr:summarize]{dplyr::summarize()}} on a \code{skim_df}. In those -cases, this method falls back to \code{\link[tibble:print.tbl]{tibble::print.tbl()}}. +using \code{\link[dplyr:select]{dplyr::select()}} or \code{\link[dplyr:summarise]{dplyr::summarize()}} on a \code{skim_df}. In those +cases, this method falls back to \code{\link[tibble:formatting]{tibble::print.tbl()}}. } \section{Controlling metadata behavior}{ @@ -122,7 +122,7 @@ empty you may need to run the following \code{options(crayon.enabled = FALSE)}. } \seealso{ -\code{\link[tibble:trunc_mat]{tibble::trunc_mat()}} For a list of global options for customizing +\code{\link[tibble:formatting]{tibble::trunc_mat()}} For a list of global options for customizing print formatting. \code{\link[crayon:has_color]{crayon::has_color()}} for the variety of issues that affect tibble's color support. } diff --git a/man/reexports.Rd b/man/reexports.Rd index a41446f2..53fd354b 100644 --- a/man/reexports.Rd +++ b/man/reexports.Rd @@ -18,8 +18,8 @@ These objects are imported from other packages. Follow the links below to see their documentation. 
\describe{ - \item{magrittr}{\code{\link[magrittr]{\%>\%}}} + \item{magrittr}{\code{\link[magrittr:pipe]{\%>\%}}} - \item{tidyselect}{\code{\link[tidyselect]{contains}}, \code{\link[tidyselect]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect]{matches}}, \code{\link[tidyselect]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}} + \item{tidyselect}{\code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}} }} diff --git a/man/sfl.Rd b/man/sfl.Rd index 4383b8e7..a84b82b3 100644 --- a/man/sfl.Rd +++ b/man/sfl.Rd @@ -34,7 +34,7 @@ returned by \code{dplyr::funs()} and a list of skimming functions to drop. \description{ This constructor is used to create a named list of functions. It also you also pass \code{NULL} to identify a skimming function that you wish to remove. -Only functions that return a single value, working with \code{\link[dplyr:summarize]{dplyr::summarize()}}, +Only functions that return a single value, working with \code{\link[dplyr:summarise]{dplyr::summarize()}}, can be used within \code{sfl}. } \details{ From 6491d35a2068d791ae842a3eb78e74e4adb280e0 Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Sun, 5 Jul 2020 09:21:37 -0400 Subject: [PATCH 20/21] Update additional objects vignette --- vignettes/Supporting_additional_objects.Rmd | 107 ++++++-------------- 1 file changed, 32 insertions(+), 75 deletions(-) diff --git a/vignettes/Supporting_additional_objects.Rmd b/vignettes/Supporting_additional_objects.Rmd index ac1b555f..b9abab0c 100644 --- a/vignettes/Supporting_additional_objects.Rmd +++ b/vignettes/Supporting_additional_objects.Rmd @@ -24,11 +24,9 @@ involves two required elements and one optional element. 
- if needed, define any custom statistics If you are adding skim support to a package you will also need to add `skimr` -to the list of imports. Note that in this vignette the actual analysis will -not be run because that would require importing the `sf` package just for this -example. However to run it on your own you can install `sf` and then run the -following code. Note that code in this vignette was not evaluated when -rendering the vignette in order to avoid forcing installation of sf. +to the list of imports. Note that to run the code in this vignette you will +need to install the `sf` package. We suggest not doing that, and instead +substitute whatever package you are working with. ```{r} library(skimr) @@ -48,7 +46,7 @@ frame in the "Using skimr" vignette, this is a different type of object with special attributes. In this object there is also a column of a class that does not have default -skimmers. By default, skimr falls back to use the sfl for character variables. +skimmers. By default, skimr falls back to use the sfl for character variables. ```{r} skim(nc$geometry) @@ -98,13 +96,13 @@ times. Next, we create a custom skimming function. To do this, we need to think about the many specific classes of data in the `sf` package. From above, you can see the geometry column has two classes: 1st the specific geometry type (e.g. -`sfc_MULTIPOLYGON` `sfc_LINESTRING`, `sfc_POLYGON`, `sfc_MULTIPOINT`) and 2nd the -general sfc class. Skimr will try to find a sfl() helper function for the classes -in the order they appear in `class(.)` (see S3 classes for more detail -[*Advanced R*](https://adv-r.hadley.nz/s3.html)). The following example will build -support for `sfc`, which encompasses all `sf` objects: `sfc_MULTIPOLYGON` +`sfc_MULTIPOLYGON` `sfc_LINESTRING`, `sfc_POLYGON`, `sfc_MULTIPOINT`) and 2nd +the general sfc class. 
Skimr will try to find a sfl() helper function for the +classes in the order they appear in `class(.)` (see S3 classes for more detail +[*Advanced R*](https://adv-r.hadley.nz/s3.html)). The following example will +build support for `sfc`, which encompasses all `sf` objects: `sfc_MULTIPOLYGON` `sfc_LINESTRING`, `sfc_POLYGON`, `sfc_MULTIPOINT`. If we want custom skim_with -functions we can write sfl() helper functions for the geometry type. +functions we can write `sfl()` helper functions for the geometry type. ```{r} @@ -120,59 +118,17 @@ skim_sf <- skim_with( The example above creates a new *function*, and you can call that function on a specific column with `sfc` data to get the appropriate summary statistics. The `skim_with` factory also uses the default skimrs for things -like factors, characters, numerics, etc. Therefore our `skim_sf` is like the regular +like factors, characters, and numerics. Therefore our `skim_sf` is like the regular `skim` function with the added ability to summarize `sfc` columns. ```{r} -skim_sf(nc) +skim_sf(nc$geometry) ``` -** This seems unnecessary, but maybe I'm misunderstanding something. I think the skim_with function already creates -Creating a function that is a method of the skim_by_type generic -for the data type allows skimming of an entire data frame that contains some -columns of that type. - -```{r} -skim_by_type.sfc <- function(mangled, columns, data) { - skimmed <- dplyr::summarize_at(data, columns, mangled$funs) - build_results(skimmed, columns, NULL) -} -``` - -```{r} -skim_sf(nc) -``` - - -Sharing these functions within a separate package requires an export. -The simplest way to do this is with Roxygen. - -```{r} -#' Skimming functions for `sfc` objects. -#' @export -skim_sf <- skim_with( - sfc = sfl( - missing = n_missing, - n = length, - n_unique = n_unique, - valid = ~ sum(sf::st_is_valid(.)), - crs = get_crs - ) -) - -### This also seems unnecessary - -#' A skim_by_type function for `sfc_MULTIPOLYGON` objects. 
-#' @export -skim_by_type.sfc <- function(mangled, columns, data) { - skimmed <- dplyr::summarize_at(data, columns, mangled$funs) - skimr::build_results(skimmed, columns, NULL) -} -``` - -While this works within any package, there is an even better approach in this -case. To take full advantage of `skimr`, we'll dig a bit into its API. +While this works for any data type and you can also include it within any +package (assuming your users load skimr), there is an even better approach in +this case. To take full advantage of `skimr`, we'll dig a bit into its API. ## Adding new methods @@ -181,11 +137,15 @@ find default summary functions for each class. This is based on the S3 class system. You can learn more about it in [*Advanced R*](https://adv-r.hadley.nz/s3.html). +This requires that you add `skimr` to your list of dependencies. + To export a new set of defaults for a data type, create a method for the generic function `get_skimmers`. Each of those methods returns an `sfl`, a `skimr` function list. This is the same list-like data structure used in the `skim_with()` example above. But note! There is one key difference. When adding -a generic we also want to identify the `skim_type` in the `sfl`. +a generic we also want to identify the `skim_type` in the `sfl`. You will +probably want to use `skimr::get_skimmers.sfc()` but that will not work in a +vignette. ```{r} #' @importFrom skimr get_skimmers @@ -206,30 +166,27 @@ The same strategy follows for other data types. * return an `sfl` * make sure that the `skim_type` is there +Users of your package should load `skimr` to get the `skim()` function +(although you could import and reexport it). Once +loaded, a call to `get_default_skimmer_names()` will return defaults for your +data types as well! 
+ ```{r} -#' @export -get_skimmers.sfc_POINT <- function(column) { - sfl( - skim_type = "sfc_POINT", - n_unique = n_unique, - valid = ~ sum(sf::st_is_valid(.)) - ) -} +get_default_skimmer_names() ``` -Users of your package should load `skimr` to get the `skim()` function. Once -loaded, a call to `get_default_skimmer_names()` will return defaults for your -data types as well! +They will then be able to use `skim()` directly. ```{r} -get_default_skimmer_names() +skim(nc) ``` + ## Conclusion -This is a very simple example. For a package such as sf the custom statistics +This is a very simple example. For a package such as `sf` the custom statistics will likely be much more complex. The flexibility of `skimr` allows you to manage that. -Thanks to Jakub Nowosad, Tiernan Martin, Edzer Pebesma, Michael Sumner, and Kyle Butts for -inspiring and helping with the development of this code. +Thanks to Jakub Nowosad, Tiernan Martin, Edzer Pebesma, Michael Sumner, and +Kyle Butts for inspiring and helping with the development of this code. From cd00bccec4941019a10edc9f62e7c4b796e6881a Mon Sep 17 00:00:00 2001 From: Elin Waring Date: Sun, 5 Jul 2020 10:59:54 -0400 Subject: [PATCH 21/21] Update news and description --- DESCRIPTION | 8 ++++++-- NEWS.md | 1 + codemeta.json | 9 +++++++-- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 2b6e2cf2..700585ad 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -87,8 +87,12 @@ Authors@R: person(given = "David", family = "Zimmermann", role = "ctb", - email = "david_j_zimmermann@hotmail.com")) -Description: A simple to use summary function that can be used with pipes + email = "david_j_zimmermann@hotmail.com"), + person(given = "Kyle", + family = "Butts", + role = "ctb", + email = "buttskyle96@gmail.com")) +Description: A simple to use summary function that can be used with pipes and displays nicely in the console. The default summary statistics may be modified by the user as can the default formatting. 
Support for data frames and vectors is included, and users can implement their own diff --git a/NEWS.md b/NEWS.md index a9a9f687..ce57d229 100644 --- a/NEWS.md +++ b/NEWS.md @@ -3,6 +3,7 @@ ### MINOR IMPROVEMENTS * Add support for lubridate Timespan objects. +* Improvements to Supporting Additional Objects vignette. ### BUG FIXES diff --git a/codemeta.json b/codemeta.json index 3302f22c..42698601 100644 --- a/codemeta.json +++ b/codemeta.json @@ -5,7 +5,7 @@ ], "@type": "SoftwareSourceCode", "identifier": "skimr", - "description": "A simple to use summary function that can be used with pipes\n and displays nicely in the console. The default summary statistics may\n be modified by the user as can the default formatting. Support for\n data frames and vectors is included, and users can implement their own\n skim methods for specific object types as described in a vignette.\n Default summaries include support for inline spark graphs.\n Instructions for managing these on specific operating systems are\n given in the \"Using skimr\" vignette and the README.", + "description": "A simple to use summary function that can be used with pipes\n and displays nicely in the console. The default summary statistics may\n be modified by the user as can the default formatting. 
Support for\n data frames and vectors is included, and users can implement their own\n skim methods for specific object types as described in a vignette.\n Default summaries include support for inline spark graphs.\n Instructions for managing these on specific operating systems are\n given in the \"Using skimr\" vignette and the README.", "name": "skimr: Compact and Flexible Summaries of Data", "codeRepository": "https://github.com/ropensci/skimr", "issueTracker": "https://github.com/ropensci/skimr/issues", @@ -150,6 +150,11 @@ "givenName": "David", "familyName": "Zimmermann", "email": "david_j_zimmermann@hotmail.com" + }, + { + "@type": "Person", + "givenName": "Kyle", + "familyName": "Butts" } ], "copyrightHolder": [ @@ -432,7 +437,7 @@ ], "releaseNotes": "https://github.com/ropensci/skimr/blob/master/NEWS.md", "readme": "https://github.com/ropensci/skimr/blob/master/README.md", - "fileSize": "364473.989KB", + "fileSize": "364473.922KB", "contIntegration": ["https://travis-ci.org/ropensci/skimr", "https://ci.appveyor.com/project/michaelquinn32/skimr", "https://codecov.io/gh/ropensci/skimr"], "review": { "@type": "Review",