Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
change name from ggmissing to naniar
  • Loading branch information
njtierney committed Dec 16, 2016
1 parent 06259fc commit 6607a02
Show file tree
Hide file tree
Showing 19 changed files with 155 additions and 47 deletions.
6 changes: 3 additions & 3 deletions DESCRIPTION
@@ -1,10 +1,10 @@
Package: ggmissing
Package: naniar
Type: Package
Title: enables ggplot to plot missing data
Title: Enables ggplot to plot missing data
Version: 0.2.9000
Author: Nicholas Tierney, Miles McBain, Di Cook,
Maintainer: <nicholas.tierney@gmail.com>
Description: ggmissing helps display missing data in ggplot
Description: naniar helps display missing data in ggplot
License: MIT + file LICENSE
LazyData: TRUE
Suggests:
Expand Down
6 changes: 3 additions & 3 deletions R/geom_missing_histogram.R
Expand Up @@ -2,7 +2,7 @@
#' @name geom_missing_histogram
#' @description geom_missing_histogram adds a point geometry for displaying missingness.
#' @note Very first attempt at creating a geom that is compatible with ggplot2.
#'
#'
#' Data plotting works. Still todo:
#' manipulate the colour aes so that the colours and legend appear.
#' fix awful default point sizes.
Expand Down Expand Up @@ -39,10 +39,10 @@ geom_missing_histogram <- function(mapping = NULL,
...
)
)

}

#' @rdname ggmissing-ggproto
#' @rdname naniar-ggproto
#' @export
GeomMissingHistogram <- ggproto("GeomMissingHistogram", GeomRect,
required_aes = "x",
Expand Down
4 changes: 2 additions & 2 deletions R/geom_missing_point.R
Expand Up @@ -11,7 +11,7 @@
#'
# this code messes up the documentation
# library(ggplot2)
# library(ggmissing)
# library(naniar)
#
# ggplot(data = brfss,
# aes(x = PHYSHLTH,
Expand Down Expand Up @@ -50,7 +50,7 @@ geom_missing_point <- function(mapping = NULL,

}

#' @rdname ggmissing-ggproto
#' @rdname naniar-ggproto
#' @export
GeomMissingPoint <- ggproto("GeomMissingPoint", GeomPoint,
required_aes = c("x", "y"),
Expand Down
6 changes: 0 additions & 6 deletions R/ggmissing-ggproto.R

This file was deleted.

6 changes: 3 additions & 3 deletions R/missing_data_tidiers.R
Expand Up @@ -13,7 +13,7 @@
#'
#' @examples
#'
#' library(ggmissing)
#' library(naniar)
#' percent_missing_df(airquality)
#'
percent_missing_df <- function(dat){
Expand All @@ -36,7 +36,7 @@ percent_missing_df <- function(dat){
#'
#' @examples
#'
#' library(ggmissing)
#' library(naniar)
#'
#' percent_missing_var(airquality)
#'
Expand Down Expand Up @@ -65,7 +65,7 @@ percent_missing_var <- function(dat){
#'
#' @examples
#'
#' library(ggmissing)
#' library(naniar)
#' percent_missing_case(airquality)
#'
percent_missing_case <- function(dat){
Expand Down
6 changes: 6 additions & 0 deletions R/naniar-ggproto.R
@@ -0,0 +1,6 @@
#' @name naniar-ggproto
#' @title naniar-ggproto
#'
#' @description These are the stat and geom overrides using ggproto from ggplot2 that make naniar work.
#'
NULL
File renamed without changes.
18 changes: 18 additions & 0 deletions R/revealers.R
@@ -0,0 +1,18 @@
# derive_shadows:
# helpers to clear up common representations of missing values
# such as "NA", "N/A", etc.
# and might allow for an easier way for users to describe different
# missing data codes, such as -99, which might indicate missing, but
# some other kind of missing value, perhaps a different mechanism of
# missingness
# ideas for function names:
# narify (play on clarify)
# darken ()
# shade
# refract

# Other commands that might be useful?
# `is_na`
# `fill_na`
# `drop_na`
# `is_null`
87 changes: 87 additions & 0 deletions R/shadows.R
@@ -0,0 +1,87 @@

# Label every element of `x` by its missingness. The result is a factor
# with two levels: "!NA" where a value is present and "NA" where it is
# missing.
is_na <- function(x) {
  missing_flag <- is.na(x)
  factor(missing_flag, levels = c(FALSE, TRUE), labels = c("!NA", "NA"))
}

# append some shadow cols

# Build the shadow-matrix form of `data`: each column is passed through
# is_na() and the result is returned as a tibble whose column names are
# the original names with "_NA" appended.
as_shadow <- function(data){
  shadow_names <- paste0(names(data), "_NA")

  shadow <- purrr::map_df(data, is_na)
  names(shadow) <- shadow_names

  shadow
}

# as_shadow(airquality)

bind_shadow <- function(data){
  # Column-bind the original data with its shadow matrix (as produced by
  # as_shadow()) and hand the combined table back as a tibble.
  shadow_cols <- as_shadow(data)
  combined <- dplyr::bind_cols(data, shadow_cols)

  tibble::as_tibble(combined)
}

# Example usage. Kept commented out so that nothing is evaluated when this
# file is sourced (top-level code in an R/ file runs at build time), in
# line with the commented-out density examples further down.
#
# ggplot(data = bind_shadow(airquality),
#        aes(x = Ozone)) +
#   geom_histogram() +
#   facet_wrap(~Solar.R_NA,
#              ncol = 1)
#
# ggplot(data = airquality,
#        aes(x = Ozone)) +
#   geom_histogram() +
#   facet_wrap(~is_na(Solar.R),
#              ncol = 1)
#
# ggplot(bind_shadow(airquality),
# aes(x = Ozone,
# colour = Solar.R_NA)) +
# geom_density() +
# geom_rug()
#
# ggplot(airquality,
# aes(x = Ozone,
# colour = is_na(Solar.R))) +
# geom_density() +
# geom_rug()

# other "long" shadow format

# shadow_join (implemented below as gather_shadow) exists because we want to
# include extra metadata about the rows that have missing data, as well as
# information about the class of the data, in case we need to reshape the
# data back into a wide, rather than long, format. To get the class
# information it uses `visdat:::fingerprint`, an internal visdat function
# that is currently not particularly complex.

# Reshape `df` into a long "shadow" format with one row per
# (row, variable) pair.
#
# Arguments:
#   df  a data frame (anything tibble::as_tibble() accepts).
#
# Returns (visibly) a tibble with the columns
#   rows           row number of the observation in `df`
#   variable       name of the column the value came from
#   value          the cell value as gathered by tidyr::gather()
#   shadow_matrix  is_na(value): factor with levels "!NA" / "NA"
#   valueType      output of visdat:::fingerprint() for that column
#                  (presumably a per-cell class label -- TODO confirm)
gather_shadow <- function(df){

  # Long table of per-column type information. `rows` is appended last, so
  # names(.)[-length(.)] selects every original column for gathering.
  # NOTE(review): purrr::dmap may not exist in newer purrr releases --
  # confirm against the installed version.
  df_val_type <- df %>%
    tibble::as_tibble() %>%
    purrr::dmap(visdat:::fingerprint) %>%
    dplyr::mutate(rows = dplyr::row_number()) %>%
    tidyr::gather_(key_col = "variable",
                   value_col = "valueType",
                   gather_cols = names(.)[-length(.)])

  # Long table of the values themselves plus their missingness label,
  # joined to the type information on the two shared key columns. Naming
  # the keys explicitly avoids the "Joining, by = ..." message.
  df_shadow <- df %>%
    tibble::as_tibble() %>%
    dplyr::mutate(rows = dplyr::row_number()) %>%
    tidyr::gather(key = variable,
                  value = value,
                  -rows) %>%
    dplyr::mutate(shadow_matrix = is_na(value)) %>%
    dplyr::left_join(df_val_type, by = c("rows", "variable"))

  # perhaps define some attributes

  # Return explicitly: ending on the assignment above would hand the result
  # back invisibly, so a bare call at the console would print nothing.
  df_shadow

}

# Long-format shadow data for the `airquality` dataset.
# NOTE(review): this assignment runs whenever the file is sourced --
# confirm it is meant to live here rather than in an example or a test.
aq_shadow <- gather_shadow(airquality)
4 changes: 2 additions & 2 deletions R/stat_missing_histogram.R
Expand Up @@ -29,11 +29,11 @@ stat_missing_histogram <- function(mapping = NULL,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)

}


#' @rdname ggmissing-ggproto
#' @rdname naniar-ggproto
#' @export
StatMissingHistogram <- ggproto("StatMissingHistogram", Stat,
required_aes = c("x"),
Expand Down
2 changes: 1 addition & 1 deletion R/stat_missing_point.R
Expand Up @@ -31,7 +31,7 @@ stat_missing_point <- function(mapping = NULL,

}

#' @rdname ggmissing-ggproto
#' @rdname naniar-ggproto
#' @export
StatMissingPoint <- ggproto("StatMissingPoint", Stat,
required_aes = c("x", "y"),
Expand Down
22 changes: 11 additions & 11 deletions README.Rmd
Expand Up @@ -12,32 +12,32 @@ knitr::opts_chunk$set(
)
```

# ggmissing
[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/njtierney/ggmissing?branch=master&svg=true)](https://ci.appveyor.com/project/njtierney/ggmissing) [![Travis-CI Build Status](https://travis-ci.org/njtierney/ggmissing.svg?branch=master)](https://travis-ci.org/njtierney/ggmissing)
# naniar
[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/njtierney/naniar?branch=master&svg=true)](https://ci.appveyor.com/project/njtierney/naniar) [![Travis-CI Build Status](https://travis-ci.org/njtierney/naniar.svg?branch=master)](https://travis-ci.org/njtierney/naniar)

`ggmissing` adds ggplot `geom`s to display missingness.
`naniar` adds ggplot `geom`s to display missingness.

## Why?

Missing data is ubiquitous in data analysis. However, points with missing values are omitted in ggplot, and although it does provide a warning message telling you that you have missing data, it is tricky to visualise missing data.

`ggmissing` is part of a larger plan for a set of tidy-verse packages focussing on how to tidy, transform, visualise, model, and communicate missing data.
`naniar` is part of a larger plan for a set of tidy-verse packages focussing on how to tidy, transform, visualise, model, and communicate missing data.

It is still very much under development, and may have unknown bugs, due to the fact that ggplot was not initially built to handle missing data in this way. We will see more active development over the next 6 months.

Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.

## What does it do?

`ggmissing` provides:
`naniar` provides:

1. Missing data geoms for ggplot (`geom_missing_point`)

2. Tidyverse summary functions for missing data (`summarise_missingness` and friends)

3. ggplots for exploring missing data (`gg_missing_var`, `gg_missing_case`, `gg_missing_which`)

## Using ggmissing {#ggplot}
## Using naniar {#ggplot}

### How does it work?

Expand All @@ -46,8 +46,8 @@ Plotting missing data might sound a little strange - how do you visualise someth
To illustrate, let's explore the relationship between Ozone and Solar radiation from the airquality dataset.

```{r messages = FALSE, error = FALSE}
library(ggmissing)
# devtools::install_github("njtierney/ggmissing")
library(naniar)
# devtools::install_github("njtierney/naniar")
library(ggplot2)
library(dplyr)
Expand All @@ -68,7 +68,7 @@ We can instead use the `geom_missing_point()` to display the missing data

```{r}
library(ggmissing)
library(naniar)
library(ggplot2)
ggplot(data = airquality,
Expand Down Expand Up @@ -105,7 +105,7 @@ p1 + theme_bw()

# Missing data tidying functions {#tidying}

`ggmissing` uses some missingness transformation functions to set up tables for visualisation.
`naniar` uses some missingness transformation functions to set up tables for visualisation.

```{r}
Expand Down Expand Up @@ -207,7 +207,7 @@ vis_miss(airquality)

# Future Work

`ggmissing` has not seen much attention for the past 6 months or so, and so will be undergoing more changes over the next 6 months, with plans to have the package in CRAN before the end of 2016.
`naniar` has not seen much attention for the past 6 months or so, and so will be undergoing more changes over the next 6 months, with plans to have the package in CRAN before the end of 2016.

As such, we plan to extend the `geom_missing` family to include:

Expand Down
2 changes: 1 addition & 1 deletion man/ggmissing.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

8 changes: 4 additions & 4 deletions man/ggmissing-ggproto.Rd → man/naniar-ggproto.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/percent_missing_case.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/percent_missing_df.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion man/percent_missing_var.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

File renamed without changes.
19 changes: 11 additions & 8 deletions tests/testthat/test-missingness-tidiers.R
@@ -1,17 +1,20 @@
context("missingness_tidiers")

# Each percent_missing_* helper is expected to return a length-one double
# when given `airquality`.
# NOTE(review): this span comes from a diff rendering; the `ggmissing::`
# lines look like the pre-rename versions of the unqualified calls that
# follow them -- confirm which set should remain before running.
test_that("percent_missing_* produces a single, numeric number", {
expect_length(ggmissing::percent_missing_df(airquality), 1)
testthat::expect_type(ggmissing::percent_missing_df(airquality), "double")
expect_length(ggmissing::percent_missing_var(airquality), 1)
testthat::expect_type(ggmissing::percent_missing_var(airquality), "double")
expect_length(ggmissing::percent_missing_case(airquality), 1)
testthat::expect_type(ggmissing::percent_missing_case(airquality), "double")

expect_length(percent_missing_df(airquality), 1)
testthat::expect_type(percent_missing_df(airquality), "double")

expect_length(percent_missing_var(airquality), 1)
testthat::expect_type(percent_missing_var(airquality), "double")

expect_length(percent_missing_case(airquality), 1)
testthat::expect_type(percent_missing_case(airquality), "double")
})

# The table_missing_* helpers are expected to return objects inheriting
# from "tbl_df" when given `airquality`.
# NOTE(review): this span comes from a diff rendering; the `ggmissing::`
# lines look like the pre-rename versions of the unqualified calls that
# follow them -- confirm which set should remain before running.
test_that("table_missing_* produces a data_frame", {
testthat::expect_is(ggmissing::table_missing_var(airquality), "tbl_df")
testthat::expect_is(ggmissing::table_missing_case(airquality), "tbl_df")
testthat::expect_is(table_missing_var(airquality), "tbl_df")
testthat::expect_is(table_missing_case(airquality), "tbl_df")
})


0 comments on commit 6607a02

Please sign in to comment.