Permalink
Browse files

change name from ggmissing to naniar

  • Loading branch information...
njtierney committed Dec 16, 2016
1 parent 06259fc commit 6607a02415053be2386822b5d1f3b8ac0c897106
@@ -1,10 +1,10 @@
Package: ggmissing
Package: naniar
Type: Package
Title: enables ggplot to plot missing data
Title: Enables ggplot to plot missing data
Version: 0.2.9000
Author: Nicholas Tierney, Miles McBain, Di Cook
Maintainer: Nicholas Tierney <nicholas.tierney@gmail.com>
Description: ggmissing helps display missing data in ggplot
Description: naniar helps display missing data in ggplot
License: MIT + file LICENSE
LazyData: TRUE
Suggests:
@@ -2,7 +2,7 @@
#' @name geom_missing_histogram
#' @description geom_missing_histogram adds a point geometry for displaying missingness.
#' @note Very first attempt at creating a geom that is compatible with ggplot2.
#'
#'
#' Data plotting works. Still todo:
#' manipulate the colour aes so that the colours and legend appear.
#' fix awful default point sizes.
@@ -39,10 +39,10 @@ geom_missing_histogram <- function(mapping = NULL,
...
)
)

}

#' @rdname ggmissing-ggproto
#' @rdname naniar-ggproto
#' @export
GeomMissingHistogram <- ggproto("GeomMissingHistogram", GeomRect,
required_aes = "x",
@@ -11,7 +11,7 @@
#'
# this code messes up the documentation
# library(ggplot2)
# library(ggmissing)
# library(naniar)
#
# ggplot(data = brfss,
# aes(x = PHYSHLTH,
@@ -50,7 +50,7 @@ geom_missing_point <- function(mapping = NULL,

}

#' @rdname ggmissing-ggproto
#' @rdname naniar-ggproto
#' @export
GeomMissingPoint <- ggproto("GeomMissingPoint", GeomPoint,
required_aes = c("x", "y"),

This file was deleted.

Oops, something went wrong.
@@ -13,7 +13,7 @@
#'
#' @examples
#'
#' library(ggmissing)
#' library(naniar)
#' percent_missing_df(airquality)
#'
percent_missing_df <- function(dat){
@@ -36,7 +36,7 @@ percent_missing_df <- function(dat){
#'
#' @examples
#'
#' library(ggmissing)
#' library(naniar)
#'
#' percent_missing_var(airquality)
#'
@@ -65,7 +65,7 @@ percent_missing_var <- function(dat){
#'
#' @examples
#'
#' library(ggmissing)
#' library(naniar)
#' percent_missing_case(airquality)
#'
percent_missing_case <- function(dat){
@@ -0,0 +1,6 @@
#' @name naniar-ggproto
#' @title naniar-ggproto
#'
#' @description These are the stat and geom overrides using ggproto from ggplot2 that make naniar work.
#'
NULL
File renamed without changes.
@@ -0,0 +1,18 @@
# derive_shadows:
# helpers to clear up common representations of missing values
# such as "NA", "N/A", etc.
# and might allow for an easier way for users to describe different
# missing data codes, such as -99, which might indicate missing, but
# some other kind of missing value, perhaps a different mechanism of
# missingness
# ideas for function names:
# narify (play on clarify)
# darken ()
# shade
# refract

# Other commands that might be useful?
# `is_na`
# `fill_na`
# `drop_na`
# `is_null`
@@ -0,0 +1,87 @@

# Label every element of `x` as present ("!NA") or missing ("NA"),
# returning a factor so both levels always exist.
is_na <- function(x) {
  missing_flag <- is.na(x)
  factor(missing_flag, levels = c(FALSE, TRUE), labels = c("!NA", "NA"))
}

# append some shadow cols

# return a tibble that is a shadow matrix form.

as_shadow <- function(data){
  # Shadow columns carry the original column name plus an "_NA" suffix.
  shadow_names <- paste0(names(data), "_NA")

  # Recode every column into its "!NA"/"NA" factor via is_na().
  shadow <- purrr::map_df(data, is_na)
  names(shadow) <- shadow_names

  shadow
}

# as_shadow(airquality)

bind_shadow <- function(data){
  # Build the "_NA"-suffixed shadow columns, attach them to the right of
  # the original columns, and hand the result back as a tibble.
  shadow_cols <- as_shadow(data)
  combined <- dplyr::bind_cols(data, shadow_cols)
  tibble::as_tibble(combined)
}

# Example: histogram of Ozone, facetted by the Solar.R_NA shadow column
# that bind_shadow() appends.
# NOTE(review): unlike the commented-out examples further down, these two
# calls are live and are evaluated whenever this file is sourced — confirm
# that this is intended.
ggplot(data = bind_shadow(airquality),
aes(x = Ozone)) +
geom_histogram() +
facet_wrap(~Solar.R_NA,
ncol = 1)
#
# Equivalent facetting, computing the missingness label inline with is_na()
# instead of binding shadow columns first.
ggplot(data = airquality,
aes(x = Ozone)) +
geom_histogram() +
facet_wrap(~is_na(Solar.R),
ncol = 1)
#
# ggplot(bind_shadow(airquality),
# aes(x = Ozone,
# colour = Solar.R_NA)) +
# geom_density() +
# geom_rug()
#
# ggplot(airquality,
# aes(x = Ozone,
# colour = is_na(Solar.R))) +
# geom_density() +
# geom_rug()

# other "long" shadow format

# gather_shadow exists because we want to keep extra metadata about the rows
# that have missing data, together with information about the class of the
# data, in case we need to spread the data back into a wide, rather than long,
# format. It relies on the internal function `visdat:::fingerprint`, which is
# currently not a particularly complex function.

# Reshape a data frame into a long "shadow" format: one row per
# (row, variable) cell.
#
# df: a data frame (or anything tibble::as_tibble() accepts).
#
# Returns, visibly, a tibble with the columns
#   rows          - row index of the cell in `df`
#   variable      - name of the column the cell came from
#   value         - the cell value (as coerced by tidyr::gather)
#   shadow_matrix - "!NA"/"NA" factor from is_na(value)
#   valueType     - result of applying visdat:::fingerprint to each column
#                   (presumably a per-value class label; verify against visdat)
gather_shadow <- function(df){

  # Long table of the fingerprint of every column. `rows` is added last,
  # so it is the final column and is excluded from the gathered columns.
  df_val_type <- df %>%
    tibble::as_tibble() %>%
    purrr::dmap(visdat:::fingerprint) %>%
    dplyr::mutate(rows = dplyr::row_number()) %>%
    tidyr::gather_(key_col = "variable",
                   value_col = "valueType",
                   gather_cols = names(.)[-length(.)])

  # Long table of the values themselves, labelled with their missingness
  # and joined to the fingerprint table on explicit keys (no join message).
  df_shadow <- df %>%
    tibble::as_tibble() %>%
    dplyr::mutate(rows = dplyr::row_number()) %>%
    tidyr::gather(key = variable,
                  value = value,
                  -rows) %>%
    dplyr::mutate(shadow_matrix = is_na(value)) %>%
    dplyr::left_join(df_val_type, by = c("rows", "variable"))

  # perhaps define some attributes

  # Return explicitly: ending on the assignment would return invisibly.
  df_shadow
}

# Example: long shadow format of the airquality data.
# NOTE(review): this assignment runs at top level whenever the file is
# sourced — confirm that this is intended.
aq_shadow <- gather_shadow(airquality)
@@ -29,11 +29,11 @@ stat_missing_histogram <- function(mapping = NULL,
inherit.aes = inherit.aes,
params = list(na.rm = na.rm, ...)
)

}


#' @rdname ggmissing-ggproto
#' @rdname naniar-ggproto
#' @export
StatMissingHistogram <- ggproto("StatMissingHistogram", Stat,
required_aes = c("x"),
@@ -31,7 +31,7 @@ stat_missing_point <- function(mapping = NULL,

}

#' @rdname ggmissing-ggproto
#' @rdname naniar-ggproto
#' @export
StatMissingPoint <- ggproto("StatMissingPoint", Stat,
required_aes = c("x", "y"),
@@ -12,32 +12,32 @@ knitr::opts_chunk$set(
)
```

# ggmissing
[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/njtierney/ggmissing?branch=master&svg=true)](https://ci.appveyor.com/project/njtierney/ggmissing) [![Travis-CI Build Status](https://travis-ci.org/njtierney/ggmissing.svg?branch=master)](https://travis-ci.org/njtierney/ggmissing)
# naniar
[![AppVeyor Build Status](https://ci.appveyor.com/api/projects/status/github/njtierney/naniar?branch=master&svg=true)](https://ci.appveyor.com/project/njtierney/naniar) [![Travis-CI Build Status](https://travis-ci.org/njtierney/naniar.svg?branch=master)](https://travis-ci.org/njtierney/naniar)

`ggmissing` adds ggplot `geom`s to display missingness.
`naniar` adds ggplot `geom`s to display missingness.

## Why?

Missing data is ubiquitous in data analysis. However, missing values are omitted from plots in ggplot, and although it does provide a warning message telling you that you have missing data, it is tricky to visualise missing data.

`ggmissing` is part of a larger plan for a set of tidy-verse packages focussing on how to tidy, transform, visualise, model, and communicate missing data.
`naniar` is part of a larger plan for a set of tidy-verse packages focussing on how to tidy, transform, visualise, model, and communicate missing data.

It is still very much under development, and may have unknown bugs, due to the fact that ggplot was not initially built to handle missing data in this way. We will see more active development over the next 6 months.

Please note that this project is released with a [Contributor Code of Conduct](CONDUCT.md). By participating in this project you agree to abide by its terms.

## What does it do?

`ggmissing` provides:
`naniar` provides:

1. Missing data geoms for ggplot (`geom_missing_point`)

2. Tidyverse summary functions for missing data (`summarise_missingness` and friends)

3. ggplots for exploring missing data (`gg_missing_var`, `gg_missing_case`, `gg_missing_which`)

## Using ggmissing {#ggplot}
## Using naniar {#ggplot}

### How does it work?

@@ -46,8 +46,8 @@ Plotting missing data might sound a little strange - how do you visualise someth
To illustrate, let's explore the relationship between Ozone and Solar radiation from the airquality dataset.

```{r message = FALSE, error = FALSE}
library(ggmissing)
# devtools::install_github("njtierney/ggmissing")
library(naniar)
# devtools::install_github("njtierney/naniar")
library(ggplot2)
library(dplyr)
@@ -68,7 +68,7 @@ We can instead use the `geom_missing_point()` to display the missing data

```{r}
library(ggmissing)
library(naniar)
library(ggplot2)
ggplot(data = airquality,
@@ -105,7 +105,7 @@ p1 + theme_bw()

# Missing data tidying functions {#tidying}

`ggmissing` uses some missingness transformation functions to set up tables for visualisation.
`naniar` uses some missingness transformation functions to set up tables for visualisation.

```{r}
@@ -207,7 +207,7 @@ vis_miss(airquality)

# Future Work

`ggmissing` has not seen much attention for the past 6 months or so, and so will be undergoing more changes over the next 6 months, with plans to have the package in CRAN before the end of 2016.
`naniar` has not seen much attention for the past 6 months or so, and so will be undergoing more changes over the next 6 months, with plans to have the package in CRAN before the end of 2016.

As such, we plan to extend the `geom_missing` family to include:

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.
File renamed without changes.
@@ -1,17 +1,20 @@
context("missingness_tidiers")

# Each percent_missing_* helper is expected to return a length-one double
# when given the airquality data.
# NOTE(review): the ggmissing::-qualified expectations look like the
# pre-rename versions of the unqualified ones that follow — confirm whether
# both sets are meant to be kept.
test_that("percent_missing_* produces a single, numeric number", {
expect_length(ggmissing::percent_missing_df(airquality), 1)
testthat::expect_type(ggmissing::percent_missing_df(airquality), "double")
expect_length(ggmissing::percent_missing_var(airquality), 1)
testthat::expect_type(ggmissing::percent_missing_var(airquality), "double")
expect_length(ggmissing::percent_missing_case(airquality), 1)
testthat::expect_type(ggmissing::percent_missing_case(airquality), "double")

expect_length(percent_missing_df(airquality), 1)
testthat::expect_type(percent_missing_df(airquality), "double")

expect_length(percent_missing_var(airquality), 1)
testthat::expect_type(percent_missing_var(airquality), "double")

expect_length(percent_missing_case(airquality), 1)
testthat::expect_type(percent_missing_case(airquality), "double")
})

# The table_missing_* helpers are expected to return objects of class
# "tbl_df" when given the airquality data.
# NOTE(review): the ggmissing::-qualified expectations look like the
# pre-rename versions of the unqualified ones that follow — confirm whether
# both sets are meant to be kept.
test_that("table_missing_* produces a data_frame", {
testthat::expect_is(ggmissing::table_missing_var(airquality), "tbl_df")
testthat::expect_is(ggmissing::table_missing_case(airquality), "tbl_df")
testthat::expect_is(table_missing_var(airquality), "tbl_df")
testthat::expect_is(table_missing_case(airquality), "tbl_df")
})


0 comments on commit 6607a02

Please sign in to comment.