diff --git a/DESCRIPTION b/DESCRIPTION index 59e01bb1..c0dba11e 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -51,9 +51,7 @@ Suggests: showtext, mvtnorm, covr, - palmerpenguins, broom (>= 0.5.6), - modelr, patchwork, tidyr (>= 1.0.0), ragg, diff --git a/NEWS.md b/NEWS.md index 908e9214..4ff9811b 100755 --- a/NEWS.md +++ b/NEWS.md @@ -33,13 +33,13 @@ Minor changes: to more easily label spikes. (#203; thanks @mattansb for the suggestion). * The `arrow` parameter is now supported for intervals in `geom_slabinterval()` (#206; thanks to @ASKurz for the suggestion). -* Several dependency reductions: removed {cowplot}, {purrr}, and {forcats} - from *Suggests*; moved {tidyselect} and {dplyr} from *Imports* to *Suggests*. - The latter two are only strictly necessary for `curve_interval()` due to its - use of grouped data frames and tidy selection to specify which columns are - conditional and which are joint (the use of grouped data frames with - `point_interval()` is less strictly necessary, and not used by stats, so - is easier to avoid as an absolute dependency). +* Several dependency reductions: removed {cowplot}, {purrr}, {forcats}, + {palmerpenguins}, and {modelr} from *Suggests*; moved {tidyselect} and {dplyr} + from *Imports* to *Suggests*. The latter two are only strictly necessary for + `curve_interval()` due to its use of grouped data frames and tidy selection to + specify which columns are conditional and which are joint (the use of grouped + data frames with `point_interval()` is less strictly necessary, and not used + by stats, so is easier to avoid as an absolute dependency). 
Documentation: diff --git a/README.Rmd b/README.Rmd index 0c7b3403..cbe040d6 100755 --- a/README.Rmd +++ b/README.Rmd @@ -133,19 +133,18 @@ library(ggplot2) library(dplyr) library(tidyr) library(ggdist) -library(broom) m_mpg = lm(mpg ~ hp * cyl, data = mtcars) mtcars_preds = mtcars %>% group_by(cyl) %>% expand(hp = seq(min(hp), max(hp), length.out = 50)) %>% - augment(m_mpg, newdata = ., se_fit = TRUE) + bind_cols(predict(m_mpg, newdata = ., se.fit = TRUE)) mtcars_plot = function(.width = c(.6, .8, .95), alpha = 1/4, ...) { mtcars_preds %>% ggplot(aes(x = hp, fill = ordered(cyl), color = ordered(cyl))) + stat_lineribbon( - aes(dist = "norm", arg1 = .fitted, arg2 = .se.fit), + aes(ydist = dist_student_t(df, fit, se.fit)), .width = .width, alpha = alpha, ... ) + geom_point(aes(y = mpg), data = mtcars) + diff --git a/man/figures/README/lineribbon-1.png b/man/figures/README/lineribbon-1.png index 4ff3ed93..49e634c7 100755 Binary files a/man/figures/README/lineribbon-1.png and b/man/figures/README/lineribbon-1.png differ diff --git a/man/figures/README/preview-1.png b/man/figures/README/preview-1.png index f8db8799..376f682c 100755 Binary files a/man/figures/README/preview-1.png and b/man/figures/README/preview-1.png differ diff --git a/vignettes/dotsinterval.Rmd b/vignettes/dotsinterval.Rmd index de4cce2b..959ab11c 100755 --- a/vignettes/dotsinterval.Rmd +++ b/vignettes/dotsinterval.Rmd @@ -30,7 +30,6 @@ library(distributional) library(ggdist) library(ggplot2) library(patchwork) -library(palmerpenguins) theme_set(theme_ggdist()) ``` @@ -852,34 +851,35 @@ tibble( ## Logit dotplots To demonstrate another useful plot type, the *logit dotplot* (courtesy [Ladislas Nalborczyk](https://lnalborczyk.github.io/post/glm/)), we'll fit a -logistic regression to some data on the sex and body mass of Gentoo penguins. +logistic regression to some data on the petal length of the *Iris versicolor* +and *Iris virginica* flowers. 
First, we'll demo varying the `side` aesthetic to create two dotplots that are "facing" each other: `scale_side_mirrored()` will set the `side` aesthetic to `"top"` or `"bottom"` if two categories are assigned to `side`. We also adjust the `scale` so that the dots don't overlap: -```{r gentoo_raw, fig.width = med_width, fig.height = med_height} -gentoo = penguins %>% - filter(species == "Gentoo", !is.na(sex)) +```{r iris_v, fig.width = med_width, fig.height = med_height} +iris_v = iris %>% + filter(Species != "setosa") -gentoo %>% - ggplot(aes(x = body_mass_g, y = sex, side = sex)) + +iris_v %>% + ggplot(aes(x = Petal.Length, y = Species, side = Species)) + geom_dots(scale = 0.5) + scale_side_mirrored(guide = "none") + ggtitle( "geom_dots(scale = 0.5)", - 'aes(side = sex) + scale_side_mirrored()' + 'aes(side = Species) + scale_side_mirrored()' ) ``` This can also be accomplished by setting side directly and omitting -`scale_side_mirrored()`; e.g. via `aes(side = ifelse(sex == "male", "bottom", "top"))`. +`scale_side_mirrored()`; e.g. via `aes(side = ifelse(Species == "virginica", "bottom", "top"))`. 
-Now we fit a logistic regression predicting sex based on body mass: +Now we fit a logistic regression predicting species based on petal length: -```{r m_gentoo} -m = glm(sex == "male" ~ body_mass_g, data = gentoo, family = binomial) +```{r m_iris_v} +m = glm(Species == "virginica" ~ Petal.Length, data = iris_v, family = binomial) m ``` @@ -888,8 +888,8 @@ on top of the mirrored dotplots to create a *logit dotplot*: ```{r logit_dotplot, fig.width = med_width, fig.height = med_height/1.5} # construct a prediction grid for the fit line -prediction_grid = with(gentoo, - data.frame(body_mass_g = seq(min(body_mass_g), max(body_mass_g), length.out = 100)) +prediction_grid = with(iris_v, + data.frame(Petal.Length = seq(min(Petal.Length), max(Petal.Length), length.out = 100)) ) prediction_grid %>% @@ -898,25 +898,25 @@ prediction_grid %>% # distribution describing uncertainty in log odds log_odds = dist_normal(fit, se.fit), # inverse-logit transform the log odds to get - # distribution describing uncertainty in Pr(sex == "male") - p_male = dist_transformed(log_odds, plogis, qlogis) + # distribution describing uncertainty in Pr(Species == "virginica") + p_virginica = dist_transformed(log_odds, plogis, qlogis) ) %>% - ggplot(aes(x = body_mass_g)) + + ggplot(aes(x = Petal.Length)) + geom_dots( - aes(y = as.numeric(sex == "male"), side = sex), + aes(y = as.numeric(Species == "virginica"), side = Species), scale = 0.4, - data = gentoo + data = iris_v ) + stat_lineribbon( - aes(ydist = p_male), alpha = 1/4, fill = "#08306b" + aes(ydist = p_virginica), alpha = 1/4, fill = "#08306b" ) + scale_side_mirrored(guide = "none") + coord_cartesian(ylim = c(0, 1)) + labs( - title = "logit dotplot: stat_dots() with stat_lineribbon()", - subtitle = 'aes(side = sex) + scale_side_mirrored()', - x = "Body mass (g) of Gentoo penguins", - y = "Pr(sex = male)" + title = "logit dotplot: geom_dots() with stat_lineribbon()", + subtitle = 'aes(side = Species) + scale_side_mirrored()', + x = "Petal 
Length", + y = "Pr(Species = virginica)" ) ``` diff --git a/vignettes/freq-uncertainty-vis.Rmd b/vignettes/freq-uncertainty-vis.Rmd index 2ca690af..0be9bc79 100644 --- a/vignettes/freq-uncertainty-vis.Rmd +++ b/vignettes/freq-uncertainty-vis.Rmd @@ -33,7 +33,6 @@ library(tidyr) library(ggdist) library(ggplot2) library(broom) -library(modelr) library(distributional) theme_set(theme_ggdist()) @@ -114,7 +113,7 @@ m_ABC %>% ) ``` -If we would rather see uncertainty in conditional means, we can instead use `modelr::data_grid()` along with `broom::augment()` (similar to how we can use `modelr::data_grid()` with `tidybayes::add_fitted_draws()` for Bayesian models). Here we want the confidence distribution for the mean in condition $c$, $\tilde\mu_c$: +If we would rather see uncertainty in conditional means, we can instead use `tidyr::expand()` along with `broom::augment()` (similar to how we can use `tidyr::expand()` with `tidybayes::add_fitted_draws()` for Bayesian models). Here we want the confidence distribution for the mean in condition $c$, $\tilde\mu_c$: $$ \tilde\mu_c \sim \textrm{student_t}\left(\nu, \hat\mu_c, \sigma_{\hat\mu_c} \right) @@ -130,7 +129,7 @@ Putting everything together, we have: ```{r halfeye_with_data, fig.width = tiny_width, fig.height = tiny_height} ABC %>% - data_grid(condition) %>% + expand(condition) %>% augment(m_ABC, newdata = ., se_fit = TRUE) %>% ggplot(aes(y = condition)) + stat_halfeye( @@ -146,7 +145,7 @@ Of course, this works with the entire `stat_slabinterval()` family. 
Here are gra ```{r gradientinterval, fig.width = tiny_width, fig.height = tiny_height} ABC %>% - data_grid(condition) %>% + expand(condition) %>% augment(m_ABC, newdata = ., se_fit = TRUE) %>% ggplot(aes(y = condition)) + stat_gradientinterval( @@ -163,7 +162,7 @@ Or complementary cumulative distribution function (CCDF) bar plots: ```{r ccdfinterval, fig.width = tiny_width, fig.height = tiny_height} ABC %>% - data_grid(condition) %>% + expand(condition) %>% augment(m_ABC, newdata = ., se_fit = TRUE) %>% ggplot(aes(y = condition)) + stat_ccdfinterval( @@ -175,7 +174,7 @@ We can also create quantile dotplots by using the `dots` family of geoms. Quanti ```{r dotplot, fig.width = tiny_width, fig.height = tiny_height} ABC %>% - data_grid(condition) %>% + expand(condition) %>% augment(m_ABC, newdata = ., se_fit = TRUE) %>% ggplot(aes(y = condition)) + stat_dots( @@ -194,12 +193,12 @@ The same principle of reconstructing the confidence distribution allows us to us m_mpg = lm(mpg ~ hp * cyl, data = mtcars) ``` -Again we'll use `modelr::data_grid()` with `broom::tidy()`, but now we'll employ `stat_lineribbon()`: +Again we'll use `tidyr::expand()` with `broom::augment()`, but now we'll employ `stat_lineribbon()`: ```{r lineribbon, fig.width = tiny_width, fig.height = tiny_height} mtcars %>% group_by(cyl) %>% - data_grid(hp = seq_range(hp, n = 101)) %>% + expand(hp = seq(min(hp), max(hp), length.out = 101)) %>% augment(m_mpg, newdata = ., se_fit = TRUE) %>% ggplot(aes(x = hp, fill = ordered(cyl), color = ordered(cyl))) + stat_lineribbon( @@ -226,7 +225,7 @@ variable is computed by `stat_lineribbon()` and is an ordered factor version of ```{r lineribbon_lightened, fig.width = tiny_width, fig.height = tiny_height} mtcars %>% group_by(cyl) %>% - data_grid(hp = seq_range(hp, n = 101)) %>% + expand(hp = seq(min(hp), max(hp), length.out = 101)) %>% augment(m_mpg, newdata = ., se_fit = TRUE) %>% ggplot(aes(x = hp, color = ordered(cyl))) + stat_lineribbon(aes(