drop palmerpenguins and modelr from suggests

mjskay · Feb 29, 2024 · fe42859 · fe42859
1 parent 2a3d9d9
commit fe42859
Show file tree

Hide file tree

Showing 7 changed files with 41 additions and 45 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -51,9 +51,7 @@ Suggests:
     showtext,
     mvtnorm,
     covr,
-    palmerpenguins,
     broom (>= 0.5.6),
-    modelr,
     patchwork,
     tidyr (>= 1.0.0),
     ragg,

diff --git a/NEWS.md b/NEWS.md
@@ -33,13 +33,13 @@ Minor changes:
   to more easily label spikes. (#203; thanks @mattansb for the suggestion).
 * The `arrow` parameter is now supported for intervals in `geom_slabinterval()`
   (#206; thanks to @ASKurz for the suggestion).
-* Several dependency reductions: removed {cowplot}, {purrr}, and {forcats}
-  from *Suggests*; moved {tidyselect} and {dplyr} from *Imports* to *Suggests*.
-  The latter two are only strictly necessary for `curve_interval()` due to its
-  use of grouped data frames and tidy selection to specify which columns are
-  conditional and which are joint (the use of grouped data frames with
-  `point_interval()` is less strictly necessary, and not used by stats, so
-  is easier to avoid as an absolute dependency).
+* Several dependency reductions: removed {cowplot}, {purrr}, {forcats}, 
+  {palmerpenguins}, and {modelr} from *Suggests*; moved {tidyselect} and {dplyr}
+  from *Imports* to *Suggests*. The latter two are only strictly necessary for 
+  `curve_interval()` due to its use of grouped data frames and tidy selection to 
+  specify which columns are conditional and which are joint (the use of grouped
+  data frames with `point_interval()` is less strictly necessary, and not used 
+  by stats, so is easier to avoid as an absolute dependency).
 
 Documentation:
 

diff --git a/README.Rmd b/README.Rmd
@@ -133,19 +133,18 @@ library(ggplot2)
 library(dplyr)
 library(tidyr)
 library(ggdist)
-library(broom)
 
 m_mpg = lm(mpg ~ hp * cyl, data = mtcars)
 mtcars_preds = mtcars %>%
   group_by(cyl) %>%
   expand(hp = seq(min(hp), max(hp), length.out = 50)) %>%
-  augment(m_mpg, newdata = ., se_fit = TRUE)
+  bind_cols(predict(m_mpg, newdata = ., se.fit = TRUE))
 
 mtcars_plot = function(.width = c(.6, .8, .95), alpha = 1/4, ...) {
   mtcars_preds %>%
     ggplot(aes(x = hp, fill = ordered(cyl), color = ordered(cyl))) +
     stat_lineribbon(
-      aes(dist = "norm", arg1 = .fitted, arg2 = .se.fit), 
+      aes(ydist = dist_student_t(df, fit, se.fit)), 
       .width = .width, alpha = alpha, ...
     ) +
     geom_point(aes(y = mpg), data = mtcars) +

diff --git a/man/figures/README/lineribbon-1.png b/man/figures/README/lineribbon-1.png
diff --git a/man/figures/README/preview-1.png b/man/figures/README/preview-1.png
diff --git a/vignettes/dotsinterval.Rmd b/vignettes/dotsinterval.Rmd
@@ -30,7 +30,6 @@ library(distributional)
 library(ggdist)
 library(ggplot2)
 library(patchwork)
-library(palmerpenguins)
 
 theme_set(theme_ggdist())
 ```
@@ -852,34 +851,35 @@ tibble(
 ## Logit dotplots
 
 To demonstrate another useful plot type, the *logit dotplot* (courtesy [Ladislas Nalborczyk](https://lnalborczyk.github.io/post/glm/)), we'll fit a 
-logistic regression to some data on the sex and body mass of Gentoo penguins.
+logistic regression to some data on the petal lengths of *Iris versicolor* 
+and *Iris virginica* flowers.
 
 First, we'll demo varying the `side` aesthetic to create two dotplots that are
 "facing" each other: `scale_side_mirrored()` will set the `side` aesthetic to 
 `"top"` or `"bottom"` if two categories are assigned to `side`. We also adjust 
 the `scale` so that the dots don't overlap:
 
-```{r gentoo_raw, fig.width = med_width, fig.height = med_height}
-gentoo = penguins %>%
-  filter(species == "Gentoo", !is.na(sex))
+```{r iris_v, fig.width = med_width, fig.height = med_height}
+iris_v = iris %>%
+  filter(Species != "setosa")
 
-gentoo %>%
-  ggplot(aes(x = body_mass_g, y = sex, side = sex)) +
+iris_v %>%
+  ggplot(aes(x = Petal.Length, y = Species, side = Species)) +
   geom_dots(scale = 0.5) +
   scale_side_mirrored(guide = "none") +
   ggtitle(
     "geom_dots(scale = 0.5)",
-    'aes(side = sex) + scale_side_mirrored()'
+    'aes(side = Species) + scale_side_mirrored()'
   )
 ```
 
 This can also be accomplished by setting side directly and omitting
-`scale_side_mirrored()`; e.g. via `aes(side = ifelse(sex == "male", "bottom", "top"))`.
+`scale_side_mirrored()`; e.g. via `aes(side = ifelse(Species == "virginica", "bottom", "top"))`.
 
-Now we fit a logistic regression predicting sex based on body mass:
+Now we fit a logistic regression predicting species based on petal length:
 
-```{r m_gentoo}
-m = glm(sex == "male" ~ body_mass_g, data = gentoo, family = binomial)
+```{r m_iris_v}
+m = glm(Species == "virginica" ~ Petal.Length, data = iris_v, family = binomial)
 m
 ```
 
@@ -888,8 +888,8 @@ on top of the mirrored dotplots to create a *logit dotplot*:
 
 ```{r logit_dotplot, fig.width = med_width, fig.height = med_height/1.5}
 # construct a prediction grid for the fit line
-prediction_grid = with(gentoo,
-  data.frame(body_mass_g = seq(min(body_mass_g), max(body_mass_g), length.out = 100))
+prediction_grid = with(iris_v,
+  data.frame(Petal.Length = seq(min(Petal.Length), max(Petal.Length), length.out = 100))
 )
 
 prediction_grid %>%
@@ -898,25 +898,25 @@ prediction_grid %>%
     # distribution describing uncertainty in log odds
     log_odds = dist_normal(fit, se.fit),
     # inverse-logit transform the log odds to get
-    # distribution describing uncertainty in Pr(sex == "male")
-    p_male = dist_transformed(log_odds, plogis, qlogis)
+    # distribution describing uncertainty in Pr(Species == "virginica")
+    p_virginica = dist_transformed(log_odds, plogis, qlogis)
   ) %>%
-  ggplot(aes(x = body_mass_g)) +
+  ggplot(aes(x = Petal.Length)) +
   geom_dots(
-    aes(y = as.numeric(sex == "male"), side = sex),
+    aes(y = as.numeric(Species == "virginica"), side = Species),
     scale = 0.4,
-    data = gentoo
+    data = iris_v
   ) +
   stat_lineribbon(
-    aes(ydist = p_male), alpha = 1/4, fill = "#08306b"
+    aes(ydist = p_virginica), alpha = 1/4, fill = "#08306b"
   ) +
   scale_side_mirrored(guide = "none") +
   coord_cartesian(ylim = c(0, 1)) +
   labs(
-    title = "logit dotplot: stat_dots() with stat_lineribbon()",
-    subtitle = 'aes(side = sex) + scale_side_mirrored()',
-    x = "Body mass (g) of Gentoo penguins",
-    y = "Pr(sex = male)"
+    title = "logit dotplot: geom_dots() with stat_lineribbon()",
+    subtitle = 'aes(side = Species) + scale_side_mirrored()',
+    x = "Petal Length",
+    y = "Pr(Species = virginica)"
   )
 ```
 

diff --git a/vignettes/freq-uncertainty-vis.Rmd b/vignettes/freq-uncertainty-vis.Rmd
@@ -33,7 +33,6 @@ library(tidyr)
 library(ggdist)
 library(ggplot2)
 library(broom)
-library(modelr)
 library(distributional)
 
 theme_set(theme_ggdist())
@@ -114,7 +113,7 @@ m_ABC %>%
   )
 ```
 
-If we would rather see uncertainty in conditional means, we can instead use `modelr::data_grid()` along with `broom::augment()` (similar to how we can use `modelr::data_grid()` with `tidybayes::add_fitted_draws()` for Bayesian models). Here we want the confidence distribution for the mean in condition $c$, $\tilde\mu_c$:
+If we would rather see uncertainty in conditional means, we can instead use `tidyr::expand()` along with `broom::augment()` (similar to how we can use `tidyr::expand()` with `tidybayes::add_fitted_draws()` for Bayesian models). Here we want the confidence distribution for the mean in condition $c$, $\tilde\mu_c$:
 
 $$
 \tilde\mu_c \sim \textrm{student_t}\left(\nu, \hat\mu_c, \sigma_{\hat\mu_c} \right)
@@ -130,7 +129,7 @@ Putting everything together, we have:
 
 ```{r halfeye_with_data, fig.width = tiny_width, fig.height = tiny_height}
 ABC %>%
-  data_grid(condition) %>%
+  expand(condition) %>%
   augment(m_ABC, newdata = ., se_fit = TRUE) %>%
   ggplot(aes(y = condition)) +
   stat_halfeye(
@@ -146,7 +145,7 @@ Of course, this works with the entire `stat_slabinterval()` family. Here are gra
 
 ```{r gradientinterval, fig.width = tiny_width, fig.height = tiny_height}
 ABC %>%
-  data_grid(condition) %>%
+  expand(condition) %>%
   augment(m_ABC, newdata = ., se_fit = TRUE) %>%
   ggplot(aes(y = condition)) +
   stat_gradientinterval(
@@ -163,7 +162,7 @@ Or complementary cumulative distribution function (CCDF) bar plots:
 
 ```{r ccdfinterval, fig.width = tiny_width, fig.height = tiny_height}
 ABC %>%
-  data_grid(condition) %>%
+  expand(condition) %>%
   augment(m_ABC, newdata = ., se_fit = TRUE) %>%
   ggplot(aes(y = condition)) +
   stat_ccdfinterval(
@@ -175,7 +174,7 @@ We can also create quantile dotplots by using the `dots` family of geoms. Quanti
 
 ```{r dotplot, fig.width = tiny_width, fig.height = tiny_height}
 ABC %>%
-  data_grid(condition) %>%
+  expand(condition) %>%
   augment(m_ABC, newdata = ., se_fit = TRUE) %>%
   ggplot(aes(y = condition)) +
   stat_dots(
@@ -194,12 +193,12 @@ The same principle of reconstructing the confidence distribution allows us to us
 m_mpg = lm(mpg ~ hp * cyl, data = mtcars)
 ```
 
-Again we'll use `modelr::data_grid()` with `broom::tidy()`, but now we'll employ `stat_lineribbon()`:
+Again we'll use `tidyr::expand()` with `broom::augment()`, but now we'll employ `stat_lineribbon()`:
 
 ```{r lineribbon, fig.width = tiny_width, fig.height = tiny_height}
 mtcars %>%
   group_by(cyl) %>%
-  data_grid(hp = seq_range(hp, n = 101)) %>%
+  expand(hp = seq(min(hp), max(hp), length.out = 101)) %>%
   augment(m_mpg, newdata = ., se_fit = TRUE) %>%
   ggplot(aes(x = hp, fill = ordered(cyl), color = ordered(cyl))) +
   stat_lineribbon(
@@ -226,7 +225,7 @@ variable is computed by `stat_lineribbon()` and is an ordered factor version of
 ```{r lineribbon_lightened, fig.width = tiny_width, fig.height = tiny_height}
 mtcars %>%
   group_by(cyl) %>%
-  data_grid(hp = seq_range(hp, n = 101)) %>%
+  expand(hp = seq(min(hp), max(hp), length.out = 101)) %>%
   augment(m_mpg, newdata = ., se_fit = TRUE) %>%
   ggplot(aes(x = hp, color = ordered(cyl))) +
   stat_lineribbon(aes(