Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bootstrap_density_tbl #237

Closed
Tracked by #236
spsanderson opened this issue Aug 12, 2022 · 1 comment
Closed
Tracked by #236

bootstrap_density_tbl #237

spsanderson opened this issue Aug 12, 2022 · 1 comment
Assignees
Labels
enhancement New feature or request

Comments

@spsanderson
Copy link
Owner

spsanderson commented Aug 12, 2022

The function is not fully outlined yet, but it would look something like this:

library(TidyDensity)
library(dplyr)
library(purrr)
library(tidyr)
library(ggplot2)

# Sample to resample: the mpg column of the built-in mtcars data set
x <- mtcars$mpg

# For every bootstrap simulation, estimate the density of its resampled values
# with stats::density() (one evaluation point per resampled row), keep the
# evaluation points as dx/dy, and index them with x.
bootstrap_density_tbl <- tidy_bootstrap(x) %>%
  bootstrap_unnest_tbl() %>%
  nest_by(sim_number) %>%
  mutate(
    dens_tbl = list(
      as_tibble(
        setNames(
          density(unlist(data), n = nrow(data))[c("x", "y")],
          c("dx", "dy")
        )
      )
    )
  ) %>%
  select(sim_number, dens_tbl) %>%
  unnest(cols = dens_tbl) %>%
  mutate(x = seq_len(n())) %>%
  ungroup()

> bootstrap_density_tbl
# A tibble: 50,000 × 4
   sim_number    dx       dy     x
   <fct>      <dbl>    <dbl> <int>
 1 1           6.06 0.000124     1
 2 1           7.40 0.00130      2
 3 1           8.74 0.00581      3
 4 1          10.1  0.0118       4
 5 1          11.4  0.0143       5
 6 1          12.8  0.0204       6
 7 1          14.1  0.0360       7
 8 1          15.4  0.0582       8
 9 1          16.8  0.0826       9
10 1          18.1  0.104       10

# One gray density curve per simulation, plus a single dashed red smoother.
# NOTE(review): the constant `group = "none"` presumably overrides the
# per-simulation grouping so only one smoother is fitted - confirm.
ggplot(
  data = bootstrap_density_tbl,
  mapping = aes(x = dx, y = dy, group = sim_number)
) +
  geom_line(color = "gray") +
  geom_smooth(group = "none", se = FALSE, color = "red", linetype = "dashed") +
  theme_minimal()

image

# Base layer data: dx and dy averaged across simulations at each index x.
# The individual simulation curves are drawn first in gray; the averaged
# curve is drawn on top with the default line style.
ggplot(
  data = bootstrap_density_tbl %>%
    group_by(x) %>%
    summarise(dx = mean(dx), dy = mean(dy)) %>%
    ungroup(),
  mapping = aes(x = dx, y = dy)
) +
  geom_line(
    data = select(bootstrap_density_tbl, dx, dy, sim_number),
    mapping = aes(x = dx, y = dy, group = sim_number),
    color = "gray"
  ) +
  geom_line() +
  theme_minimal()

image

@spsanderson spsanderson self-assigned this Aug 12, 2022
@spsanderson spsanderson added the enhancement New feature or request label Aug 12, 2022
@spsanderson spsanderson added this to the TidyDensity 1.2.3 milestone Aug 12, 2022
@spsanderson
Copy link
Owner Author

Possible function:

#' Augment bootstrap data with per-simulation density estimates
#'
#' Takes the output of `tidy_bootstrap()` or `bootstrap_unnest_tbl()` and, for
#' every `sim_number`, appends the `x`/`y` components of
#' `stats::density()` computed on that simulation's resampled values. The
#' density is evaluated at as many points as the simulation has rows, so the
#' density columns line up row-for-row with the resampled values.
#'
#' @param .data A data.frame/tibble carrying a `tibble_type` attribute equal to
#'   `"tidy_bootstrap"` or `"tidy_bootstrap_nested"`. Nested input is unnested
#'   with `TidyDensity::bootstrap_unnest_tbl()` first.
#'
#' @return An ungrouped tibble with the columns `sim_number`, `x` (row index),
#'   `y` (resampled value), `dx` and `dy` (density evaluation point and
#'   estimated density).
#'
#' @details Aborts via `rlang::abort()` when `.data` is not a data.frame or
#'   when its `tibble_type` attribute is missing or not one of the two
#'   accepted values.
bootstrap_density_augment <- function(.data) {

  # Checks
  if (!is.data.frame(.data)) {
    rlang::abort(
      message = "'.data' is expecting a data.frame/tibble. Please supply.",
      use_cli_format = TRUE
    )
  }

  tibble_type <- attr(.data, "tibble_type", exact = TRUE)
  valid_types <- c("tidy_bootstrap", "tidy_bootstrap_nested")

  # A missing attribute makes `%in%` return logical(0), which `if ()` rejects
  # with an unrelated "argument is of length zero" error. Test the length
  # first so every invalid input reaches the intended message.
  if (length(tibble_type) != 1L || !tibble_type %in% valid_types) {
    rlang::abort(
      message = paste0(
        "Must pass data to this function from either tidy_bootstrap() or ",
        "bootstrap_unnest_tbl()."
      ),
      use_cli_format = TRUE
    )
  }

  # Bring both accepted inputs to the unnested (sim_number, y) layout
  df_tbl <- dplyr::as_tibble(.data)
  if (tibble_type == "tidy_bootstrap_nested") {
    df_tbl <- TidyDensity::bootstrap_unnest_tbl(df_tbl)
  }

  # Add density data: one density estimate per simulation, evaluated at
  # nrow(data) points so it can be unnested alongside the resampled values
  df_tbl <- df_tbl %>%
    dplyr::nest_by(sim_number) %>%
    dplyr::mutate(
      dens_tbl = list(
        stats::density(unlist(data), n = nrow(data))[c("x", "y")] %>%
          purrr::set_names("dx", "dy") %>%
          dplyr::as_tibble()
      )
    ) %>%
    tidyr::unnest(cols = c(data, dens_tbl)) %>%
    dplyr::mutate(x = dplyr::row_number()) %>%
    dplyr::ungroup() %>%
    dplyr::select(sim_number, x, y, dx, dy)

  # Return
  return(df_tbl)

}

Example:

library(TidyDensity)
library(tidyverse)

# Sample data: the mpg column of the built-in mtcars data set
x <- mtcars$mpg

# Build the bootstrap tibble to hand to bootstrap_density_augment(); the
# density columns are added there via stats::density(), not on this line
bootstrap_density_tbl <- tidy_bootstrap(x)

> bootstrap_density_augment(bootstrap_density_tbl)
# A tibble: 50,000 × 5
   sim_number     x     y    dx        dy
   <fct>      <int> <dbl> <dbl>     <dbl>
 1 1              1  19.7  3.73 0.0000824
 2 1              2  27.3  5.27 0.000529 
 3 1              3  10.4  6.80 0.00223  
 4 1              4  15.5  8.34 0.00663  
 5 1              5  16.4  9.87 0.0159   
 6 1              6  13.3 11.4  0.0336   
 7 1              7  14.7 12.9  0.0597   
 8 1              8  26   14.5  0.0808   
 9 1              9  14.7 16.0  0.0812   
10 1             10  21   17.5  0.0670   
# … with 49,990 more rows
# ℹ Use `print(n = ...)` to see more rows

Or, with the table already unnested:

# Same sample, but unnested with bootstrap_unnest_tbl() before augmenting
bootstrap_density_tbl <- tidy_bootstrap(x) %>%
  bootstrap_unnest_tbl()

> bootstrap_density_augment(bootstrap_density_tbl)
# A tibble: 50,000 × 5
   sim_number     x     y    dx       dy
   <fct>      <int> <dbl> <dbl>    <dbl>
 1 1              1  26    4.77 0.000106
 2 1              2  15    6.34 0.000563
 3 1              3  17.3  7.91 0.00229 
 4 1              4  24.4  9.48 0.00718 
 5 1              5  15.8 11.0  0.0175  
 6 1              6  24.4 12.6  0.0335  
 7 1              7  30.4 14.2  0.0505  
 8 1              8  17.3 15.8  0.0616  
 9 1              9  22.8 17.3  0.0633  
10 1             10  33.9 18.9  0.0587  
# … with 49,990 more rows
# ℹ Use `print(n = ...)` to see more rows

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Development

No branches or pull requests

1 participant