Add function set for `tidy_multi_` distribution functions #26

spsanderson · 2022-01-21T02:19:31Z

library(tidyverse)

tidy_multi_normal <- function(.n = 50, .mean = c(-1,0,1), .sd = c(1), .num_sims = 2){
  
  # Tidyeval ----
  n <- as.integer(.n)
  mu <- as.numeric(.mean)
  std <- as.numeric(.sd)
  num_sims <- as.integer(.num_sims)
  
  # Checks ----
  if(!is.integer(n) | n <= 0){
    rlang::abort(
      "The .n parameter must be an integer and greater than 0"
    )
  }
  
  if(!is.integer(num_sims) | num_sims <= 1){
    rlang::abort(
      "The .num_sims parameter must be an integer grater than 1."
    )
  }
  
  if(!is.numeric(mu)){
    rlang::abort(
      "The .mean parameter must be numeric."
    )
  }
  
  if(!is.numeric(std)){
    rlang::abort(
      "The .sd parameter must be numeric"
    )
  }
  
  x <- seq(1, num_sims, 1)
  
  ps <- seq(-n, n-1, 2)
  qs <- seq(0, 1, (1/(n-1)))
  
  df <- expand_grid(
    sim_number = x,
    mu = mu,
    std = std
  )
  
  df <- df %>%
    mutate(sim_number = as.factor(sim_number)) %>%
    dplyr::group_by(sim_number, mu, std) %>%
    dplyr::mutate(x = list(1:n)) %>%
    dplyr::mutate(y = list(stats::rnorm(n, mu, std))) %>%
    dplyr::mutate(d = list(density(unlist(y), n = n)[c("x","y")] %>%
                             purrr::set_names("dx","dy") %>%
                             dplyr::as_tibble())) %>%
    dplyr::mutate(p = list(stats::pnorm(ps, mu, std))) %>%
    dplyr::mutate(q = list(stats::qnorm(qs, mu, std))) %>%
    tidyr::unnest(cols = c(x, y, d, p, q)) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(dist_type = paste0("Gaussian: c(", mu, ", ", std, ")")) %>%
    dplyr::mutate(dist_type = as.factor(dist_type)) %>%
    dplyr::select(
      sim_number, dist_type, dplyr::everything()
    ) %>%
    dplyr::arrange(sim_number, dist_type)
  
  # Attach attributes ----
  attr(df, ".n") <- .n
  attr(df, ".mean") <- .mean
  attr(df, ".sd") <- .sd
  attr(df, ".num_sims") <- .num_sims
  attr(df, "tibble_type") <- "tidy_multi_gaussian"
  attr(df, "ps") <- ps
  attr(df, "qs") <- qs
  
  # Return ----
  return(df)
  
}

tn <- tidy_multi_normal(.n = 500,.num_sims = 5)

atb <- attributes(tn)
n <- atb$.n
sims <- atb$.num_sims
dist_type = stringr::str_remove(atb$tibble_type, "tidy_multi_") %>%
  stringr::str_to_title()
sub_title = paste0(
  "Grouped Gaussian - Data Points: ", n, " - ",
  "Simulations: ", sims, "\n",
  "Distribution Family: ", dist_type, "\n",
  "Parameters: ", if(atb$tibble_type == "tidy_multi_gaussian"){
    paste0("Mean: c(", paste0(toString(atb$.mean), ")", " - SD: c(", toString(atb$.sd), ")"))
  }
)

tn %>%
  ggplot2::ggplot(ggplot2::aes(
    x = dx, 
    y = dy, 
    group = interaction(dist_type, sim_number), 
    color = dist_type)) +
  ggplot2::geom_line() +
  ggplot2::theme_minimal() +
  ggplot2::theme(legend.position = "bottom") +
  ggplot2::labs(
    title = "Density Plot",
    subtitle = sub_title,
    x = "",
    y = "Density",
    col

> tn %>%
+   group_by(dist_type, sim_number) %>%
+   summarise(mean_mu = mean(y))
`summarise()` has grouped output by 'dist_type'. You can override using the `.groups` argument.
# A tibble: 15 x 3
# Groups:   dist_type [3]
   dist_type          sim_number  mean_mu
   <fct>              <fct>         <dbl>
 1 Gaussian: c(-1, 1) 1          -0.947  
 2 Gaussian: c(-1, 1) 2          -0.971  
 3 Gaussian: c(-1, 1) 3          -1.03   
 4 Gaussian: c(-1, 1) 4          -0.959  
 5 Gaussian: c(-1, 1) 5          -0.981  
 6 Gaussian: c(0, 1)  1           0.00196
 7 Gaussian: c(0, 1)  2           0.00435
 8 Gaussian: c(0, 1)  3           0.0225 
 9 Gaussian: c(0, 1)  4          -0.0188 
10 Gaussian: c(0, 1)  5          -0.0652 
11 Gaussian: c(1, 1)  1           0.936  
12 Gaussian: c(1, 1)  2           0.971  
13 Gaussian: c(1, 1)  3           1.01   
14 Gaussian: c(1, 1)  4           0.916  
15 Gaussian: c(1, 1)  5           0.913  
> tn %>%
+   group_by(dist_type) %>%
+   summarise(mean_mu = mean(y))
# A tibble: 3 x 2
  dist_type          mean_mu
  <fct>                <dbl>
1 Gaussian: c(-1, 1) -0.978 
2 Gaussian: c(0, 1)  -0.0110
3 Gaussian: c(1, 1)   0.950

spsanderson · 2022-02-15T17:29:16Z

Final Function:

tidy_multi_dist <- function(
  .tidy_dist = NULL,
  .param_list = list()
) {

  require("TidyDensity")
  
  # Check param ----
  if (is.null(.tidy_dist)) {
    rlang::abort(
      "Please enter a 'tidy_' distribution function like 'tidy_normal' 
      in quotes."
    )
  }

  if (length(.param_list) == 0) {
    rlang::abort(
      "Please enter some parameters for your chosen 'tidy_' distribution."
    )
  }

  # Call used ---
  td <- as.character(.tidy_dist)

  # Params ----
  params <- .param_list

  # Params for the call ----
  n <- as.integer(params$.n)
  num_sims <- as.integer(params$.num_sims)
  x <- seq(1, num_sims, 1)

  # Final parameter list
  final_params_list <- params[which(!names(params) %in% c(".n", ".num_sims"))]

  # Set the grid to make the calls ----
  param_grid <- expand.grid(final_params_list)

  df <- tidyr::expand_grid(
    n = n,
    param_grid,
    sim = as.integer(x)
  )
  
  #func_parm_list <- as.list(df)
  names(df) <- formalArgs(td)

  # Run call on the grouped df ----
  dff <- df %>%
    dplyr::mutate(results = purrr::pmap(dplyr::cur_data(), match.fun(td)))

  # Get the attributes to be used later on ----
  atb <- dff$results[[1]] %>% attributes()

  # Make Dist Type for column ----
  dist_type <- stringr::str_remove(atb$tibble_type, "tidy_") %>%
    stringr::str_replace_all(pattern = "_", " ") %>%
    stringr::str_to_title()

  # Get column names from the param_grid in order to make teh dist_type column ----
  cols <- names(param_grid)

  dff$dist_name <- paste0(
    paste0(dist_type, " c("),
    apply(dff[, cols], 1, paste0, collapse = ", "),
    ")"
  )

  df_unnested_tbl <- dff %>%
    tidyr::unnest(results) %>%
    dplyr::ungroup() %>%
    dplyr::select(sim_number, dist_name, x:q) %>%
    dplyr::mutate(dist_name = as.factor(dist_name)) %>%
    dplyr::arrange(sim_number, dist_name)

  # Attach attributes ----
  attr(df_unnested_tbl, "all") <- atb
  attr(df_unnested_tbl, "tbl") <- "tidy_multi_tibble"

  # Return ----
  return(df_unnested_tbl)

}

spsanderson self-assigned this Jan 21, 2022

spsanderson added the enhancement New feature or request label Jan 21, 2022

spsanderson added this to the TidyDensity 0.0.2 milestone Jan 21, 2022

This comment was marked as resolved.

Sign in to view

spsanderson closed this as completed Feb 15, 2022

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Add function set for `tidy_multi_` distribution functions #26

Add function set for `tidy_multi_` distribution functions #26

spsanderson commented Jan 21, 2022 •

edited

This comment was marked as resolved.

spsanderson commented Feb 15, 2022

Add function set for tidy_multi_ distribution functions #26

Add function set for tidy_multi_ distribution functions #26

Comments

spsanderson commented Jan 21, 2022 • edited

This comment was marked as resolved.

spsanderson commented Feb 15, 2022

Add function set for `tidy_multi_` distribution functions #26

Add function set for `tidy_multi_` distribution functions #26

spsanderson commented Jan 21, 2022 •

edited