Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add function set for tidy_multi_ distribution functions #26

Closed
spsanderson opened this issue Jan 21, 2022 · 2 comments
Closed

Add function set for tidy_multi_ distribution functions #26

spsanderson opened this issue Jan 21, 2022 · 2 comments
Assignees
Labels
enhancement New feature or request

Comments

@spsanderson
Copy link
Owner

spsanderson commented Jan 21, 2022

library(tidyverse)

tidy_multi_normal <- function(.n = 50, .mean = c(-1,0,1), .sd = c(1), .num_sims = 2){
  
  # Tidyeval ----
  n <- as.integer(.n)
  mu <- as.numeric(.mean)
  std <- as.numeric(.sd)
  num_sims <- as.integer(.num_sims)
  
  # Checks ----
  if(!is.integer(n) | n <= 0){
    rlang::abort(
      "The .n parameter must be an integer and greater than 0"
    )
  }
  
  if(!is.integer(num_sims) | num_sims <= 1){
    rlang::abort(
      "The .num_sims parameter must be an integer grater than 1."
    )
  }
  
  if(!is.numeric(mu)){
    rlang::abort(
      "The .mean parameter must be numeric."
    )
  }
  
  if(!is.numeric(std)){
    rlang::abort(
      "The .sd parameter must be numeric"
    )
  }
  
  x <- seq(1, num_sims, 1)
  
  ps <- seq(-n, n-1, 2)
  qs <- seq(0, 1, (1/(n-1)))
  
  df <- expand_grid(
    sim_number = x,
    mu = mu,
    std = std
  )
  
  df <- df %>%
    mutate(sim_number = as.factor(sim_number)) %>%
    dplyr::group_by(sim_number, mu, std) %>%
    dplyr::mutate(x = list(1:n)) %>%
    dplyr::mutate(y = list(stats::rnorm(n, mu, std))) %>%
    dplyr::mutate(d = list(density(unlist(y), n = n)[c("x","y")] %>%
                             purrr::set_names("dx","dy") %>%
                             dplyr::as_tibble())) %>%
    dplyr::mutate(p = list(stats::pnorm(ps, mu, std))) %>%
    dplyr::mutate(q = list(stats::qnorm(qs, mu, std))) %>%
    tidyr::unnest(cols = c(x, y, d, p, q)) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(dist_type = paste0("Gaussian: c(", mu, ", ", std, ")")) %>%
    dplyr::mutate(dist_type = as.factor(dist_type)) %>%
    dplyr::select(
      sim_number, dist_type, dplyr::everything()
    ) %>%
    dplyr::arrange(sim_number, dist_type)
  
  # Attach attributes ----
  attr(df, ".n") <- .n
  attr(df, ".mean") <- .mean
  attr(df, ".sd") <- .sd
  attr(df, ".num_sims") <- .num_sims
  attr(df, "tibble_type") <- "tidy_multi_gaussian"
  attr(df, "ps") <- ps
  attr(df, "qs") <- qs
  
  # Return ----
  return(df)
  
}

tn <- tidy_multi_normal(.n = 500,.num_sims = 5)

atb <- attributes(tn)
n <- atb$.n
sims <- atb$.num_sims
dist_type = stringr::str_remove(atb$tibble_type, "tidy_multi_") %>%
  stringr::str_to_title()
sub_title = paste0(
  "Grouped Gaussian - Data Points: ", n, " - ",
  "Simulations: ", sims, "\n",
  "Distribution Family: ", dist_type, "\n",
  "Parameters: ", if(atb$tibble_type == "tidy_multi_gaussian"){
    paste0("Mean: c(", paste0(toString(atb$.mean), ")", " - SD: c(", toString(atb$.sd), ")"))
  }
)

tn %>%
  ggplot2::ggplot(ggplot2::aes(
    x = dx, 
    y = dy, 
    group = interaction(dist_type, sim_number), 
    color = dist_type)) +
  ggplot2::geom_line() +
  ggplot2::theme_minimal() +
  ggplot2::theme(legend.position = "bottom") +
  ggplot2::labs(
    title = "Density Plot",
    subtitle = sub_title,
    x = "",
    y = "Density",
    col

image

> tn %>%
+   group_by(dist_type, sim_number) %>%
+   summarise(mean_mu = mean(y))
`summarise()` has grouped output by 'dist_type'. You can override using the `.groups` argument.
# A tibble: 15 x 3
# Groups:   dist_type [3]
   dist_type          sim_number  mean_mu
   <fct>              <fct>         <dbl>
 1 Gaussian: c(-1, 1) 1          -0.947  
 2 Gaussian: c(-1, 1) 2          -0.971  
 3 Gaussian: c(-1, 1) 3          -1.03   
 4 Gaussian: c(-1, 1) 4          -0.959  
 5 Gaussian: c(-1, 1) 5          -0.981  
 6 Gaussian: c(0, 1)  1           0.00196
 7 Gaussian: c(0, 1)  2           0.00435
 8 Gaussian: c(0, 1)  3           0.0225 
 9 Gaussian: c(0, 1)  4          -0.0188 
10 Gaussian: c(0, 1)  5          -0.0652 
11 Gaussian: c(1, 1)  1           0.936  
12 Gaussian: c(1, 1)  2           0.971  
13 Gaussian: c(1, 1)  3           1.01   
14 Gaussian: c(1, 1)  4           0.916  
15 Gaussian: c(1, 1)  5           0.913  
> tn %>%
+   group_by(dist_type) %>%
+   summarise(mean_mu = mean(y))
# A tibble: 3 x 2
  dist_type          mean_mu
  <fct>                <dbl>
1 Gaussian: c(-1, 1) -0.978 
2 Gaussian: c(0, 1)  -0.0110
3 Gaussian: c(1, 1)   0.950 
@spsanderson spsanderson self-assigned this Jan 21, 2022
@spsanderson spsanderson added the enhancement New feature or request label Jan 21, 2022
@spsanderson spsanderson added this to the TidyDensity 0.0.2 milestone Jan 21, 2022
@spsanderson

This comment was marked as resolved.

@spsanderson
Copy link
Owner Author

Final Function:

tidy_multi_dist <- function(
  .tidy_dist = NULL,
  .param_list = list()
) {

  require("TidyDensity")
  
  # Check param ----
  if (is.null(.tidy_dist)) {
    rlang::abort(
      "Please enter a 'tidy_' distribution function like 'tidy_normal' 
      in quotes."
    )
  }

  if (length(.param_list) == 0) {
    rlang::abort(
      "Please enter some parameters for your chosen 'tidy_' distribution."
    )
  }

  # Call used ---
  td <- as.character(.tidy_dist)

  # Params ----
  params <- .param_list

  # Params for the call ----
  n <- as.integer(params$.n)
  num_sims <- as.integer(params$.num_sims)
  x <- seq(1, num_sims, 1)

  # Final parameter list
  final_params_list <- params[which(!names(params) %in% c(".n", ".num_sims"))]

  # Set the grid to make the calls ----
  param_grid <- expand.grid(final_params_list)

  df <- tidyr::expand_grid(
    n = n,
    param_grid,
    sim = as.integer(x)
  )
  
  #func_parm_list <- as.list(df)
  names(df) <- formalArgs(td)

  # Run call on the grouped df ----
  dff <- df %>%
    dplyr::mutate(results = purrr::pmap(dplyr::cur_data(), match.fun(td)))

  # Get the attributes to be used later on ----
  atb <- dff$results[[1]] %>% attributes()

  # Make Dist Type for column ----
  dist_type <- stringr::str_remove(atb$tibble_type, "tidy_") %>%
    stringr::str_replace_all(pattern = "_", " ") %>%
    stringr::str_to_title()

  # Get column names from the param_grid in order to make teh dist_type column ----
  cols <- names(param_grid)

  dff$dist_name <- paste0(
    paste0(dist_type, " c("),
    apply(dff[, cols], 1, paste0, collapse = ", "),
    ")"
  )

  df_unnested_tbl <- dff %>%
    tidyr::unnest(results) %>%
    dplyr::ungroup() %>%
    dplyr::select(sim_number, dist_name, x:q) %>%
    dplyr::mutate(dist_name = as.factor(dist_name)) %>%
    dplyr::arrange(sim_number, dist_name)

  # Attach attributes ----
  attr(df_unnested_tbl, "all") <- atb
  attr(df_unnested_tbl, "tbl") <- "tidy_multi_tibble"

  # Return ----
  return(df_unnested_tbl)

}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Development

No branches or pull requests

1 participant