Skip to content

Commit

Permalink
Fixes #75
Browse files Browse the repository at this point in the history
  • Loading branch information
spsanderson committed Mar 3, 2022
1 parent f931c08 commit 9841d31
Show file tree
Hide file tree
Showing 7 changed files with 194 additions and 3 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ Imports:
purrr,
stringr,
actuar,
methods
methods,
EnvStats,
healthyR.ai
Suggests:
rmarkdown,
knitr,
Expand Down
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,7 @@ export(tidy_zero_truncated_binomial)
export(tidy_zero_truncated_geometric)
export(tidy_zero_truncated_negative_binomial)
export(tidy_zero_truncated_poisson)
export(util_beta_param_estimate)
importFrom(magrittr,"%>%")
importFrom(rlang,":=")
importFrom(rlang,.data)
Expand Down
136 changes: 136 additions & 0 deletions R/est-param-beta.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
#' Estimate Beta Parameters
#'
#' @family Parameter Estimation
#' @family Beta
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @details This is a wrapper for the [EnvStats::ebeta()] function.
#'
#' @description This function is a wrapper for the [EnvStats::ebeta()] function.
#' It will automatically scale the data from 0 to 1 if it is not already. This means
#' you can pass a vector like `mtcars$mpg` and not worry about it.
#'
#' @param .x The vector of data to be passed to the function. Must be numeric, and
#' all values must be 0 <= x <= 1
#' @param .auto_gen_empirical This is a boolean value of TRUE/FALSE with default
#' set to TRUE. This will automatically create the `tidy_empirical()` output
#' for the `.x` parameter and use the `tidy_combine_distributions()`. The user
#' can then plot out the data using `$combined_data_tbl` from the function output.
#'
#' @examples
#' library(dplyr)
#'
#' tb <- tidy_beta(.n = 50, .shape1 = 2.5, .shape2 = 1.4, .ncp = 0) %>%
#' pull(y)
#' util_beta_param_estimate(tb)
#'
#' @return
#' A tibble
#'
#' @export
#'

util_beta_param_estimate <- function(.x, .auto_gen_empirical = TRUE){

# Tidyeval ----
x_term <- as.numeric(.x)
minx <- min(x_term)
maxx <- max(x_term)

# Checks ----
if (length(n) < 2 || length(unique(x_term)) < 2){
rlang::abort(
message = "The data must have at least two (2) unique data points.",
use_cli_format = TRUE
)
}

if (!is.numeric(x_term)){
rlang::abort(
"The '.x' parameter must be numeric."
)
}

if (minx < 0 | maxx > 1){
rlang::inform(
message = "For the beta distribution, its mean 'mu' should be 0 < mu < 1.
The data will therefore be scaled to enforce this.",
use_cli_format = TRUE
)
x_term <- healthyR.ai::hai_scale_zero_one_vec(x_term)
scaled <- TRUE
} else {
rlang::inform(
message = "There was no need to scale the data.",
use_cli_format = TRUE
)
x_term <- x_term
scaled <- FALSE
}

# Get params ----
n <- length(x_term)
m <- mean(x_term, na.rm = TRUE)
s2 <- var(x_term, na.rm = TRUE)

# wikipedia generic
alpha <- m * n
beta <- sqrt(((1- m) * n)^2)

# https://itl.nist.gov/div898/handbook/eda/section3/eda366h.htm
p <- m * (((m * (1- m))/s2) - 1)
q <- (1 - m) * (((m * (1 - m))/s2) - 1)

if (p < 0){
p <- sqrt((p)^2)
}

if (q < 0){
q <- sqrt((q)^2)
}

# EnvStats
term <- ((m * (1 - m))/(((n - 1)/n) * s2)) - 1
esshape1 <- m * term
esshape2 <- (1 - m) * term

# Return Tibble ----
if (.auto_gen_empirical){
te <- tidy_empirical(.x = x_term)
td <- tidy_beta(.n = n, .shape1 = round(p, 3), .shape2 = round(q, 3))
combined_tbl <- tidy_combine_distributions(te, td)
}

ret <- dplyr::tibble(
dist_type = rep('Beta', 3),
samp_size = rep(n, 3),
min = rep(minx, 3),
max = rep(maxx, 3),
mean = rep(m, 3),
variance = rep(s2, 3),
method = c("Bayes", "NIST_MME", "EnvStats_MME"),
shape1 = c(alpha, p, esshape1),
shape2 = c(beta, q, esshape2),
shape_ratio = c(alpha/beta, p/q, esshape1/esshape2)
)

# Return ----
attr(ret, "tibble_typle") <- "beta_parameter_estimation"
attr(ret, "x_term") <- .x
attr(ret, "scaled") <- scaled
attr(ret, "n") <- n

if (.auto_gen_empirical){
output <- list(
combined_data_tbl = combined_tbl,
parameter_tbl = ret
)
} else {
output <- ret
}

return(output)

}

4 changes: 4 additions & 0 deletions _pkgdown.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,10 @@ reference:
desc: Generate multiple simulations of the same distribution with different parameters
contents:
- has_concept("Multiple Distribution")
- title: Parameter Estimation Functions
desc: Functions that help to estimate parameters from raw data.
contents:
- has_concept("Parameter Estimation")
- title: Random Walk Functions
desc: Functions that generate random walk data and return tibbles
contents:
Expand Down
3 changes: 2 additions & 1 deletion man/tidy_beta.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 2 additions & 1 deletion man/tidy_generalized_beta.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

46 changes: 46 additions & 0 deletions man/util_beta_param_estimate.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 9841d31

Please sign in to comment.