New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
beta #75
Comments
12 tasks
Here is the code: util_beta_param_estimate <- function(.x, .auto_gen_with_empirical = TRUE){
# Tidyeval ----
x_term <- as.numeric(.x)
minx <- min(x_term)
maxx <- max(x_term)
# Checks ----
if (!is.numeric(x_term)){
rlang::abort(
"The '.x' parameter must be numeric."
)
}
if (minx < 0 | maxx > 1){
rlang::inform(
message = "For the beta distribution, its mean 'mu' should be 0 < mu < 1.
The data will therefore be scaled to enforce this.",
use_cli_format = TRUE
)
x_term <- healthyR.ai::hai_scale_zero_one_vec(x_term)
scaled <- TRUE
} else {
rlang::inform(
message = "There was no need to scale the data.",
use_cli_format = TRUE
)
x_term <- x_term
scaled <- FALSE
}
# Get params ----
n <- length(x_term)
m <- mean(x_term, na.rm = TRUE)
s2 <- var(x_term, na.rm = TRUE)
# wikipedia generic
alpha <- m * n
beta <- sqrt(((1- m) * n)^2)
# https://itl.nist.gov/div898/handbook/eda/section3/eda366h.htm
p <- m * (((m * (1- m))/s2) - 1)
q <- (1 - m) * (((m * (1 - m))/s2) - 1)
if (p < 0){
p <- sqrt((p)^2)
}
if (q < 0){
q <- sqrt((q)^2)
}
# EnvStats
term <- ((m * (1 - m))/(((n - 1)/n) * s2)) - 1
esshape1 <- m * term
esshape2 <- (1 - m) * term
# Return Tibble ----
if (.auto_gen_with_empirical){
te <- tidy_empirical(.x = x_term)
td <- tidy_beta(.n = n, .shape1 = p, .shape2 = q)
combined_tbl <- tidy_combine_distributions(te, td)
}
ret <- dplyr::tibble(
dist_type = rep('Beta', 3),
samp_size = rep(n, 3),
min = rep(minx, 3),
max = rep(maxx, 3),
mean = rep(m, 3),
variance = rep(s2, 3),
method = c("Bayes", "NIST_MME", "EnvStats_MME"),
shape1 = c(alpha, p, esshape1),
shape2 = c(beta, q, esshape2),
shape_ratio = c(alpha/beta, p/q, esshape1/esshape2)
)
# Return ----
attr(ret, "tibble_typle") <- "beta_parameter_estimation"
attr(ret, "x_term") <- .x
attr(ret, "scaled") <- scaled
attr(ret, "n") <- n
if (.auto_gen_with_empirical){
output <- list(
combined_tbl,
ret
)
} else {
output <- ret
}
return(output)
} Here is a working example: alpha <- 2.5
beta <- 0.5
tb <- tidy_beta(.n = 32, .shape1 = alpha, .shape2 = beta, .num_sims = 1) %>%
group_by(x) %>% # groups by each observation, if sim_number then takes mean of the sim
summarise(y = mean(y)) %>%
ungroup() %>%
pull(y)
> params <- util_beta_param_estimate(tb)
There was no need to scale the data.
> params
# A tibble: 3 x 10
dist_type samp_size min max mean variance method shape1 shape2 shape_ratio
<chr> <int> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl>
1 Beta 32 0.212 1.00 0.798 0.0539 Bayes 25.5 6.48 3.94
2 Beta 32 0.212 1.00 0.798 0.0539 NIST_MME 1.59 0.404 3.94
3 Beta 32 0.212 1.00 0.798 0.0539 EnvStats_MME 1.67 0.424 3.94
> attributes(params)
$class
[1] "tbl_df" "tbl" "data.frame"
$row.names
[1] 1 2 3
$names
[1] "dist_type" "samp_size" "min" "max" "mean" "variance"
[7] "method" "shape1" "shape2" "shape_ratio"
$tibble_typle
[1] "beta_parameter_estimation"
$x_term
[1] 0.9935692 0.5702770 0.9768198 0.9835949 0.9743270 0.9933977 0.8366687 0.9911934 0.2116736
[10] 0.9996197 0.6960581 0.8134849 0.9998215 0.7535200 0.5729446 0.9466399 0.9934446 0.9822580
[19] 0.4181436 0.5219418 0.9457714 0.4061314 0.6482816 0.9421323 0.9943604 0.7716284 0.7928856
[28] 0.7834011 0.9999155 0.8963023 0.8547636 0.2587532
$scaled
[1] FALSE
$n
[1] 32 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
No description provided.
The text was updated successfully, but these errors were encountered: