Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

beta #75

Closed
Tracked by #74
spsanderson opened this issue Mar 3, 2022 · 1 comment
Closed
Tracked by #74

beta #75

spsanderson opened this issue Mar 3, 2022 · 1 comment

Comments

@spsanderson
Copy link
Owner

No description provided.

@spsanderson
Copy link
Owner Author

spsanderson commented Mar 3, 2022

Here is the code:

util_beta_param_estimate <- function(.x, .auto_gen_with_empirical = TRUE){
  
  # Tidyeval ----
  x_term <- as.numeric(.x)
  minx <- min(x_term)
  maxx <- max(x_term)
  
  # Checks ----
  if (!is.numeric(x_term)){
    rlang::abort(
      "The '.x' parameter must be numeric."
    )
  }
  
  if (minx < 0 | maxx > 1){
    rlang::inform(
      message = "For the beta distribution, its mean 'mu' should be 0 < mu < 1. 
      The data will therefore be scaled to enforce this.",
      use_cli_format = TRUE
    )
    x_term <- healthyR.ai::hai_scale_zero_one_vec(x_term)
    scaled <- TRUE
  } else {
    rlang::inform(
      message = "There was no need to scale the data.",
      use_cli_format = TRUE
    )
    x_term <- x_term
    scaled <- FALSE
  }
  
  # Get params ----
  n <- length(x_term)
  m <- mean(x_term, na.rm = TRUE)
  s2 <- var(x_term, na.rm = TRUE)
  
  # wikipedia generic
  alpha <- m * n
  beta <- sqrt(((1- m) * n)^2)
  
  # https://itl.nist.gov/div898/handbook/eda/section3/eda366h.htm
  p <- m * (((m * (1- m))/s2) - 1)
  q <- (1 - m) * (((m * (1 - m))/s2) - 1)
  
  if (p < 0){
    p <- sqrt((p)^2)
  }
  
  if (q < 0){
    q <- sqrt((q)^2)
  }
  
  # EnvStats
  term <- ((m * (1 - m))/(((n - 1)/n) * s2)) - 1
  esshape1 <- m * term
  esshape2 <- (1 - m) * term
  
  # Return Tibble ----
  if (.auto_gen_with_empirical){
    te <- tidy_empirical(.x = x_term)
    td <- tidy_beta(.n = n, .shape1 = p, .shape2 = q)
    combined_tbl <- tidy_combine_distributions(te, td)
  }
  
  ret <- dplyr::tibble(
    dist_type = rep('Beta', 3),
    samp_size = rep(n, 3),
    min = rep(minx, 3),
    max = rep(maxx, 3),
    mean = rep(m, 3),
    variance = rep(s2, 3),
    method = c("Bayes", "NIST_MME", "EnvStats_MME"),
    shape1 = c(alpha, p, esshape1),
    shape2 = c(beta, q, esshape2),
    shape_ratio = c(alpha/beta, p/q, esshape1/esshape2)
  )
  
  # Return ----
  attr(ret, "tibble_typle") <- "beta_parameter_estimation"
  attr(ret, "x_term") <- .x
  attr(ret, "scaled") <- scaled
  attr(ret, "n") <- n
  
  if (.auto_gen_with_empirical){
    output <- list(
      combined_tbl,
      ret
    )
  } else {
    output <- ret
  }
  
  return(output)
  
}

Here is a working example:

alpha <- 2.5
beta <- 0.5
tb <- tidy_beta(.n = 32, .shape1 = alpha, .shape2 = beta, .num_sims = 1) %>%
  group_by(x) %>% # groups by each observation, if sim_number then takes mean of the sim
  summarise(y = mean(y)) %>%
  ungroup() %>%
  pull(y)
> params <- util_beta_param_estimate(tb)
There was no need to scale the data.
> params
# A tibble: 3 x 10
  dist_type samp_size   min   max  mean variance method       shape1 shape2 shape_ratio
  <chr>         <int> <dbl> <dbl> <dbl>    <dbl> <chr>         <dbl>  <dbl>       <dbl>
1 Beta             32 0.212  1.00 0.798   0.0539 Bayes         25.5   6.48         3.94
2 Beta             32 0.212  1.00 0.798   0.0539 NIST_MME       1.59  0.404        3.94
3 Beta             32 0.212  1.00 0.798   0.0539 EnvStats_MME   1.67  0.424        3.94
> attributes(params)
$class
[1] "tbl_df"     "tbl"        "data.frame"

$row.names
[1] 1 2 3

$names
 [1] "dist_type"   "samp_size"   "min"         "max"         "mean"        "variance"   
 [7] "method"      "shape1"      "shape2"      "shape_ratio"

$tibble_typle
[1] "beta_parameter_estimation"

$x_term
 [1] 0.9935692 0.5702770 0.9768198 0.9835949 0.9743270 0.9933977 0.8366687 0.9911934 0.2116736
[10] 0.9996197 0.6960581 0.8134849 0.9998215 0.7535200 0.5729446 0.9466399 0.9934446 0.9822580
[19] 0.4181436 0.5219418 0.9457714 0.4061314 0.6482816 0.9421323 0.9943604 0.7716284 0.7928856
[28] 0.7834011 0.9999155 0.8963023 0.8547636 0.2587532

$scaled
[1] FALSE

$n
[1] 32

spsanderson added a commit that referenced this issue Mar 3, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant