-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
- Loading branch information
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -36,7 +36,8 @@ Imports: | |
nloptr, | ||
broom, | ||
tidyselect, | ||
data.table | ||
data.table, | ||
bbmle | ||
Suggests: | ||
rmarkdown, | ||
knitr, | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,164 @@ | ||
#' Estimate Chisquare Parameters | ||
#' | ||
#' @family Parameter Estimation | ||
#' @family Chisquare | ||
#' | ||
#' @author Steven P. Sanderson II, MPH | ||
#' | ||
#' @details This function will see if the given vector `.x` is a numeric vector. | ||
#' It will attempt to estimate the prob parameter of a Chisquare distribution. | ||
#' The function first performs tidyeval on the input data to ensure it's a | ||
#' numeric vector. It then checks if there are at least two data points, as this | ||
#' is a requirement for parameter estimation. | ||
#' | ||
#' The estimation of the chi-square distribution parameters is performed using | ||
#' maximum likelihood estimation (MLE) implemented with the `bbmle` package. | ||
#' The negative log-likelihood function is minimized to obtain the estimates for | ||
#' the degrees of freedom (\code{doff}) and the non-centrality parameter (\code{ncp}). | ||
#' Initial values for the optimization are set based on the sample variance and | ||
#' mean, but these can be adjusted if necessary. | ||
#' | ||
#' If the estimation fails or encounters an error, the function returns \code{NA} | ||
#' for both \code{doff} and \code{ncp}. | ||
#' | ||
#' Finally, the function returns a tibble containing the following information: | ||
#' \describe{ | ||
#' \item{dist_type}{The type of distribution, which is "Chisquare" in this case.} | ||
#' \item{samp_size}{The sample size, i.e., the number of data points in the input vector.} | ||
#' \item{min}{The minimum value of the data points.} | ||
#' \item{max}{The maximum value of the data points.} | ||
#' \item{mean}{The mean of the data points.} | ||
#' \item{degrees_of_freedom}{The estimated degrees of freedom (\code{doff}) for the chi-square distribution.} | ||
#' \item{ncp}{The estimated non-centrality parameter (\code{ncp}) for the chi-square distribution.} | ||
#' } | ||
#' | ||
#' Additionally, if the argument \code{.auto_gen_empirical} is set to \code{TRUE} | ||
#' (which is the default behavior), the function also returns a combined tibble | ||
#' containing both empirical and chi-square distribution data, obtained by | ||
#' calling \code{tidy_empirical} and \code{tidy_chisquare}, respectively. | ||
#' | ||
#' @description This function will attempt to estimate the Chisquare prob parameter | ||
#' given some vector of values `.x`. The function will return a list output by default, | ||
#' and if the parameter `.auto_gen_empirical` is set to `TRUE` then the empirical | ||
#' data given to the parameter `.x` will be run through the `tidy_empirical()` | ||
#' function and combined with the estimated Chisquare data. | ||
#' | ||
#' @param .x The vector of data to be passed to the function. Must be non-negative | ||
#' integers. | ||
#' @param .auto_gen_empirical This is a boolean value of TRUE/FALSE with default | ||
#' set to TRUE. This will automatically create the `tidy_empirical()` output | ||
#' for the `.x` parameter and use the `tidy_combine_distributions()`. The user | ||
#' can then plot out the data using `$combined_data_tbl` from the function output. | ||
#' | ||
#' @examples | ||
#' library(dplyr) | ||
#' library(ggplot2) | ||
#' | ||
#' tc <- tidy_chisquare(.n = 500, .df = 6, .ncp = 1) |> pull(y) | ||
#' output <- util_chisquare_param_estimate(tc) | ||
#' | ||
#' output$parameter_tbl | ||
#' | ||
#' output$combined_data_tbl |> | ||
#' tidy_combined_autoplot() | ||
#' | ||
#' @return | ||
#' A tibble/list | ||
#' | ||
#' @name util_chisquare_param_estimate | ||
NULL | ||
|
||
#' @export | ||
#' @rdname util_chisquare_param_estimate | ||
|
||
util_chisquare_param_estimate <- function(.x, .auto_gen_empirical = TRUE) { | ||
|
||
# Tidyeval ---- | ||
x_term <- as.numeric(.x) | ||
n <- length(x_term) | ||
minx <- min(as.numeric(x_term)) | ||
maxx <- max(as.numeric(x_term)) | ||
|
||
# Checks ---- | ||
if (!is.vector(x_term, mode = "numeric")) { | ||
rlang::abort( | ||
message = "The '.x' term must be a numeric vector.", | ||
use_cli_format = TRUE | ||
) | ||
} | ||
|
||
if (n < 2) { | ||
rlang::abort( | ||
message = "You must supply at least two data points for this function.", | ||
use_cli_format = TRUE | ||
) | ||
} | ||
|
||
# Parameters ---- | ||
estimate_chisq_params <- function(data) { | ||
# Negative log-likelihood function | ||
negLogLik <- function(df, ncp) { | ||
-sum(stats::dchisq(data, df = df, ncp = ncp, log = TRUE)) | ||
} | ||
|
||
# Initial values (adjust based on your data if necessary) | ||
start_vals <- list(df = trunc(var(data)/2), ncp = trunc(mean(data))) | ||
|
||
# MLE using bbmle | ||
mle_fit <- bbmle::mle2(negLogLik, start = start_vals) | ||
# Return estimated parameters as a named vector | ||
df <- dplyr::tibble( | ||
est_df = bbmle::coef(mle_fit)[1], | ||
est_ncp = bbmle::coef(mle_fit)[2] | ||
) | ||
return(df) | ||
} | ||
|
||
safe_estimates <- { | ||
purrr::possibly( | ||
estimate_chisq_params, | ||
otherwise = NA_real_, | ||
quiet = TRUE | ||
) | ||
} | ||
|
||
estimates <- safe_estimates(x_term) | ||
doff <- estimates$est_df |> unname() | ||
ncp <- estimates$est_ncp |> unname() | ||
|
||
# Return Tibble ---- | ||
if (.auto_gen_empirical) { | ||
te <- tidy_empirical(.x = x_term) | ||
tc <- tidy_chisquare(.n = n, .df = round(doff, 3), .ncp = round(ncp, 3)) | ||
combined_tbl <- tidy_combine_distributions(te, tc) | ||
} | ||
|
||
ret <- dplyr::tibble( | ||
dist_type = "Chisquare", | ||
samp_size = n, | ||
min = minx, | ||
max = maxx, | ||
mean = mean(x_term), | ||
degrees_of_freedom = doff, | ||
ncp = ncp | ||
) | ||
|
||
# Return ---- | ||
attr(ret, "tibble_type") <- "parameter_estimation" | ||
attr(ret, "family") <- "chisquare" | ||
attr(ret, "x_term") <- .x | ||
attr(ret, "n") <- n | ||
|
||
if (.auto_gen_empirical) { | ||
output <- list( | ||
combined_data_tbl = combined_tbl, | ||
parameter_tbl = ret | ||
) | ||
} else { | ||
output <- list( | ||
parameter_tbl = ret | ||
) | ||
} | ||
|
||
return(output) | ||
} |
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.