Inverse Burr, also need param_estimate and stats_tbl #475

spsanderson · 2024-05-03T15:41:38Z

Param Estimate

Function:

#' Estimate Inverse Burr Parameters
#'
#' @family Parameter Estimation
#' @family Inverse Burr
#'
#' @details This function coerces the given vector `.x` with `as.numeric()`
#' and checks that it is a strictly positive numeric vector with no missing
#' values and at least two observations. It then estimates the shape1, shape2,
#' and scale parameters of an inverse Burr distribution by minimizing the
#' negative log-likelihood (computed with `actuar::dinvburr()`) using
#' `stats::optim()` with the `"L-BFGS-B"` method. All three parameters start
#' at 1 and are bounded below by `1e-5`. The rate is reported as `1 / scale`.
#'
#' @description This function will attempt to estimate the inverse Burr shape1,
#' shape2, and rate parameters given some vector of values `.x`. The function
#' will return a list output by default, and if the parameter
#' `.auto_gen_empirical` is set to `TRUE` then the empirical data given to the
#' parameter `.x` will be run through the `tidy_empirical()` function and
#' combined with the estimated inverse Burr data.
#'
#' @param .x The vector of data to be passed to the function. Must be numeric
#' values strictly greater than 0, with no missing values.
#' @param .auto_gen_empirical This is a boolean value of TRUE/FALSE with default
#' set to TRUE. This will automatically create the `tidy_empirical()` output
#' for the `.x` parameter and use the `tidy_combine_distributions()`. The user
#' can then plot out the data using `$combined_data_tbl` from the function output.
#'
#' @examples
#' library(dplyr)
#' library(ggplot2)
#'
#' set.seed(123)
#' tb <- tidy_inverse_burr(.shape1 = 1, .shape2 = 2, .rate = .3) |> pull(y)
#' output <- util_inverse_burr_param_estimate(tb)
#'
#' output$parameter_tbl
#'
#' output$combined_data_tbl |>
#'   tidy_combined_autoplot()
#'
#' @return
#' A list containing `parameter_tbl` (a one-row tibble of estimates) and, when
#' `.auto_gen_empirical = TRUE`, `combined_data_tbl`.
#'
#' @export
#'

util_inverse_burr_param_estimate <- function(.x, .auto_gen_empirical = TRUE) {

  # Tidyeval ----
  x_term <- as.numeric(.x)
  n <- length(x_term)

  # Checks ----
  if (!is.vector(x_term, mode = "numeric")) {
    rlang::abort(
      message = "The '.x' term must be a numeric vector.",
      use_cli_format = TRUE
    )
  }

  # Zeros are rejected as well as negatives, matching the "greater than 0"
  # wording of the message. `na.rm = TRUE` keeps this test from evaluating to
  # NA; missing values are reported by the dedicated check below.
  if (any(x_term <= 0, na.rm = TRUE)) {
    rlang::abort(
      message = "All values of '.x' must be numeric values greater than 0.",
      use_cli_format = TRUE
    )
  }

  if (anyNA(x_term)) {
    rlang::abort(
      message = "The '.x' term must not contain missing values.",
      use_cli_format = TRUE
    )
  }

  if (n < 2) {
    rlang::abort(
      message = "You must supply at least two data points for this function.",
      use_cli_format = TRUE
    )
  }

  # Negative log-likelihood function for inverse Burr distribution
  invburr_lik <- function(params, data) {
    shape1 <- params[1]
    shape2 <- params[2]
    scale <- params[3]
    -sum(
      actuar::dinvburr(
        data,
        shape1 = shape1,
        shape2 = shape2,
        scale = scale,
        log = TRUE
      )
    )
  }

  # Initial parameter guesses
  initial_params <- c(shape1 = 1, shape2 = 1, scale = 1)

  # Optimize to minimize the negative log-likelihood; the lower bounds keep
  # every parameter strictly positive.
  opt_result <- stats::optim(
    par = initial_params,
    fn = invburr_lik,
    data = x_term,
    method = "L-BFGS-B",
    lower = c(1e-5, 1e-5, 1e-5)
  )

  shape1 <- opt_result$par[1]
  shape2 <- opt_result$par[2]
  scale <- opt_result$par[3]
  rate <- 1 / scale

  # Return Tibble ----
  if (.auto_gen_empirical) {
    te <- tidy_empirical(.x = x_term)
    # Simulate from the fitted *inverse* Burr distribution so the combined
    # table compares the data with the family that was actually estimated.
    td <- tidy_inverse_burr(
      .n = n,
      .shape1 = round(shape1, 3),
      .shape2 = round(shape2, 3),
      .rate = round(rate, 3)
    )
    combined_tbl <- tidy_combine_distributions(te, td)
  }

  ret <- dplyr::tibble(
    dist_type = "Inverse Burr",
    samp_size = n,
    min = min(x_term),
    max = max(x_term),
    mean = mean(x_term),
    shape1 = shape1,
    shape2 = shape2,
    rate = rate,
    scale = scale
  )

  # Return ----
  attr(ret, "tibble_type") <- "parameter_estimation"
  attr(ret, "family") <- "inverse_burr"
  attr(ret, "x_term") <- .x
  attr(ret, "n") <- n

  if (.auto_gen_empirical) {
    output <- list(
      combined_data_tbl = combined_tbl,
      parameter_tbl     = ret
    )
  } else {
    output <- list(
      parameter_tbl = ret
    )
  }

  return(output)
}

Example:

> set.seed(123)
> tb <- tidy_burr(.shape1 = 1, .shape2 = 2, .rate = .3) |> pull(y)
> output <- util_inverse_burr_param_estimate(tb)
> output$parameter_tbl
# A tibble: 1 × 9
  dist_type    samp_size   min   max  mean shape1 shape2  rate scale
  <chr>            <int> <dbl> <dbl> <dbl>  <dbl>  <dbl> <dbl> <dbl>
1 Inverse Burr        50 0.253  21.0  4.38  0.692   2.32 0.245  4.08

Stats Tibble

Function:

#' Distribution Statistics
#'
#' @family Inverse Burr
#' @family Distribution Statistics
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @details This function will take in a tibble and returns the statistics
#' of the given type of `tidy_` distribution. It is required that data be
#' passed from a `tidy_` distribution function.
#'
#' The theoretical statistics follow the `actuar` parameterization of the
#' inverse Burr distribution, in which the k-th raw moment is
#' `scale^k * gamma(shape1 + k/shape2) * gamma(1 - k/shape2) / gamma(shape1)`
#' and exists only for `k < shape2`. The mean is `Inf` when `shape2 <= 1`, the
#' variance is `Inf` when `shape2 <= 2`, and skewness and kurtosis are returned
#' as the string `"undefined"` when `shape2 <= 3` and `shape2 <= 4`
#' respectively. The mode is 0 unless `shape1 * shape2 > 1`.
#'
#' @description Returns distribution statistics in a tibble.
#'
#' @param .data The data being passed from a `tidy_` distribution function.
#'
#' @examples
#' library(dplyr)
#'
#' set.seed(123)
#' tidy_inverse_burr() |>
#'   util_inverse_burr_stats_tbl() |>
#'   glimpse()
#'
#' @return
#' A tibble
#'
#' @name util_inverse_burr_stats_tbl
NULL

#' @export
#' @rdname util_inverse_burr_stats_tbl

util_inverse_burr_stats_tbl <- function(.data) {

  # Immediate check for tidy_ distribution function
  if (!"tibble_type" %in% names(attributes(.data))) {
    rlang::abort(
      message = "You must pass data from the 'tidy_dist' function.",
      use_cli_format = TRUE
    )
  }

  if (attributes(.data)$tibble_type != "tidy_inverse_burr") {
    rlang::abort(
      message = "You must use 'tidy_inverse_burr()'",
      use_cli_format = TRUE
    )
  }

  # Data
  data_tbl <- dplyr::as_tibble(.data)

  atb <- attributes(data_tbl)
  s1 <- atb$.shape1
  s2 <- atb$.shape2
  r <- atb$.rate
  # NOTE(review): the scale is derived from the `.rate` attribute only; confirm
  # that `tidy_inverse_burr()` keeps `.rate` in sync when a caller supplies
  # `.scale` instead.
  sc <- 1 / r

  # k-th raw moment of the inverse Burr distribution. It only exists for
  # k < shape2; otherwise it is reported as Inf.
  raw_moment <- function(k) {
    if (s2 <= k) {
      return(Inf)
    }
    sc^k * gamma(s1 + k / s2) * gamma(1 - k / s2) / gamma(s1)
  }

  m1 <- raw_moment(1)
  m2 <- raw_moment(2)
  m3 <- raw_moment(3)
  m4 <- raw_moment(4)

  stat_mean <- m1

  # The density has an interior mode only when shape1 * shape2 > 1.
  stat_mode <- if (s1 * s2 > 1) {
    sc * ((s1 * s2 - 1) / (s2 + 1))^(1 / s2)
  } else {
    0
  }

  # qinvburr() already applies the scale, so it must not be multiplied in again.
  stat_median <- actuar::qinvburr(0.5, shape1 = s1, shape2 = s2, scale = sc)

  stat_var <- if (s2 > 2) m2 - m1^2 else Inf

  # Standardized third and fourth central moments built from the raw moments.
  # The "undefined" string is kept for backward compatibility with existing
  # output, even though it makes these columns character in that case.
  stat_skewness <- if (s2 > 3) {
    (m3 - 3 * m1 * m2 + 2 * m1^3) / stat_var^(3 / 2)
  } else {
    "undefined"
  }

  stat_kurtosis <- if (s2 > 4) {
    (m4 - 4 * m1 * m3 + 6 * m1^2 * m2 - 3 * m1^4) / stat_var^2
  } else {
    "undefined"
  }

  # Data Tibble
  ret <- dplyr::tibble(
    tidy_function = atb$tibble_type,
    function_call = atb$dist_with_params,
    distribution = dist_type_extractor(atb$tibble_type),
    distribution_type = atb$distribution_family_type,
    points = atb$.n,
    simulations = atb$.num_sims,
    mean = stat_mean,
    mode = stat_mode,
    median = stat_median,
    coeff_var = sqrt(stat_var) / stat_mean,
    skewness = stat_skewness,
    kurtosis = stat_kurtosis,
    computed_std_skew = tidy_skewness_vec(data_tbl$y),
    computed_std_kurt = tidy_kurtosis_vec(data_tbl$y),
    ci_lo = ci_lo(data_tbl$y),
    ci_hi = ci_hi(data_tbl$y)
  )

  # Return
  return(ret)
}

Example:

> set.seed(123)
> tidy_inverse_burr() |>
+   util_inverse_burr_stats_tbl() |>
+   glimpse()
Rows: 1
Columns: 16
$ tidy_function     <chr> "tidy_inverse_burr"
$ function_call     <chr> "Inverse Burr c(1, 1, 1, 1)"
$ distribution      <chr> "Inverse Burr"
$ distribution_type <chr> "continuous"
$ points            <dbl> 50
$ simulations       <dbl> 1
$ mean              <dbl> Inf
$ mode              <dbl> 0
$ median            <dbl> 1
$ coeff_var         <dbl> NaN
$ skewness          <chr> "undefined"
$ kurtosis          <chr> "undefined"
$ computed_std_skew <dbl> 6.286574
$ computed_std_kurt <dbl> 42.69436
$ ci_lo             <dbl> 0.04476678
$ ci_hi             <dbl> 25.17203

AIC

Function:

#' Calculate Akaike Information Criterion (AIC) for Inverse Burr Distribution
#'
#' Fits an inverse Burr distribution to a numeric vector by maximum likelihood
#' and reports the resulting Akaike Information Criterion (AIC).
#'
#' @family Utility
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @description
#' The shape1, shape2, and scale parameters of an inverse Burr distribution are
#' estimated from the supplied data by maximum likelihood, and the AIC of the
#' fitted model is returned.
#'
#' @param .x A numeric vector containing the data to be fitted to an inverse Burr distribution.
#'
#' @details
#' The input is coerced with `as.numeric()`. The negative log-likelihood,
#' evaluated with `actuar::dinvburr()`, is minimized by `stats::optim()` using
#' the `"L-BFGS-B"` method.
#'
#' Initial parameter estimates: shape1, shape2, and scale all start at 1.
#'
#' Bounds: each parameter is bounded below by `1e-5`.
#'
#' AIC: computed as `2 * k - 2 * logLik` with `k = 3` estimated parameters.
#'
#' Goodness-of-fit: AIC is a model-comparison metric; it is recommended to also
#' assess the fit of the chosen model using visualization and other
#' statistical tests.
#'
#' @examples
#' # Example 1: Calculate AIC for a sample dataset
#' set.seed(123)
#' x <- tidy_inverse_burr(100, .shape1 = 2, .shape2 = 3, .scale = 1)[["y"]]
#' util_inverse_burr_aic(x)
#'
#' @return
#' The AIC value calculated based on the fitted inverse Burr distribution to the provided data.
#'
#' @name util_inverse_burr_aic
NULL

#' @export
#' @rdname util_inverse_burr_aic
util_inverse_burr_aic <- function(.x) {
  # Work on a plain numeric copy of the input
  obs <- as.numeric(.x)

  # Objective: negative log-likelihood of the inverse Burr model, with the
  # parameter vector ordered as (shape1, shape2, scale)
  nll <- function(theta, data) {
    log_dens <- actuar::dinvburr(
      data,
      shape1 = theta[1],
      shape2 = theta[2],
      scale = theta[3],
      log = TRUE
    )
    -sum(log_dens)
  }

  # Starting values for the optimizer
  start <- c(shape1 = 1, shape2 = 1, scale = 1)

  # Box-constrained minimization; the lower bounds keep parameters positive
  mle <- stats::optim(
    par = start,
    fn = nll,
    data = obs,
    method = "L-BFGS-B",
    lower = rep(1e-5, 3)
  )

  # Three estimated parameters: shape1, shape2, and scale
  n_par <- 3

  # AIC = 2k - 2 * logLik, and the minimized objective equals -logLik
  2 * n_par + 2 * mle$value
}

Example:

> set.seed(123)
> x <- tidy_inverse_burr(100, .shape1 = 2, .shape2 = 3, .scale = 1)[["y"]]
> util_inverse_burr_aic(x)
[1] 206.2411

Fixes #475

spsanderson mentioned this issue May 3, 2024

New AIC/Param Estimate/Stats Tbl functions #467

Closed

15 tasks

spsanderson self-assigned this May 15, 2024

spsanderson added the enhancement New feature or request label May 15, 2024

spsanderson added this to the TidyDensity 1.4.1 milestone May 15, 2024

spsanderson closed this as completed in d73ea77 May 15, 2024

spsanderson added a commit that referenced this issue May 15, 2024

Merge pull request #499 from spsanderson/development

babf3e4

Fixes #475

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Inverse Burr, also need param_estimate and stats_tbl #475

Inverse Burr, also need param_estimate and stats_tbl #475

spsanderson commented May 3, 2024 •

edited

Loading

Inverse Burr, also need param_estimate and stats_tbl #475

Inverse Burr, also need param_estimate and stats_tbl #475

Comments

spsanderson commented May 3, 2024 • edited Loading

Param Estimate

Stats Tibble

AIC

spsanderson commented May 3, 2024 •

edited

Loading