Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

logis #82

Closed
Tracked by #74
spsanderson opened this issue Mar 3, 2022 · 1 comment
Closed
Tracked by #74

logis #82

spsanderson opened this issue Mar 3, 2022 · 1 comment
Assignees
Labels
enhancement New feature or request

Comments

@spsanderson
Copy link
Owner

No description provided.

@spsanderson spsanderson added the enhancement New feature or request label Mar 4, 2022
@spsanderson spsanderson added this to the TidyDensity 0.0.2 milestone Mar 4, 2022
@spsanderson spsanderson self-assigned this Mar 4, 2022
@spsanderson
Copy link
Owner Author

#' Estimate Logistic Parameters
#'
#' @family Parameter Estimation
#' @family Logistic
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @details This function will attempt to estimate the logistic location and scale
#' parameters given some vector of values.
#'
#' @description The function will return a list output by default, and  if the parameter
#' `.auto_gen_empirical` is set to `TRUE` then the empirical data given to the
#' parameter `.x` will be run through the `tidy_empirical()` function and combined
#' with the estimated beta data.
#'
#' Three different methods of shape parameters are supplied:
#' -  MLE
#' -  MME
#' -  MMUE
#'
#' @param .x The vector of data to be passed to the function. Must be numeric, and
#' all values must be 0 <= x <= 1
#' @param .auto_gen_empirical This is a boolean value of TRUE/FALSE with default
#' set to TRUE. This will automatically create the `tidy_empirical()` output
#' for the `.x` parameter and use the `tidy_combine_distributions()`. The user
#' can then plot out the data using `$combined_data_tbl` from the function output.
#'
#' @examples
#' library(dplyr)
#' library(ggplot2)
#'
#' x <- mtcars$mpg
#' output <- util_logistic_param_estimate(x)
#'
#' output$parameter_tbl
#'
#' output$combined_data_tbl %>%
#'   ggplot(aes(x = dx, y = dy, group = dist_type, color = dist_type)) +
#'   geom_line() +
#'   theme_minimal() +
#'   theme(legend.position = "bottom")
#'
#' t <- rlogis(50, 2.5, 1.4)
#' util_logistic_param_estimate(t)$parameter_tbl
#'
#' @return
#' A tibble/list
#'
#' @export
#'

util_logistic_param_estimate <- function(.x, .auto_gen_empirical = TRUE){
  
  # Tidyeval ----
  x_term <- as.numeric(.x)
  minx <- min(x_term)
  maxx <- max(x_term)
  n <- length(x_term)
  unique_terms <- length(unique(x_term))
  location <- mean(x_term, na.rm = TRUE)
  scale <- (sqrt((n - 1)/n) * sd(x_term) * sqrt(3))/pi
  
  # Checks ----
  if (n < 2 || unique_terms < 2){
    rlang::abort(
      message = "The data must have at least two (2) unique data points.",
      use_cli_format = TRUE
    )
  }
  
  # Get params ----
  # EnvStats
  es_mme_location <- location
  es_mme_scale <- scale
  
  es_mmue_location <- location
  es_mmue_scale <- (sd(x_term) * sqrt(3))/pi
  
  # MLE
  mle_fx <- function(theta, y){
    a <- theta[1]
    b <- theta[2]
    c <- (y - 1)/b
    sum(c + log(b) + 2 * log(1 + exp(-c)))
  }
  
  mle_params <- nlminb(
    start = c(location, scale),
    objective = mle_fx,
    lower = c(-Inf, .Machine$double.eps), y = x_term)$par
  
  names(mle_params) <- c("es_mle_location","es_mle_scale")

  es_mle_location <- mle_params[[1]]
  es_mle_scale <- mle_params[[2]]
  
  # Return Tibble ----
  if (.auto_gen_empirical){
    te <- tidy_empirical(.x = x_term)
    td <- tidy_logistic(.n = n, .location = round(es_mme_location, 3), 
                        .scale = round(es_mme_scale, 3))
    combined_tbl <- tidy_combine_distributions(te, td)
  }
  
  ret <- dplyr::tibble(
    dist_type = rep('Logistic', 3),
    samp_size = rep(n, 3),
    min = rep(minx, 3),
    max = rep(maxx, 3),
    mean = rep(location, 3),
    basic_scale = rep(scale, 3),
    method = c("EnvStats_MME", "EnvStats_MMUE", "EnvStats_MLE"),
    location = c(es_mme_location, es_mmue_location, es_mle_location),
    scale = c(es_mme_scale, es_mmue_scale, es_mle_scale),
    shape_ratio = c(es_mme_location/es_mme_scale, es_mmue_location/es_mmue_scale,
                    es_mle_location/es_mle_scale)
  )
  
  # Return ----
  attr(ret, "tibble_type") <- "parameter_estimation"
  attr(ret, "family") <- "logistic"
  attr(ret, "x_term") <- .x
  attr(ret, "n") <- n
  attr(ret, "base_location") <- location
  attr(reg, "base_scale") <- scale
  
  if (.auto_gen_empirical){
    output <- list(
      combined_data_tbl = combined_tbl,
      parameter_tbl     = ret
    )
  } else {
    output <- list(
      parameter_tbl = ret
    )
  }
  
  return(output)
  
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
enhancement New feature or request
Development

No branches or pull requests

1 participant