Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
83cd8bf
Add p_value() and get_pvalue()
ismayc Jun 13, 2018
c6ceae3
Update documentation
ismayc Jun 13, 2018
20bba39
Add p_value tests
ismayc Jun 13, 2018
af4649c
Update pkgdown and vignettes
ismayc Jun 13, 2018
b9fd330
After {conflicted} check
ismayc Jun 13, 2018
55c417e
After {conflicted} check
ismayc Jun 13, 2018
a55cdf8
Use alias like Yihui did in {xaringan}
ismayc Jun 13, 2018
22f210b
Use alias like Yihui did in {xaringan}
ismayc Jun 13, 2018
f11fb6e
Update pkgdown
ismayc Jun 13, 2018
d4c4057
Update t_test
ismayc Jun 13, 2018
7ae4bcf
Update t_test
ismayc Jun 14, 2018
6561429
Add infer hex
ismayc Jun 14, 2018
dc19542
Fix t.test call
ismayc Jun 14, 2018
660957f
Switch back to x in t.test
ismayc Jun 14, 2018
182c7e9
Add conf_int functions
ismayc Jun 14, 2018
3317999
Update news and to-do
ismayc Jun 14, 2018
dae5513
Implement conf_int and aliases
ismayc Jun 14, 2018
b72ee78
Add visualize for CI
ismayc Jun 14, 2018
c3e0a45
Update pkgdown and vignettes
ismayc Jun 14, 2018
fa3b88d
Add tests for conf_int and visualize
ismayc Jun 14, 2018
7c132b9
Add conf_int manual
ismayc Jun 14, 2018
e2220c8
Add ::
ismayc Jun 15, 2018
ac05907
Remove unneccessary endpoints_color
ismayc Jun 15, 2018
83451d0
Update tests and pkgdown
ismayc Jun 15, 2018
0df5eb3
Update TO-DO.md
ismayc Jun 17, 2018
4a67c39
Add generic multiplier instead of 2 for 2 * SE
ismayc Jun 17, 2018
0351698
Merge branch 'p_value' of https://github.com/andrewpbray/infer into p…
ismayc Jun 17, 2018
6c383c0
Added conf_int and conf_level arguments to t_test
ismayc Jul 5, 2018
3d8cfdf
Remove pkgdown build in docs folder
ismayc Jul 7, 2018
ad6046c
Remove okcupiddata from DESCRIPTION
ismayc Jul 7, 2018
a9654d2
Get to <80 char per line
ismayc Jul 7, 2018
19b96a1
Rebuild vignettes
ismayc Jul 7, 2018
27e6557
Pump pkg version
ismayc Jul 7, 2018
72e47d0
Remove built vignettes to avoid merge conflict
ismayc Jul 7, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: infer
Type: Package
Title: Tidy Statistical Inference
Version: 0.2.1
Version: 0.3.0
Authors@R: c(
person("Andrew", "Bray", email = "abray@reed.edu", role = c("aut", "cre")),
person("Chester", "Ismay", email = "chester.ismay@gmail.com", role = "aut"),
Expand Down Expand Up @@ -36,8 +36,7 @@ Suggests:
nycflights13,
stringr,
testthat,
covr,
okcupiddata
covr
URL: https://github.com/andrewpbray/infer
BugReports: https://github.com/andrewpbray/infer/issues
RoxygenNote: 6.0.1
Expand Down
5 changes: 5 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ export("%>%")
export(calculate)
export(chisq_stat)
export(chisq_test)
export(conf_int)
export(generate)
export(get_ci)
export(get_confidence_interval)
export(get_pvalue)
export(hypothesize)
export(p_value)
export(rep_sample_n)
export(specify)
export(t_stat)
Expand Down
15 changes: 14 additions & 1 deletion NEWS.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
# infer 0.2.1

- Implemented chi-square Goodness of Fit observed stat depending on `params` being set in `hypothesize` with `specify() %>% calculate()` shortcut
- Added `conf_int` logical argument and `conf_level` argument to `t_test()`
- Switched `shade_color` argument in `visualize()` to be `pvalue_fill` instead
since fill color for confidence intervals is also added now
- Shading for Confidence Intervals in `visualize()`
- Green is default color for CI and red for p-values
- `direction = "between"` to get the green shading
- Currently working only for simulation-based methods
- Implemented `conf_int()` function for computing confidence interval provided a simulation-based method with a `stat` variable
- `get_ci()` and `get_confidence_interval()` are aliases for `conf_int()`
- Converted longer confidence interval calculation code in vignettes to use `get_ci()` instead
- Implemented `p_value()` function for computing p-value provided a simulation-based method with a `stat` variable
- `get_pvalue()` is an alias for `p_value()`
- Converted longer p-value calculation code in vignettes to use `get_pvalue()` instead
- Implemented Chi-square Goodness of Fit observed stat depending on `params` being set in `hypothesize` with `specify() %>% calculate()` shortcut
- Removed "standardized" slope $t$ since its formula is different than "standardized" correlation and there is no way currently to give one over the other
- Implemented correlation with bootstrap CI and permutation hypothesis test
- Filled the `type` argument automatically in `generate()` based
Expand Down
24 changes: 8 additions & 16 deletions R/calculate.R
Original file line number Diff line number Diff line change
Expand Up @@ -125,18 +125,10 @@ calculate <- function(x,
"Your choice of `stat` is invalid for the ",
"types of variables `specify`ed."
))
else
class(result) <- append("infer", class(result))
# else
# class(result) <- append("infer", class(result))

attr(result, "response") <- attr(x, "response")
attr(result, "success") <- attr(x, "success")
attr(result, "explanatory") <- attr(x, "explanatory")
attr(result, "response_type") <- attr(x, "response_type")
attr(result, "explanatory_type") <- attr(x, "explanatory_type")
attr(result, "params") <- attr(x, "params")
attr(result, "distr_param") <- attr(x, "distr_param")
attr(result, "distr_param2") <- attr(x, "distr_param2")
attr(result, "theory_type") <- attr(x, "theory_type")
result <- set_attributes(to = result, from = x)
attr(result, "stat") <- stat

# For returning a 1x1 observed statistic value
Expand All @@ -152,7 +144,7 @@ calc_impl <-


calc_impl.mean <- function(stat, x, order, ...) {
col <- setdiff(names(x), "replicate")
col <- base::setdiff(names(x), "replicate")

x %>%
dplyr::group_by(replicate) %>%
Expand All @@ -161,23 +153,23 @@ calc_impl.mean <- function(stat, x, order, ...) {
}

# Compute the median of the non-`replicate` column within each replicate.
#
# `stat` and `order` are accepted but not used in this body; `...` is
# forwarded to `stats::median()`. Returns the grouped summary with one `stat`
# value per `replicate` group.
calc_impl.median <- function(stat, x, order, ...) {
# NOTE(review): the two consecutive `col <-` assignments look like the old and
# new sides of a rendered diff; both compute the same value, the second one
# with an explicit `base::` namespace.
col <- setdiff(names(x), "replicate")
col <- base::setdiff(names(x), "replicate")

# Assumes exactly one column remains after dropping `replicate`, since `col`
# is turned into a single symbol below -- TODO confirm against callers.
x %>%
dplyr::group_by(replicate) %>%
dplyr::summarize(stat = stats::median(!!(sym(col)), ...))
}

# Compute the standard deviation of the non-`replicate` column within each
# replicate.
#
# `stat` and `order` are accepted but not used in this body; `...` is
# forwarded to `stats::sd()`. Returns the grouped summary with one `stat`
# value per `replicate` group.
calc_impl.sd <- function(stat, x, order, ...) {
# NOTE(review): the two consecutive `col <-` assignments look like the old and
# new sides of a rendered diff; both compute the same value, the second one
# with an explicit `base::` namespace.
col <- setdiff(names(x), "replicate")
col <- base::setdiff(names(x), "replicate")

# Assumes exactly one column remains after dropping `replicate`, since `col`
# is turned into a single symbol below -- TODO confirm against callers.
x %>%
dplyr::group_by(replicate) %>%
dplyr::summarize(stat = stats::sd(!!(sym(col)), ...))
}

calc_impl.prop <- function(stat, x, order, ...) {
col <- setdiff(names(x), "replicate")
col <- base::setdiff(names(x), "replicate")

## No longer needed with implementation of `check_point_params()`
# if(!is.factor(x[[col]])){
Expand Down Expand Up @@ -227,7 +219,7 @@ calc_impl.slope <- function(stat, x, order, ...) {

calc_impl.correlation <- function(stat, x, order, ...) {
x %>%
dplyr::summarize(stat = cor(!!attr(x, "explanatory"),
dplyr::summarize(stat = stats::cor(!!attr(x, "explanatory"),
!!attr(x, "response")))
}

Expand Down
84 changes: 84 additions & 0 deletions R/conf_int.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
#' Compute the confidence interval for (currently only) simulation-based
#' methods
#'
#' \code{get_confidence_interval()} and \code{get_ci()} are both aliases of
#' \code{conf_int()}.
#'
#' @param x data frame of calculated statistics or containing attributes
#' of theoretical distribution values. Currently, dependent on statistics
#' being stored in \code{stat} column as created in \code{calculate()}
#' function.
#' @param level a numerical value between 0 and 1 giving the confidence level.
#' Default value is 0.95.
#' @param type a string giving which method should be used for creating the
#' confidence interval. The default is \code{"percentile"}, which takes the
#' \code{(1 - level) / 2} and \code{level + (1 - level) / 2} quantiles of
#' \code{stat}. The other option is \code{"se"}, which returns
#' \code{point_estimate} plus or minus (multiplier * standard error), where
#' the multiplier is \code{qnorm(1 - (1 - level) / 2)} and the standard error
#' is the standard deviation of \code{stat}.
#' @param point_estimate a numeric value or a 1x1 data frame set to NULL by
#' default. Needed to be provided if \code{type = "se"}.
#'
#' @return a 1 x 2 tibble (one row, two columns) with values corresponding to
#' lower and upper values in the confidence interval. For
#' \code{type = "percentile"} the columns are named after the quantiles
#' (for example \code{2.5\%} and \code{97.5\%}); for \code{type = "se"} they
#' are named \code{lower} and \code{upper}.
#' @export
#' @rdname get_ci
#' @examples
#' mtcars_df <- mtcars %>%
#'   dplyr::mutate(am = factor(am))
#' d_hat <- mtcars_df %>%
#'   specify(mpg ~ am) %>%
#'   calculate(stat = "diff in means", order = c("1", "0"))
#' bootstrap_distn <- mtcars_df %>%
#'   specify(mpg ~ am) %>%
#'   generate(reps = 100) %>%
#'   calculate(stat = "diff in means", order = c("1", "0"))
#' bootstrap_distn %>% conf_int(level = 0.9)
#' bootstrap_distn %>% conf_int(type = "se", point_estimate = d_hat)

conf_int <- function(x, level = 0.95, type = "percentile",
                     point_estimate = NULL) {

  check_ci_args(x, level, type, point_estimate)

  if (type == "percentile") {
    ci_vec <- stats::quantile(x[["stat"]],
                              probs = c((1 - level) / 2,
                                        level + (1 - level) / 2))

    # Column names are taken from the quantile labels produced by
    # `stats::quantile()`.
    ci <- tibble::tibble(ci_vec[1], ci_vec[2])
    names(ci) <- names(ci_vec)
  } else {
    point_estimate <- check_obs_stat(point_estimate)

    # Half-width of the interval: normal quantile times the standard deviation
    # of the simulated statistics, computed once and reused for both endpoints.
    margin <- stats::qnorm(1 - (1 - level) / 2) * stats::sd(x[["stat"]])
    ci <- tibble::tibble(lower = point_estimate - margin,
                         upper = point_estimate + margin)
  }

  ci
}

# Validate the arguments of `conf_int()` and stop with an error when any of
# them is unusable.
#
# x:              must be a data frame.
# level:          must be a single non-missing number strictly between 0 and 1.
# type:           must be "percentile" or "se".
# point_estimate: NULL, a numeric value, or a data frame; it is required
#                 (non-NULL) when `type` is "se".
#
# Called for its side effect (raising an error); returns NULL invisibly.
check_ci_args <- function(x, level, type, point_estimate) {

  # A data frame point estimate is accepted as-is; anything else that is
  # supplied must be numeric.
  if (!is.null(point_estimate) && !is.data.frame(point_estimate)) {
    assertive::assert_is_numeric(point_estimate)
  }

  assertive::assert_is_data.frame(x)
  assertive::assert_is_numeric(level)

  # The length and NA guards keep `||` and `if` from failing with an
  # unrelated message on vector or missing input.
  if (length(level) != 1 || is.na(level) || level <= 0 || level >= 1) {
    stop(paste("The value of `level` must be between 0 and 1",
               "non-inclusive."))
  }

  if (!(type %in% c("percentile", "se"))) {
    stop(paste('The options for `type` are "percentile" or "se".'))
  }

  if (type == "se" && is.null(point_estimate)) {
    stop(paste('A numeric value needs to be given for `point_estimate`',
               'for `type = "se"`.'))
  }

  invisible(NULL)
}


# `get_ci` is bound to the same function value as `conf_int`, so it accepts
# the same arguments; it is exported and documented on the `get_ci` help page.
#' @export
#' @rdname get_ci

get_ci <- conf_int

# `get_confidence_interval` is a second, longer-named alias of `conf_int`,
# exported and documented on the same `get_ci` help page.
#' @export
#' @rdname get_ci

get_confidence_interval <- conf_int
1 change: 1 addition & 0 deletions R/hypothesize.R
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
#' @return A tibble containing the response (and explanatory, if specified)
#' variable data with parameter information stored as well
#' @importFrom dplyr as.tbl
#' @return a data frame with attributes set
#' @export
#' @examples
#' # Permutation test similar to ANOVA
Expand Down
3 changes: 2 additions & 1 deletion R/infer.R
Original file line number Diff line number Diff line change
Expand Up @@ -18,4 +18,5 @@ if(getRversion() >= "2.15.1")
"statistic", ".", "parameter", "p.value",
"xmin", "xmax", "density", "denom",
"diff_prop", "group_num", "n1", "n2",
"num_suc", "p_hat", "total_suc", "explan", "probs"))
"num_suc", "p_hat", "total_suc", "explan", "probs",
"conf.low", "conf.high"))
122 changes: 122 additions & 0 deletions R/p_value.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
#' Compute the p-value for (currently only) simulation-based methods
#'
#' \code{get_pvalue()} is an alias of \code{p_value()}.
#'
#' @param x data frame of calculated statistics or containing attributes
#' of theoretical distribution values
#' @param obs_stat a numeric value or a 1x1 data frame (as extreme or more
#' extreme than this)
#' @param direction a character string. Options are "less", "greater", or
#' "two_sided". Can also specify "left", "right", or "both".
#'
#' @return a 1x1 data frame with value between 0 and 1
#' @export
#' @rdname get_pvalue
#' @examples
#' mtcars_df <- mtcars %>%
#'   dplyr::mutate(am = factor(am))
#' d_hat <- mtcars_df %>%
#'   specify(mpg ~ am) %>%
#'   calculate(stat = "diff in means", order = c("1", "0"))
#' null_distn <- mtcars_df %>%
#'   specify(mpg ~ am) %>%
#'   hypothesize(null = "independence") %>%
#'   generate(reps = 100) %>%
#'   calculate(stat = "diff in means", order = c("1", "0"))
#' null_distn %>%
#'   p_value(obs_stat = d_hat, direction = "right")

p_value <- function(x, obs_stat, direction) {

  assertive::assert_is_data.frame(x)
  obs_stat <- check_obs_stat(obs_stat)
  check_direction(direction)

  # `x` counts as simulation-based only when it carries a `generate`
  # attribute that evaluates to TRUE.
  is_simulation_based <- !is.null(attr(x, "generate")) &&
    attr(x, "generate")

  # Theory-based p-values are not implemented yet. Fail with an explicit
  # message here rather than falling through to an undefined result
  # ("object 'pvalue' not found").
  if (!is_simulation_based) {
    stop(paste("`p_value()` currently works only for simulation-based",
               "methods, but the `generate` attribute of `x` is not set",
               "to TRUE. Check your {infer} pipeline again."))
  }

  # TODO: add a theoretical-distribution branch driven by the `theory_type`
  # and `distr_param` attributes of `x`.
  simulation_based_p_value(x = x, obs_stat = obs_stat,
                           direction = direction)
}

# Compute a p-value from the simulated statistics in the `stat` column of `x`.
#
# "less"/"left" gives the proportion of `stat` values at or below `obs_stat`;
# "greater"/"right" gives the proportion at or above it; any other direction
# is delegated to `two_sided_p_value()`. The one-sided branches return a
# summarized data frame with a single `p_value` column.
simulation_based_p_value <- function(x, obs_stat, direction) {

  # Lower tail
  if (direction %in% c("less", "left")) {
    return(dplyr::summarize(x, p_value = mean(stat <= obs_stat)))
  }

  # Upper tail
  if (direction %in% c("greater", "right")) {
    return(dplyr::summarize(x, p_value = mean(stat >= obs_stat)))
  }

  # Everything else is treated as two-sided
  two_sided_p_value(x, obs_stat = obs_stat)
}

# Two-sided p-value for `obs_stat` against the simulated statistics `x$stat`.
#
# The observed statistic is reflected about the median of `x$stat`, and the
# `get_percentile()` values at the observed and reflected points are combined
# so that both tails are counted (presumably `get_percentile()` returns the
# proportion of values at or below its second argument -- confirm against its
# definition). The result is capped at 1 and returned as a 1x1 tibble with
# column `p_value`.
two_sided_p_value <- function(x, obs_stat) {

  sim_stats <- x$stat
  center <- stats::median(sim_stats)
  obs_is_below_center <- center >= obs_stat

  # Point on the opposite side of the median, the same distance away
  reflected <- center + center - obs_stat

  if (obs_is_below_center) {
    basic_p_value <- get_percentile(sim_stats, obs_stat) +
      (1 - get_percentile(sim_stats, reflected))
  } else {
    basic_p_value <- 1 - get_percentile(sim_stats, obs_stat) +
      (get_percentile(sim_stats, reflected))
  }

  # Catch all if adding both sides produces a number
  # larger than 1. Should update with test in that
  # scenario instead of using >=
  capped <- if (basic_p_value >= 1) 1 else basic_p_value

  tibble::tibble(p_value = capped)
}

# `get_pvalue` is bound to the same function value as `p_value`, so it accepts
# the same arguments; it is exported and documented on the `get_pvalue` page.
#' @export
#' @rdname get_pvalue
get_pvalue <- p_value

# which_distribution <- function(x, theory_type, obs_stat, direction){
#
# param <- attr(x, "distr_param")
# if(!is.null(attr(x, "distr_param2")))
# param2 <- attr(x, "distr_param2")
#
# if(theory_type == "Two sample t")
# return(pt(q = obs_stat,
# df = param,
# lower.tail = set_lower_tail(direction))
# )
#
# }

#theory_t_pvalue <-

# set_lower_tail <- function(direction){
# if(direction %in% c("greater", "right"))
# lower_tail <- FALSE
# else
# lower_tail <- TRUE
#
# lower_tail
# }
4 changes: 2 additions & 2 deletions R/pipe.R
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#' Pipe
#'
#' Like \code{dplyr}, \code{infer} also uses the pipe function, \code{\%>\%} to turn
#' function composition into a series of imperative statements.
#' Like \code{dplyr}, \code{infer} also uses the pipe function, \code{\%>\%}
#' to turn function composition into a series of imperative statements.
#'
#' @importFrom magrittr %>%
#' @name %>%
Expand Down
Loading