-
Notifications
You must be signed in to change notification settings - Fork 1
/
random-tidy-zero-truncated-binomial.R
148 lines (129 loc) · 4.67 KB
/
random-tidy-zero-truncated-binomial.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
#' Tidy Randomly Generated Binomial Distribution Tibble
#'
#' @family Discrete Distribution
#' @family Binomial
#' @family Zero Truncated Distribution
#'
#' @author Steven P. Sanderson II, MPH
#'
#' @seealso \url{https://openacttexts.github.io/Loss-Data-Analytics/ChapSummaryDistributions.html}
#'
#' @details This function uses the underlying `actuar::rztbinom()`, and its underlying
#' `p`, `d`, and `q` functions. For more information please see [actuar::rztbinom()]
#'
#' @description This function will generate `n` random points from a zero truncated binomial
#' distribution with a user provided, `.size`, `.prob`, and number of
#' random simulations to be produced. The function returns a tibble with the
#' simulation number column the x column which corresponds to the n randomly
#' generated points, the `d_`, `p_` and `q_` data points as well.
#'
#' The data is returned un-grouped.
#'
#' The columns that are output are:
#'
#' - `sim_number` The current simulation number.
#' - `x` The current value of `n` for the current simulation.
#' - `y` The randomly generated data point.
#' - `dx` The `x` value from the [stats::density()] function.
#' - `dy` The `y` value from the [stats::density()] function.
#' - `p` The values from the resulting p_ function of the distribution family.
#' - `q` The values from the resulting q_ function of the distribution family.
#'
#' @param .n The number of randomly generated points you want.
#' @param .size Number of trials, zero or more.
#' @param .prob Probability of success on each trial 0 <= prob <= 1.
#' @param .num_sims The number of randomly generated simulations you want.
#' @param .return_tibble A logical value indicating whether to return the result
#' as a tibble. Default is TRUE.
#'
#' @examples
#' tidy_zero_truncated_binomial()
#'
#' @return
#' A tibble of randomly generated data.
#'
#' @name tidy_zero_truncated_binomial
NULL
#' @export
#' @rdname tidy_zero_truncated_binomial
tidy_zero_truncated_binomial <- function(.n = 50, .size = 1, .prob = 1, .num_sims = 1,
.return_tibble = TRUE) {
# Tidyeval ----
n <- as.integer(.n)
num_sims <- as.integer(.num_sims)
size <- as.numeric(.size)
prob <- as.numeric(.prob)
ret_tbl <- as.logical(.return_tibble)
# Checks ----
if (!is.integer(n) | n < 0) {
rlang::abort(
"The parameters '.n' must be of class integer. Please pass a whole
number like 50 or 100. It must be greater than 0."
)
}
if (!is.integer(num_sims) | num_sims < 0) {
rlang::abort(
"The parameter `.num_sims' must be of class integer. Please pass a
whole number like 50 or 100. It must be greater than 0."
)
}
if (!is.numeric(size) | !is.numeric(prob)) {
rlang::abort(
"The parameters of .size and .prob must be of class numeric and greater than 0."
)
}
if (size < 1) {
rlang::abort("The parameter of .size must be greater than or equal to 1.")
}
if (prob > 1 | prob < 0) {
rlang::abort("The parameter of .prob must be 0 <= .prob <= 1")
}
x <- seq(1, num_sims, 1)
# ps <- seq(-n, n - 1, 2)
qs <- seq(0, 1, (1 / (n - 1)))
ps <- qs
# Create a data.table with one row per simulation
df <- data.table::CJ(sim_number = factor(1:num_sims), x = 1:n)
# Group the data by sim_number and add columns for x and y
df[, y := actuar::rztbinom(n = .N, size = size, prob = prob)]
# Compute the density of the y values and add columns for dx and dy
df[, c("dx", "dy") := density(y, n = n)[c("x", "y")], by = sim_number]
# Compute the p-values for the y values and add a column for p
df[, p := actuar::pztbinom(y, size = size, prob = prob)]
# Compute the q-values for the p-values and add a column for q
df[, q := actuar::qztbinom(p, size = size, prob = prob)]
if(.return_tibble){
df <- dplyr::as_tibble(df)
} else {
data.table::setkey(df, NULL)
}
# Create a tibble of the parameter grid
param_grid <- dplyr::tibble(.size, .prob)
# Attach descriptive attributes to tibble
attr(df, "distribution_family_type") <- "discrete"
attr(df, ".size") <- .size
attr(df, ".prob") <- .prob
attr(df, ".n") <- .n
attr(df, ".num_sims") <- .num_sims
attr(df, ".ret_tbl") <- .return_tibble
attr(df, "tibble_type") <- "tidy_zero_truncated_binomial"
attr(df, "ps") <- ps
attr(df, "qs") <- qs
attr(df, "param_grid") <- param_grid
attr(df, "param_grid_txt") <- paste0(
"c(",
paste(param_grid[, names(param_grid)], collapse = ", "),
")"
)
attr(df, "dist_with_params") <- paste0(
"Zero Truncated Binomial",
" ",
paste0(
"c(",
paste(param_grid[, names(param_grid)], collapse = ", "),
")"
)
)
# Return final result as function output
return(df)
}