-
Notifications
You must be signed in to change notification settings - Fork 15
/
dist_multinomial.R
125 lines (118 loc) · 3.55 KB
/
dist_multinomial.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
#' The Multinomial distribution
#'
#' @description
#' `r lifecycle::badge('stable')`
#'
#' The multinomial distribution is a generalization of the binomial
#' distribution to multiple categories. It is perhaps easiest to think
#' that we first extend a [dist_bernoulli()] distribution to include more
#' than two categories, resulting in a [dist_categorical()] distribution.
#' We then extend repeat the Categorical experiment several (\eqn{n})
#' times.
#'
#' @param size The number of draws from the Categorical distribution.
#' @param prob The probability of an event occurring from each draw.
#'
#' @details
#'
#' We recommend reading this documentation on
#' <https://pkg.mitchelloharawild.com/distributional/>, where the math
#' will render nicely.
#'
#' In the following, let \eqn{X = (X_1, ..., X_k)} be a Multinomial
#' random variable with success probability `p` = \eqn{p}. Note that
#' \eqn{p} is vector with \eqn{k} elements that sum to one. Assume
#' that we repeat the Categorical experiment `size` = \eqn{n} times.
#'
#' **Support**: Each \eqn{X_i} is in \eqn{{0, 1, 2, ..., n}}.
#'
#' **Mean**: The mean of \eqn{X_i} is \eqn{n p_i}.
#'
#' **Variance**: The variance of \eqn{X_i} is \eqn{n p_i (1 - p_i)}.
#' For \eqn{i \neq j}, the covariance of \eqn{X_i} and \eqn{X_j}
#' is \eqn{-n p_i p_j}.
#'
#' **Probability mass function (p.m.f)**:
#'
#' \deqn{
#' P(X_1 = x_1, ..., X_k = x_k) = \frac{n!}{x_1! x_2! ... x_k!} p_1^{x_1} \cdot p_2^{x_2} \cdot ... \cdot p_k^{x_k}
#' }{
#' P(X_1 = x_1, ..., X_k = x_k) = n! / (x_1! x_2! ... x_k!) p_1^x_1 p_2^x_2 ... p_k^x_k
#' }
#'
#' **Cumulative distribution function (c.d.f)**:
#'
#' Omitted for multivariate random variables for the time being.
#'
#' **Moment generating function (m.g.f)**:
#'
#' \deqn{
#' E(e^{tX}) = \left(\sum_{i=1}^k p_i e^{t_i}\right)^n
#' }{
#' E(e^(tX)) = (p_1 e^t_1 + p_2 e^t_2 + ... + p_k e^t_k)^n
#' }
#'
#' @seealso [stats::Multinomial]
#'
#' @examples
#' dist <- dist_multinomial(size = c(4, 3), prob = list(c(0.3, 0.5, 0.2), c(0.1, 0.5, 0.4)))
#'
#' dist
#' mean(dist)
#' variance(dist)
#'
#' generate(dist, 10)
#'
#' # TODO: Needs fixing to support multiple inputs
#' # density(dist, 2)
#' # density(dist, 2, log = TRUE)
#'
#' @name dist_multinomial
#' @export
dist_multinomial <- function(size, prob){
size <- vec_cast(size, double())
prob <- lapply(prob, function(x) x/sum(x))
prob <- as_list_of(prob, .ptype = double())
new_dist(s = size, p = prob, class = "dist_multinomial")
}
#' @export
format.dist_multinomial <- function(x, digits = 2, ...){
sprintf(
"Multinomial(%s)[%s]",
format(x[["s"]], digits = digits, ...),
format(length(x[["p"]]), digits = digits, ...)
)
}
#' @export
density.dist_multinomial <- function(x, at, ...){
if(is.list(at)) return(vapply(at, density, numeric(1L), x = x, ...))
stats::dmultinom(at, x[["s"]], x[["p"]])
}
#' @export
log_density.dist_multinomial <- function(x, at, ...){
stats::dmultinom(at, x[["s"]], x[["p"]], log = TRUE)
}
#' @export
generate.dist_multinomial <- function(x, times, ...){
t(stats::rmultinom(times, x[["s"]], x[["p"]]))
}
#' @export
mean.dist_multinomial <- function(x, ...){
matrix(x[["s"]]*x[["p"]], nrow = 1)
}
#' @export
covariance.dist_multinomial <- function(x, ...){
s <- x[["s"]]
p <- x[["p"]]
v <- numeric(length(p)^2)
for(i in seq_along(p)){
for(j in seq_along(p)){
v[(i-1)*length(p) + j] <- if(i == j) s*p[i]*(1-p[j]) else -s*p[i]*p[j]
}
}
list(matrix(v, nrow = length(p)))
}
#' @export
dim.dist_multinomial <- function(x){
length(x[["p"]])
}