-
Notifications
You must be signed in to change notification settings - Fork 9
/
get_mallows_loglik.R
82 lines (77 loc) · 3.09 KB
/
get_mallows_loglik.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
#' Likelihood and log-likelihood evaluation for a Mallows mixture model
#'
#' @description Compute either the likelihood or the log-likelihood value of the
#' Mallows mixture model parameters for a dataset of complete rankings.
#' @param rho A matrix of size `n_clusters x n_items` whose rows are
#' permutations of the first n_items integers corresponding to the modal
#' rankings of the Mallows mixture components.
#' @param alpha A vector of `n_clusters` non-negative scalar specifying the
#' scale (precision) parameters of the Mallows mixture components.
#' @param weights A vector of `n_clusters` non-negative scalars specifying
#' the mixture weights.
#' @param metric Character string specifying the distance measure to use.
#' Available options are `"kendall"`, `"cayley"`, `"hamming"`,
#' `"ulam"`, `"footrule"`, and `"spearman"`.
#' @param rankings A matrix with observed rankings in each row.
#' @param observation_frequency A vector of observation frequencies (weights) to apply to
#' each row in `rankings`. This can speed up computation if a large
#' number of assessors share the same rank pattern. Defaults to `NULL`,
#' which means that each row of `rankings` is multiplied by 1. If
#' provided, `observation_frequency` must have the same number of elements as there
#' are rows in `rankings`, and `rankings` cannot be `NULL`.
#' @param log A logical; if TRUE, the log-likelihood value is returned,
#' otherwise its exponential. Default is `TRUE`.
#'
#' @return The likelihood or the log-likelihood value corresponding to one or
#' more observed complete rankings under the Mallows mixture rank model with
#' distance specified by the `metric` argument.
#' @export
#'
#' @example inst/examples/get_mallows_loglik_example.R
#' @family rank functions
#'
get_mallows_loglik <- function(
rho, alpha, weights,
metric = c("footrule", "spearman", "cayley", "hamming", "kendall", "ulam"),
rankings, observation_frequency = NULL, log = TRUE) {
metric <- match.arg(metric, c(
"footrule", "spearman", "cayley", "hamming",
"kendall", "ulam"
))
if (!is.matrix(rankings)) rankings <- matrix(rankings, nrow = 1)
if (!is.null(observation_frequency)) {
if (nrow(rankings) != length(observation_frequency)) {
stop(
"observation_frequency must be ",
"of same length as the number of rows in rankings"
)
}
} else {
observation_frequency <- rep(1, nrow(rankings))
}
if (!is.matrix(rho)) rho <- matrix(rho, nrow = 1)
n_clusters <- length(weights)
n_items <- ncol(rankings)
N <- sum(observation_frequency)
pfun_values <- prepare_partition_function(metric, n_items)
loglik <- vapply(
X = seq_len(n_clusters),
FUN = function(g) {
-(alpha[g] / n_items * sum(get_rank_distance(
rankings = t(rankings),
rho = rho[g, ],
metric = metric
) * observation_frequency) +
N * get_partition_function(
alpha = alpha[g], n_items = n_items, metric = metric, pfun_values
)) *
weights[[g]]
},
FUN.VALUE = numeric(1)
)
if (!log) {
exp(sum(loglik))
} else {
sum(loglik)
}
}