-
Notifications
You must be signed in to change notification settings - Fork 2
/
cluster_jackknife.R
97 lines (83 loc) · 2.69 KB
/
cluster_jackknife.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
cluster_jackknife <- function(
y,
X,
cluster_df,
type){
#' Conducts the Cluster Jackknife as in MNW (2022) for
#' CRV3 / CRV3J variance matrix estimation
#' @param y A vector containing the dependent variable
#' @param X A regression design matrix
#' @param cluster_df A data.frame containing the clustering variable
#' @param type Either "CRV3" or "CRV3J", where each implements the
#' variance-covariance estimator from MNW (2022) of the same name
#'
#' @return An list, containing
#' \item{vcov}{The CRV3 variance-covariance matrix}
#' \item{beta_jack}{The leave-one-cluster-out jackknive regression
#' coefficients}
#' \item{unique_clusters}{A vector containing all unique clusters contained in
#' `cluster_df`}
#' \item{tXgXg}{A list containing crossproducts of Xg, where X g is the
#' submatrix of the design matrix X which belong to observations in cluster g}
#' \item{tXX}{The crossproduct of the design matrix X}
#' \item{tXy}{t(X) %*% y}
#' \item{G}{The number of unique clusters}
#' \item{small_sample_correction}{The employed small sample correction}
#' @noRd
unique_clusters <- as.character(unique(cluster_df[, , drop = TRUE]))
G <- length(unique_clusters)
small_sample_correction <- (G - 1) / G
# calculate X_g'X_g
tXgXg <- lapply(
unique_clusters,
function(x) crossprod(X[cluster_df == x, , drop = FALSE])
)
names(tXgXg) <- unique_clusters
tXX <- Reduce("+", tXgXg)
# all.equal(tXX, crossprod(X))
tXgyg <- lapply(
unique_clusters,
function(x) {
t(X[cluster_df == x, , drop = FALSE]) %*% y[cluster_df == x, drop = FALSE]
}
)
names(tXgyg) <- unique_clusters
tXy <- Reduce("+", tXgyg)
# all.equal(tXy, t(X) %*% y)
beta_hat <- solve(tXX) %*% tXy
# initiate jackknife
beta_jack <-
lapply(
unique_clusters,
function(x) {
MASS::ginv(tXX - tXgXg[[x]]) %*% (tXy - (t(X[cluster_df == x, , drop = FALSE]) %*% y[cluster_df == x, drop = FALSE]))
}
)
if (type == "CRV3J") {
beta_bar <- beta_center <- Reduce("+", beta_jack) / G
} else if (type == "CRV3") {
beta_center <- beta_hat
}
V3 <- lapply(
seq_along(unique_clusters),
function(x) {
tcrossprod(beta_jack[[x]] - beta_center)
}
)
vcov <- Reduce("+", V3) * small_sample_correction
beta_jack <- Reduce("cbind", beta_jack)
rownames(beta_jack) <- colnames(tXX)
colnames(beta_jack) <- unique_clusters
colnames(vcov) <- rownames(vcov) <- colnames(tXX)
res <- list(
vcov = vcov,
beta_jack = beta_jack,
unique_clusters = unique_clusters,
tXgXg = tXgXg,
tXX = tXX,
tXy = tXy,
G = G,
small_sample_correction = small_sample_correction
)
res
}