-
Notifications
You must be signed in to change notification settings - Fork 14
/
distUnion.R
77 lines (64 loc) · 2.63 KB
/
distUnion.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
#' Combine multiple distance specifications into a single distance specification.
#'
#' Creates a new distance specification from the union of two or more
#' distance specifications. The constituent distances specifications
#' may have overlapping treated and control units (identified by the
#' \code{rownames} and \code{colnames} respectively).
#'
#' For combining multiple distance specifications with common
#' controls, but different treated units, \code{\link{rbind}} provides
#' a way to combine the different objects. Likewise,
#' \code{\link{cbind}} provides a way to combine distance
#' specifications over common treated units, but different control
#' units.
#'
#' \code{distUnion} can combine distance units that have common
#' treated and control units into a coherent single distance
#' object. If there are duplicate treated-control entries in multiple
#' input distances, the first entry will be used.
#'
#' @param ... The distance specifications (as created with with
#' \code{\link{match_on}}, \code{\link{exactMatch}}, or other distance
#' creation function).
#' @return An InfinitySparseMatrix object with all treated and control
#' units from the arguments combined. Duplicate entries are resolved
#' in favor of the earliest argument (e.g., \code{distUnion(A, B)}
#' will favor entries in \code{A} over entries in \code{B}).
#' @seealso \code{\link{match_on}}, \code{\link{exactMatch}},
#' \code{\link{fullmatch}}, \code{\link{pairmatch}},
#' \code{\link{cbind}}, \code{\link{rbind}}
#' @export
distUnion <- function(...) {
arglst <- list(...)
if (!all(vapply(arglst, validDistanceSpecification, logical(1)))) {
stop("All arguments must be valid distance specifications")
}
isms <- lapply(arglst, as.InfinitySparseMatrix)
treateds <- lapply(isms, function(i) { i@rownames })
controls <- lapply(isms, function(i) { i@colnames })
utreated <- unique(unlist(treateds))
ucontrols <- unique(unlist(controls))
tmap <- seq_along(utreated)
names(tmap) <- utreated
cmap <- seq_along(ucontrols)
names(cmap) <- ucontrols
updated.isms <- lapply(isms, function(i) {
rnms <- i@rownames[i@rows]
cnms <- i@colnames[i@cols]
i@rows <- tmap[rnms]
i@cols <- cmap[cnms]
return(i)
})
pairs <- matrix(c(unlist(lapply(updated.isms, function(i) { i@cols })),
unlist(lapply(updated.isms, function(i) { i@rows }))),
ncol = 2)
dups <- duplicated(pairs)
pairs <- pairs[!dups, ]
unionism <- makeInfinitySparseMatrix(
unlist(lapply(updated.isms, function(i) { i@.Data }))[!dups],
cols = pairs[, 1],
rows = pairs[, 2],
ucontrols,
utreated)
return(unionism)
}