/
interaction.R
175 lines (160 loc) · 6.76 KB
/
interaction.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
swishInterxPair <- function(infRepsArray, condition, covariate, pair,
nperms=100, pc=5, quiet=FALSE) {
stopifnot(is.numeric(pair) | is.character(pair) | is.factor(pair))
stopifnot(!anyNA(covariate))
stopifnot(!anyNA(pair))
pair <- as.integer(factor(pair))
if (!all(table(pair, condition) == 1))
stop("'pair' should have a single sample for both levels of 'condition'")
stopifnot(nlevels(covariate) == 2)
if (!all(table(pair, covariate) %in% c(0,2)))
stop("'pair' should be nested within 'cov'")
dims <- dim(infRepsArray)
out <- getInterxPairStat(infRepsArray, condition, covariate, pair, pc)
stat <- out$stat
group <- out$group
lfcArray <- out$lfcArray
ranks <- out$ranks
# old code re-computed ranks per permutation
# ranks <- NULL
grp1 <- group == levels(group)[1]
grp2 <- group == levels(group)[2]
lfc_mat <- apply(lfcArray[,grp2,], c(1,3), mean) -
apply(lfcArray[,grp1,], c(1,3), mean)
# the reported log2FC is the difference in the mean LFC between the two groups
# the median here is taken over inferential replicates
log2FC <- rowMedians(lfc_mat)
# the permutation framework is to permute which pairs are in which group
perms <- getPerms(group, nperms)
nperms <- permsNote(perms, nperms)
nulls <- matrix(nrow=dims[1], ncol=nperms)
if (!quiet) message("generating test statistics over permutations")
for (p in seq_len(nperms)) {
if (!quiet) svMisc::progress(p, max.value=nperms, init=(p==1), gui=FALSE)
nulls[,p] <- getSamStat(lfcArray,
group[perms$perms[p,]],
ranks=ranks)
}
if (!quiet) message("")
list(stat=stat, log2FC=log2FC, nulls=nulls)
}
swishInterx <- function(infRepsArray, condition, covariate,
nperms=100, pc=5, nRandomPairs=30,
quiet=FALSE) {
stopifnot(nlevels(covariate) == 2)
stopifnot(!anyNA(covariate))
if (!all(table(condition, covariate) > 0))
stop("swish with interaction across two variables requires samples for each combination")
dims <- dim(infRepsArray)
tab <- table(condition, covariate)
# if sizes are equal, don't need to double or splice out columns
all_equal <- all(tab[,2] == tab[,1])
# don't have pairs, but instead we use pseudo-pairs multiple times
stats <- matrix(nrow=dims[1], ncol=nRandomPairs)
for (r in seq_len(nRandomPairs)) {
# the easy case: balanced numbers of samples across condition
if (all_equal) {
pair <- getPseudoPair(condition, covariate)
out <- getInterxPairStat(infRepsArray, condition, covariate,
pair, pc)
} else {
# random subsetting to balance groups, then use pseudo pairs
idx <- randomSamplesToRemove(tab, condition, covariate)
pair <- getPseudoPair(condition[-idx], covariate[-idx])
out <- getInterxPairStat(infRepsArray[,-idx,],
condition[-idx], covariate[-idx],
pair, pc)
}
stats[,r] <- out$stat
}
stat <- rowMeans(stats)
# any of the iterations works to define group
# (this is an ordered integer vector of pairs
# across covariate, with condition collapsed as LFCs)
group <- out$group
# log2FC is the difference between condition LFC across covariate groups
log2FC <- getDeltaLFC(infRepsArray, condition, covariate, pc)
# this permutation scheme is different than others in swish
# (and slower) because we reform lfcArray inside the
# permutation loop - necessary because of the random
# pseudo pairs
perms <- getPerms(group, nperms)
nperms <- permsNote(perms, nperms)
nulls <- matrix(nrow=dims[1], ncol=nperms)
if (!quiet) message("generating test statistics over permutations")
for (p in seq_len(nperms)) {
if (!quiet) svMisc::progress(p, max.value=nperms, init=(p==1), gui=FALSE)
if (all_equal) {
# first draw a pseudo-pairing
pair <- getPseudoPair(condition, covariate)
out <- getInterxPairStat(infRepsArray, condition, covariate,
pair, pc)
} else {
idx <- randomSamplesToRemove(tab, condition, covariate)
pair <- getPseudoPair(condition[-idx], covariate[-idx])
out <- getInterxPairStat(infRepsArray[,-idx,],
condition[-idx], covariate[-idx],
pair, pc)
}
lfcArray <- out$lfcArray
# then permute the pseudo-pairs across covariate
nulls[,p] <- getSamStat(lfcArray,
group[perms$perms[p,]])
}
if (!quiet) message("")
list(stat=stat, log2FC=log2FC, nulls=nulls)
}
getInterxPairStat <- function(infRepsArray, condition, covariate, pair, pc) {
# 'lfcArray' is an array of genes x pair x inf rep
# it is in the order of the pair (1,2,3,...)
lfcArray <- getLog2FCPair(infRepsArray, condition, pair, pc, array=TRUE)
dat <- data.frame(pair, covariate, stringsAsFactors=FALSE)
dat <- dat[!duplicated(dat$pair),]
dat <- dat[order(dat$pair),]
group <- dat$covariate # this is now along 'lfcArray'
stopifnot(length(group) == dim(lfcArray)[2])
# here we perform Wilcoxon rank sum testing of the condition LFCs across group
out <- getSamStat(lfcArray, group, returnRanks=TRUE)
list(stat=out$stat, ranks=out$ranks, group=group, lfcArray=lfcArray)
}
getDeltaLFC <- function(infRepsArray, condition, covariate, pc) {
grp1 <- covariate == levels(covariate)[1]
grp2 <- covariate == levels(covariate)[2]
lfc1 <- getLog2FC(infRepsArray[,grp1,], condition[grp1], pc=pc, array=TRUE)
lfc2 <- getLog2FC(infRepsArray[,grp2,], condition[grp2], pc=pc, array=TRUE)
# the difference in LFC between the two groups, median over inf reps
rowMedians(lfc2 - lfc1)
}
getPseudoPair <- function(condition, covariate) {
pair <- integer(length(condition))
cond1 <- condition == levels(condition)[1]
cond2 <- condition == levels(condition)[2]
pair[cond2] <- seq_len(sum(cond2))
for (i in 1:2) {
grp <- covariate == levels(covariate)[i]
pair[cond1 & grp] <- sample(x=pair[cond2 & grp],
size=sum(cond1 & grp),
replace=FALSE)
}
pair
}
randomSamplesToRemove <- function(tab, condition, covariate) {
cond1 <- condition == levels(condition)[1]
cond2 <- condition == levels(condition)[2]
cov_lvls <- levels(covariate)
idx <- numeric()
# find covariate levels which are imbalanced
for (j in which(tab[1,] != tab[2,])) {
cond1small <- tab[1,j] < tab[2,j]
if (cond1small) {
idx <- c(idx, sample(which(cond2 & covariate == cov_lvls[j]),
tab[2,j] - tab[1,j],
replace=FALSE))
} else {
idx <- c(idx, sample(which(cond1 & covariate == cov_lvls[j]),
tab[1,j] - tab[2,j],
replace=FALSE))
}
}
idx
}