Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 233 lines (180 sloc) 7.47 kB
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
1 siegel.tukey <- function(x, y, id.col = FALSE, adjust.median = F,
2 rnd = -1, alternative = "two.sided", mu = 0, paired = FALSE,
3 exact = FALSE, correct = TRUE, conf.int = FALSE, conf.level = 0.95) {
4 ###### published on:
5 # http://www.r-statistics.com/2010/02/siegel-tukey-a-non-parametric-test-for-equality-in-variability-r-code/
6 ## Main author of the function: Daniel Malter
7
8 # x: a vector of data
9
10 # y: Group indicator (if id.col=TRUE); data of the second
11 # group (if
12 # id.col=FALSE). If y is the group indicator it MUST take 0
13 # or 1 to indicate
14 # the groups, and x must contain the data for both groups.
15
16 # id.col: If TRUE (default), then x is the data column and y
17 # is the ID column,
18 # indicating the groups. If FALSE, x and y are both data
19 # columns. id.col must
20 # be FALSE only if both data columns are of the same length.
21
22 # adjust.median: Should between-group differences in medians
23 # be leveled before
24 # performing the test? In certain cases, the Siegel-Tukey
25 # test is susceptible
26 # to median differences and may indicate significant
27 # differences in
28 # variability that, in reality, stem from differences in
29 # medians.
30
31 # rnd: Should the data be rounded and, if so, to which
32 # decimal? The default
33 # (-1) uses the data as is. Otherwise, rnd must be a
34 # non-negative integer.
35 # Typically, this option is not needed. However,
36 # occasionally, differences in
37 # the precision with which certain functions return values
38 # cause the merging
39 # of two data frames to fail within the siegel.tukey
40 # function. Only then
41 # rounding is necessary. This operation should not be
42 # performed if it affects
43 # the ranks of observations.
44
45 # … arguments passed on to the Wilcoxon test. See
46 # ?wilcox.test
47
48 # Value: Among other output, the function returns the data,
49 # the Siegel-Tukey
50 # ranks, the associated Wilcoxon’s W and the p-value for a
51 # Wilcoxon test on
52 # tie-adjusted Siegel-Tukey ranks (i.e., it performs and
53 # returns a
54 # Siegel-Tukey test). If significant, the group with the
55 # smaller rank sum has
56 # greater variability.
57
58 # References: Sidney Siegel and John Wilder Tukey (1960) “A
59 # nonparametric sum
60 # of ranks procedure for relative spread in unpaired
61 # samples.” Journal of the
62 # American Statistical Association. See also, David J.
63 # Sheskin (2004)
64 # ”Handbook of parametric and nonparametric statistical
65 # procedures.” 3rd
66 # edition. Chapman and Hall/CRC. Boca Raton, FL.
67
68 # Notes: The Siegel-Tukey test has relatively low power and
69 # may, under certain
70 # conditions, indicate significance due to differences in
71 # medians rather than
72 # differences in variabilities (consider using the argument
73 # adjust.median).
74
75 # Output (in this order)
76
77 # 1. Group medians (after median adjustment if specified)
78 # 2. Wilcoxon-test for between-group differences in medians
79 # (after the median
80 # adjustment if specified)
81 # 3. Data, group membership, and the Siegel-Tukey ranks
82 # 4. Mean Siegel-Tukey rank by group (smaller values indicate
83 # greater
84 # variability)
85 # 5. Siegel-Tukey test (Wilcoxon test on tie-adjusted
86 # Siegel-Tukey ranks)
87
88 is.formula <- function(x) class(x) == "formula"
89
90 if (is.formula(x)) {
91 y <- do.call(c, list(as.name(all.vars(x)[2])), envir = parent.frame(2))
92 x <- do.call(c, list(as.name(all.vars(x)[1])), envir = parent.frame(2)) # I am using parent.frame(2) since if the name of the variable in the equation is 'x', then we will mistakenly get the function in here instead of the vector.
93 id.col <- TRUE
94 # print(x)
95 # print(ls.str())
96 # data=data.frame(c(x,y),rep(c(0,1),c(length(x),length(y))))
97 # print(data)
98 }
99
100 if (id.col == FALSE) {
101 data = data.frame(c(x, y), rep(c(0, 1), c(length(x), length(y))))
102 } else {
103 data = data.frame(x, y)
104 }
105 names(data) = c("x", "y")
106 data = data[order(data$x), ]
107 if (rnd > -1) {
108 data$x = round(data$x, rnd)
109 }
110
111 if (adjust.median == T) {
112 cat("\n", "Adjusting medians...", "\n", sep = "")
113 data$x[data$y == 0] = data$x[data$y == 0] - (median(data$x[data$y ==
114 0]))
115 data$x[data$y == 1] = data$x[data$y == 1] - (median(data$x[data$y ==
116 1]))
117 }
118 cat("\n", "Median of group 1 = ", median(data$x[data$y == 0]),
119 "\n", sep = "")
120 cat("Median of group 2 = ", median(data$x[data$y == 1]), "\n",
121 "\n", sep = "")
122 cat("Testing median differences...", "\n")
123 print(wilcox.test(data$x[data$y == 0], data$x[data$y == 1]))
124
125 # The following must be done for the case when id.col==F
126 x <- data$x
127 y <- data$y
128
129 cat("Performing Siegel-Tukey rank transformation...", "\n",
130 "\n")
131
132
133
134 sort.x <- sort(data$x)
135 sort.id <- data$y[order(data$x)]
136
137 data.matrix <- data.frame(sort.x, sort.id)
138
139 base1 <- c(1, 4)
140 iterator1 <- matrix(seq(from = 1, to = length(x), by = 4)) -
141 1
142 rank1 <- apply(iterator1, 1, function(x) x + base1)
143
144 iterator2 <- matrix(seq(from = 2, to = length(x), by = 4))
145 base2 <- c(0, 1)
146 rank2 <- apply(iterator2, 1, function(x) x + base2)
147
148 #print(rank1)
149 #print(rank2)
150
151 if (length(rank1) == length(rank2)) {
152 rank <- c(rank1[1:floor(length(x)/2)], rev(rank2[1:ceiling(length(x)/2)]))
153 } else {
154 rank <- c(rank1[1:ceiling(length(x)/2)], rev(rank2[1:floor(length(x)/2)]))
155 }
156
157
158 unique.ranks <- tapply(rank, sort.x, mean)
159 unique.x <- as.numeric(as.character(names(unique.ranks)))
160
161 rank.matrix <- data.frame(unique.x, unique.ranks)
162
163 ST.matrix <- merge(data.matrix, rank.matrix, by.x = "sort.x",
164 by.y = "unique.x")
165
166 print(ST.matrix)
167
168 cat("\n", "Performing Siegel-Tukey test...", "\n", sep = "")
169
170 ranks0 <- ST.matrix$unique.ranks[ST.matrix$sort.id == 0]
171 ranks1 <- ST.matrix$unique.ranks[ST.matrix$sort.id == 1]
172
173 cat("\n", "Mean rank of group 0: ", mean(ranks0), "\n", sep = "")
174 cat("Mean rank of group 1: ", mean(ranks1), "\n", sep = "")
175
176 print(wilcox.test(ranks0, ranks1, alternative = alternative,
177 mu = mu, paired = paired, exact = exact, correct = correct,
178 conf.int = conf.int, conf.level = conf.level))
179 }
79ba722 @talgalili Adding siegel.tukey.r
authored
180
181
041f1ba @talgalili Making x also work when it is a formula
authored
182
183
184
185
186
79ba722 @talgalili Adding siegel.tukey.r
authored
187 if(F) {
188
189 #Example:
190
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
191 ### 1
79ba722 @talgalili Adding siegel.tukey.r
authored
192 x=c(4,4,5,5,6,6)
193 y=c(0,0,1,9,10,10)
ac82586 @talgalili Fixed the case for id.col=F
authored
194 siegel.tukey(x,y, F)
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
195 siegel.tukey(x,y) #same as above
ac82586 @talgalili Fixed the case for id.col=F
authored
196
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
197 ### 2
ac82586 @talgalili Fixed the case for id.col=F
authored
198 # example for a non equal number of cases:
199 x=c(4,4,5,5,6,6)
200 y=c(0,0,1,9,10)
201 siegel.tukey(x,y,F)
79ba722 @talgalili Adding siegel.tukey.r
authored
202
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
203 ### 3
79ba722 @talgalili Adding siegel.tukey.r
authored
204 x <- c(33, 62, 84, 85, 88, 93, 97, 4, 16, 48, 51, 66, 98)
205 id <- c(0,0,0,0,0,0,0,1,1,1,1,1,1)
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
206 siegel.tukey(x,id,T)
041f1ba @talgalili Making x also work when it is a formula
authored
207 siegel.tukey(x~id) # from now on, this also works as a function...
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
208 siegel.tukey(x,id,T,adjust.median=F,exact=T)
79ba722 @talgalili Adding siegel.tukey.r
authored
209
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
210 ### 4
79ba722 @talgalili Adding siegel.tukey.r
authored
211 x<-c(177,200,227,230,232,268,272,297,47,105,126,142,158,172,197,220,225,230,262,270)
212 id<-c(rep(0,8),rep(1,12))
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
213 siegel.tukey(x,id,T,adjust.median=T)
79ba722 @talgalili Adding siegel.tukey.r
authored
214
215
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
216 ### 5
79ba722 @talgalili Adding siegel.tukey.r
authored
217 x=c(33,62,84,85,88,93,97)
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
218 y=c(4,16,48,51,66,98)
79ba722 @talgalili Adding siegel.tukey.r
authored
219 siegel.tukey(x,y)
220
269711d @talgalili Fixed function indentation and changed id.col default to be F
authored
221 ### 6
222 x<-c(0,0,1,4,4,5,5,6,6,9,10,10)
223 id<-c(0,0,0,1,1,1,1,1,1,0,0,0)
224 siegel.tukey(x,id,T)
225
226 ### 7
227 x <- c(85,106,96, 105, 104, 108, 86)
228 id<-c(0,0,1,1,1,1,1)
229 siegel.tukey(x,id,T)
79ba722 @talgalili Adding siegel.tukey.r
authored
230
231 }
232
Something went wrong with that request. Please try again.