Permalink
Newer
Older
100644 193 lines (131 sloc) 6.12 KB
1
siegel.tukey <-function(x,y,id.col=TRUE,adjust.median=F,rnd=-1,alternative="two.sided",mu=0,paired=FALSE,exact=FALSE,correct=TRUE,conf.int=FALSE,conf.level=0.95)
2
{
3
###### published on: http://www.r-statistics.com/2010/02/siegel-tukey-a-non-parametric-test-for-equality-in-variability-r-code/
4
## Main author of the function: Daniel Malter
5
6
# x: a vector of data
7
8
# y: Group indicator (if id.col=TRUE); data of the second group (if
9
# id.col=FALSE). If y is the group indicator it MUST take 0 or 1 to indicate
10
# the groups, and x must contain the data for both groups.
11
12
# id.col: If TRUE (default), then x is the data column and y is the ID column,
13
# indicating the groups. If FALSE, x and y are both data columns. id.col must
14
# be FALSE only if both data columns are of the same length.
15
16
# adjust.median: Should between-group differences in medians be leveled before
17
# performing the test? In certain cases, the Siegel-Tukey test is susceptible
18
# to median differences and may indicate significant differences in
19
# variability that, in reality, stem from differences in medians.
20
21
# rnd: Should the data be rounded and, if so, to which decimal? The default
22
# (-1) uses the data as is. Otherwise, rnd must be a non-negative integer.
23
# Typically, this option is not needed. However, occasionally, differences in
24
# the precision with which certain functions return values cause the merging
25
# of two data frames to fail within the siegel.tukey function. Only then
26
# rounding is necessary. This operation should not be performed if it affects
27
# the ranks of observations.
28
29
# … arguments passed on to the Wilcoxon test. See ?wilcox.test
30
31
# Value: Among other output, the function returns the data, the Siegel-Tukey
32
# ranks, the associated Wilcoxon’s W and the p-value for a Wilcoxon test on
33
# tie-adjusted Siegel-Tukey ranks (i.e., it performs and returns a
34
# Siegel-Tukey test). If significant, the group with the smaller rank sum has
35
# greater variability.
36
37
# References: Sidney Siegel and John Wilder Tukey (1960) “A nonparametric sum
38
# of ranks procedure for relative spread in unpaired samples.” Journal of the
39
# American Statistical Association. See also, David J. Sheskin (2004)
40
# ”Handbook of parametric and nonparametric statistical procedures.” 3rd
41
# edition. Chapman and Hall/CRC. Boca Raton, FL.
42
43
# Notes: The Siegel-Tukey test has relatively low power and may, under certain
44
# conditions, indicate significance due to differences in medians rather than
45
# differences in variabilities (consider using the argument adjust.median).
46
47
# Output (in this order)
48
49
# 1. Group medians (after median adjustment if specified)
50
# 2. Wilcoxon-test for between-group differences in medians (after the median
51
# adjustment if specified)
52
# 3. Data, group membership, and the Siegel-Tukey ranks
53
# 4. Mean Siegel-Tukey rank by group (smaller values indicate greater
54
# variability)
55
# 5. Siegel-Tukey test (Wilcoxon test on tie-adjusted Siegel-Tukey ranks)
56
57
is.formula <- function(x) class(x) == "formula"
58
59
if(is.formula(x))
60
{
61
y <- do.call(c, list(as.name(all.vars(x)[2])), envir = parent.frame(2))
62
x <- do.call(c, list(as.name(all.vars(x)[1])), envir = parent.frame(2)) # I am using parent.frame(2) since if the name of the variable in the equation is "x", then we will mistakenly get the function in here instead of the vector.
63
id.col <- TRUE
64
# print(x)
65
# print(ls.str())
66
# data=data.frame(c(x,y),rep(c(0,1),c(length(x),length(y))))
67
# print(data)
68
}
69
70
if(id.col==FALSE){
71
data=data.frame(c(x,y),rep(c(0,1),c(length(x),length(y))))
72
} else {
73
data=data.frame(x,y)
74
}
75
names(data)=c("x","y")
76
data=data[order(data$x),]
77
if(rnd>-1){data$x=round(data$x,rnd)}
78
79
if(adjust.median==T){
80
cat("\n","Adjusting medians...","\n",sep="")
81
data$x[data$y==0]=data$x[data$y==0]-(median(data$x[data$y==0]))
82
data$x[data$y==1]=data$x[data$y==1]-(median(data$x[data$y==1]))
83
}
84
cat("\n","Median of group 1 = ",median(data$x[data$y==0]),"\n",sep="")
85
cat("Median of group 2 = ",median(data$x[data$y==1]),"\n","\n",sep="")
86
cat("Testing median differences...","\n")
87
print(wilcox.test(data$x[data$y==0],data$x[data$y==1]))
88
89
# The following must be done for the case when id.col==F
90
x <- data$x
91
y <- data$y
92
93
cat("Performing Siegel-Tukey rank transformation...","\n","\n")
94
97
sort.x<-sort(data$x)
98
sort.id<-data$y[order(data$x)]
99
100
data.matrix<-data.frame(sort.x,sort.id)
101
102
base1<-c(1,4)
103
iterator1<-matrix(seq(from=1,to=length(x),by=4))-1
104
rank1<-apply(iterator1,1,function(x) x+base1)
105
106
iterator2<-matrix(seq(from=2,to=length(x),by=4))
107
base2<-c(0,1)
108
rank2<-apply(iterator2,1,function(x) x+base2)
109
110
#print(rank1)
111
#print(rank2)
112
113
if(length(rank1)==length(rank2)){
114
rank<-c(rank1[1:floor(length(x)/2)],rev(rank2[1:ceiling(length(x)/2)]))
115
} else{
116
rank<-c(rank1[1:ceiling(length(x)/2)],rev(rank2[1:floor(length(x)/2)]))
117
}
118
119
120
unique.ranks<-tapply(rank,sort.x,mean)
121
unique.x<-as.numeric(as.character(names(unique.ranks)))
122
123
rank.matrix<-data.frame(unique.x,unique.ranks)
124
125
ST.matrix<-merge(data.matrix,rank.matrix,by.x="sort.x",by.y="unique.x")
126
127
print(ST.matrix)
128
129
cat("\n","Performing Siegel-Tukey test...","\n",sep="")
130
131
ranks0<-ST.matrix$unique.ranks[ST.matrix$sort.id==0]
132
ranks1<-ST.matrix$unique.ranks[ST.matrix$sort.id==1]
133
134
cat("\n","Mean rank of group 0: ",mean(ranks0),"\n",sep="")
135
cat("Mean rank of group 1: ",mean(ranks1),"\n",sep="")
136
137
print(wilcox.test(ranks0,ranks1,alternative=alternative,mu=mu,paired=paired,exact=exact,correct=correct,conf.int=conf.int,conf.level=conf.level))
138
}
139
140
141
142
143
144
145
146
147
if(F) {
148
149
#Example:
150
151
x=c(4,4,5,5,6,6)
152
y=c(0,0,1,9,10,10)
153
siegel.tukey(x,y, F)
154
155
# example for a non equal number of cases:
156
x=c(4,4,5,5,6,6)
157
y=c(0,0,1,9,10)
158
siegel.tukey(x,y,F)
159
160
161
x <- c(33, 62, 84, 85, 88, 93, 97, 4, 16, 48, 51, 66, 98)
162
id <- c(0,0,0,0,0,0,0,1,1,1,1,1,1)
163
siegel.tukey(x,id)
164
siegel.tukey(x~id) # from now on, this also works as a function...
165
siegel.tukey(x,id,adjust.median=F,exact=T)
166
167
x<-c(0,0,1,4,4,5,5,6,6,9,10,10)
168
id<-c(0,0,0,1,1,1,1,1,1,0,0,0)
169
170
siegel.tukey(x,id)
171
172
x <- c(85,106,96, 105, 104, 108, 86)
173
id<-c(0,0,1,1,1,1,1)
174
175
siegel.tukey(x,id)
176
177
x<-c(177,200,227,230,232,268,272,297,47,105,126,142,158,172,197,220,225,230,262,270)
178
id<-c(rep(0,8),rep(1,12))
179
180
siegel.tukey(x,id,adjust.median=T)
181
182
183
184
185
x=c(33,62,84,85,88,93,97)
186
y=c(4,16,48,51,66,98)
187
188
siegel.tukey(x,y)
189
190
191
}
192