Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
branch: master
233 lines (180 sloc) 7.47 kB
siegel.tukey <- function(x, y, id.col = FALSE, adjust.median = F,
rnd = -1, alternative = "two.sided", mu = 0, paired = FALSE,
exact = FALSE, correct = TRUE, conf.int = FALSE, conf.level = 0.95) {
###### published on:
# http://www.r-statistics.com/2010/02/siegel-tukey-a-non-parametric-test-for-equality-in-variability-r-code/
## Main author of the function: Daniel Malter
# x: a vector of data
# y: Group indicator (if id.col=TRUE); data of the second
# group (if
# id.col=FALSE). If y is the group indicator it MUST take 0
# or 1 to indicate
# the groups, and x must contain the data for both groups.
# id.col: If TRUE (default), then x is the data column and y
# is the ID column,
# indicating the groups. If FALSE, x and y are both data
# columns. id.col must
# be FALSE only if both data columns are of the same length.
# adjust.median: Should between-group differences in medians
# be leveled before
# performing the test? In certain cases, the Siegel-Tukey
# test is susceptible
# to median differences and may indicate significant
# differences in
# variability that, in reality, stem from differences in
# medians.
# rnd: Should the data be rounded and, if so, to which
# decimal? The default
# (-1) uses the data as is. Otherwise, rnd must be a
# non-negative integer.
# Typically, this option is not needed. However,
# occasionally, differences in
# the precision with which certain functions return values
# cause the merging
# of two data frames to fail within the siegel.tukey
# function. Only then
# rounding is necessary. This operation should not be
# performed if it affects
# the ranks of observations.
# … arguments passed on to the Wilcoxon test. See
# ?wilcox.test
# Value: Among other output, the function returns the data,
# the Siegel-Tukey
# ranks, the associated Wilcoxon’s W and the p-value for a
# Wilcoxon test on
# tie-adjusted Siegel-Tukey ranks (i.e., it performs and
# returns a
# Siegel-Tukey test). If significant, the group with the
# smaller rank sum has
# greater variability.
# References: Sidney Siegel and John Wilder Tukey (1960) “A
# nonparametric sum
# of ranks procedure for relative spread in unpaired
# samples.” Journal of the
# American Statistical Association. See also, David J.
# Sheskin (2004)
# ”Handbook of parametric and nonparametric statistical
# procedures.” 3rd
# edition. Chapman and Hall/CRC. Boca Raton, FL.
# Notes: The Siegel-Tukey test has relatively low power and
# may, under certain
# conditions, indicate significance due to differences in
# medians rather than
# differences in variabilities (consider using the argument
# adjust.median).
# Output (in this order)
# 1. Group medians (after median adjustment if specified)
# 2. Wilcoxon-test for between-group differences in medians
# (after the median
# adjustment if specified)
# 3. Data, group membership, and the Siegel-Tukey ranks
# 4. Mean Siegel-Tukey rank by group (smaller values indicate
# greater
# variability)
# 5. Siegel-Tukey test (Wilcoxon test on tie-adjusted
# Siegel-Tukey ranks)
is.formula <- function(x) class(x) == "formula"
if (is.formula(x)) {
y <- do.call(c, list(as.name(all.vars(x)[2])), envir = parent.frame(2))
x <- do.call(c, list(as.name(all.vars(x)[1])), envir = parent.frame(2)) # I am using parent.frame(2) since if the name of the variable in the equation is 'x', then we will mistakenly get the function in here instead of the vector.
id.col <- TRUE
# print(x)
# print(ls.str())
# data=data.frame(c(x,y),rep(c(0,1),c(length(x),length(y))))
# print(data)
}
if (id.col == FALSE) {
data = data.frame(c(x, y), rep(c(0, 1), c(length(x), length(y))))
} else {
data = data.frame(x, y)
}
names(data) = c("x", "y")
data = data[order(data$x), ]
if (rnd > -1) {
data$x = round(data$x, rnd)
}
if (adjust.median == T) {
cat("\n", "Adjusting medians...", "\n", sep = "")
data$x[data$y == 0] = data$x[data$y == 0] - (median(data$x[data$y ==
0]))
data$x[data$y == 1] = data$x[data$y == 1] - (median(data$x[data$y ==
1]))
}
cat("\n", "Median of group 1 = ", median(data$x[data$y == 0]),
"\n", sep = "")
cat("Median of group 2 = ", median(data$x[data$y == 1]), "\n",
"\n", sep = "")
cat("Testing median differences...", "\n")
print(wilcox.test(data$x[data$y == 0], data$x[data$y == 1]))
# The following must be done for the case when id.col==F
x <- data$x
y <- data$y
cat("Performing Siegel-Tukey rank transformation...", "\n",
"\n")
sort.x <- sort(data$x)
sort.id <- data$y[order(data$x)]
data.matrix <- data.frame(sort.x, sort.id)
base1 <- c(1, 4)
iterator1 <- matrix(seq(from = 1, to = length(x), by = 4)) -
1
rank1 <- apply(iterator1, 1, function(x) x + base1)
iterator2 <- matrix(seq(from = 2, to = length(x), by = 4))
base2 <- c(0, 1)
rank2 <- apply(iterator2, 1, function(x) x + base2)
#print(rank1)
#print(rank2)
if (length(rank1) == length(rank2)) {
rank <- c(rank1[1:floor(length(x)/2)], rev(rank2[1:ceiling(length(x)/2)]))
} else {
rank <- c(rank1[1:ceiling(length(x)/2)], rev(rank2[1:floor(length(x)/2)]))
}
unique.ranks <- tapply(rank, sort.x, mean)
unique.x <- as.numeric(as.character(names(unique.ranks)))
rank.matrix <- data.frame(unique.x, unique.ranks)
ST.matrix <- merge(data.matrix, rank.matrix, by.x = "sort.x",
by.y = "unique.x")
print(ST.matrix)
cat("\n", "Performing Siegel-Tukey test...", "\n", sep = "")
ranks0 <- ST.matrix$unique.ranks[ST.matrix$sort.id == 0]
ranks1 <- ST.matrix$unique.ranks[ST.matrix$sort.id == 1]
cat("\n", "Mean rank of group 0: ", mean(ranks0), "\n", sep = "")
cat("Mean rank of group 1: ", mean(ranks1), "\n", sep = "")
print(wilcox.test(ranks0, ranks1, alternative = alternative,
mu = mu, paired = paired, exact = exact, correct = correct,
conf.int = conf.int, conf.level = conf.level))
}
if(F) {
#Example:
### 1
x=c(4,4,5,5,6,6)
y=c(0,0,1,9,10,10)
siegel.tukey(x,y, F)
siegel.tukey(x,y) #same as above
### 2
# example for a non equal number of cases:
x=c(4,4,5,5,6,6)
y=c(0,0,1,9,10)
siegel.tukey(x,y,F)
### 3
x <- c(33, 62, 84, 85, 88, 93, 97, 4, 16, 48, 51, 66, 98)
id <- c(0,0,0,0,0,0,0,1,1,1,1,1,1)
siegel.tukey(x,id,T)
siegel.tukey(x~id) # from now on, this also works as a function...
siegel.tukey(x,id,T,adjust.median=F,exact=T)
### 4
x<-c(177,200,227,230,232,268,272,297,47,105,126,142,158,172,197,220,225,230,262,270)
id<-c(rep(0,8),rep(1,12))
siegel.tukey(x,id,T,adjust.median=T)
### 5
x=c(33,62,84,85,88,93,97)
y=c(4,16,48,51,66,98)
siegel.tukey(x,y)
### 6
x<-c(0,0,1,4,4,5,5,6,6,9,10,10)
id<-c(0,0,0,1,1,1,1,1,1,0,0,0)
siegel.tukey(x,id,T)
### 7
x <- c(85,106,96, 105, 104, 108, 86)
id<-c(0,0,1,1,1,1,1)
siegel.tukey(x,id,T)
}
Jump to Line
Something went wrong with that request. Please try again.