/
AllGOFTests.R
112 lines (105 loc) · 4.53 KB
/
AllGOFTests.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
#####################################################################
# NAME: Tom Loughin #
# DATE: 1-10-13 #
# PURPOSE: Functions to compute Hosmer-Lemeshow, Osius-Rojek, and #
# Stukel goodness-of-fit tests #
# #
# NOTES: #
#####################################################################
# Single R file that contains all three goodness-of fit tests
# Adapted from program published by Ken Kleinman as Exmaple 8.8 on the SAS and R blog, sas-and-r.blogspot.ca
# Assumes data are aggregated into Explanatory Variable Pattern form.
HLTest = function(obj, g) {
# first, check to see if we fed in the right kind of object
stopifnot(family(obj)$family == "binomial" && family(obj)$link == "logit")
y = obj$model[[1]]
trials = rep(1, times = nrow(obj$model))
if(any(colnames(obj$model) == "(weights)"))
trials <- obj$model[[ncol(obj$model)]]
# the double bracket (above) gets the index of items within an object
if (is.factor(y))
y = as.numeric(y) == 2 # Converts 1-2 factor levels to logical 0/1 values
yhat = obj$fitted.values
interval = cut(yhat, quantile(yhat, 0:g/g), include.lowest = TRUE) # Creates factor with levels 1,2,...,g
Y1 <- trials*y
Y0 <- trials - Y1
Y1hat <- trials*yhat
Y0hat <- trials - Y1hat
obs = xtabs(formula = cbind(Y0, Y1) ~ interval)
expect = xtabs(formula = cbind(Y0hat, Y1hat) ~ interval)
if (any(expect < 5))
warning("Some expected counts are less than 5. Use smaller number of groups")
pear <- (obs - expect)/sqrt(expect)
chisq = sum(pear^2)
P = 1 - pchisq(chisq, g - 2)
# by returning an object of class "htest", the function will perform like the
# built-in hypothesis tests
return(structure(list(
method = c(paste("Hosmer and Lemeshow goodness-of-fit test with", g, "bins", sep = " ")),
data.name = deparse(substitute(obj)),
statistic = c(X2 = chisq),
parameter = c(df = g-2),
p.value = P,
pear.resid = pear,
expect = expect,
observed = obs
), class = 'htest'))
}
# Osius-Rojek test
# Based on description in Hosmer and Lemeshow (2000) p. 153.
# Assumes data are aggregated into Explanatory Variable Pattern form.
o.r.test = function(obj) {
# first, check to see if we fed in the right kind of object
stopifnot(family(obj)$family == "binomial" && family(obj)$link == "logit")
mf <- obj$model
trials = rep(1, times = nrow(mf))
if(any(colnames(mf) == "(weights)"))
trials <- mf[[ncol(mf)]]
prop = mf[[1]]
# the double bracket (above) gets the index of items within an object
if (is.factor(prop))
prop = as.numeric(prop) == 2 # Converts 1-2 factor levels to logical 0/1 values
pi.hat = obj$fitted.values
y <- trials*prop
yhat <- trials*pi.hat
nu <- yhat*(1-pi.hat)
pearson <- sum((y - yhat)^2/nu)
c = (1 - 2*pi.hat)/nu
exclude <- c(1,which(colnames(mf) == "(weights)"))
vars <- data.frame(c,mf[,-exclude])
wlr <- lm(formula = c ~ ., weights = nu, data = vars)
rss <- sum(nu*residuals(wlr)^2 )
J <- nrow(mf)
A <- 2*(J - sum(1/trials))
z <- (pearson - (J - ncol(vars) - 1))/sqrt(A + rss)
p.value <- 2*(1 - pnorm(abs(z)))
cat("z = ", z, "with p-value = ", p.value, "\n")
}
# Stukel Test
# Based on description in Hosmer and Lemeshow (2000) p. 155.
# Assumes data are aggregated into Explanatory Variable Pattern form.
stukel.test = function(obj) {
# first, check to see if we fed in the right kind of object
stopifnot(family(obj)$family == "binomial" && family(obj)$link == "logit")
high.prob <- (obj$fitted.values >= 0.5)
logit2 <- obj$linear.predictors^2
z1 = 0.5*logit2*high.prob
z2 = 0.5*logit2*(1-high.prob)
mf <- obj$model
trials = rep(1, times = nrow(mf))
if(any(colnames(mf) == "(weights)"))
trials <- mf[[ncol(mf)]]
prop = mf[[1]]
# the double bracket (above) gets the index of items within an object
if (is.factor(prop))
prop = (as.numeric(prop) == 2) # Converts 1-2 factor levels to logical 0/1 values
pi.hat = obj$fitted.values
y <- trials*prop
exclude <- which(colnames(mf) == "(weights)")
vars <- data.frame(z1, z2, y, mf[,-c(1,exclude)])
full <- glm(formula = y/trials ~ ., family = binomial(link = logit), weights = trials, data = vars)
null <- glm(formula = y/trials ~ ., family = binomial(link = logit), weights = trials, data = vars[,-c(1,2)])
LRT <- anova(null,full)
p.value <- 1 - pchisq(LRT$Deviance[[2]], LRT$Df[[2]])
cat("Stukel Test Stat = ", LRT$Deviance[[2]], "with p-value = ", p.value, "\n")
}