-
Notifications
You must be signed in to change notification settings - Fork 82
/
rstant_testBEST.R
182 lines (136 loc) · 5.91 KB
/
rstant_testBEST.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
#-------------------------------------------------------------------------------#
# The following is based on Kruschke's 2012 JEP article 'Bayesian estimation #
# supercedes the t-test (BEST)' with only minor changes to stan model. It uses #
# the JAGS/BUGS code in the paper's Appendix B as the reference. #
#-------------------------------------------------------------------------------#
#######################
### Create the Data ###
#######################
### play around with the specs if you like
set.seed(1234)
N1 = 50
N2 = 50
mu1 = 1
mu2 = -.5
sig1 = 1
sig2 = 1
Ng = 2
y1 = rnorm(N1, mu1, sig1)
y2 = rnorm(N2, mu2, sig2)
y = c(y1, y2)
groupID = as.numeric(gl(2, N1))
## if unbalanced
# group = 1:2
# groupID = rep(group, c(N1,N2))
tapply(y, groupID, psych:::describe)
##################
### Stan Setup ###
##################
standat= list(N=length(y), Ng=Ng, groupID=groupID, y=y)
stanmodelcode = '
data {
int<lower=1> N; // sample size (note:putting bounds provides simple data check)
int<lower=2> Ng; // number of groups
vector[N] y; // response
int<lower=1, upper=Ng> groupID[N]; // group ID
}
transformed data{
real meany; // mean of y; see mu prior
meany = mean(y);
}
parameters {
vector[2] mu; // estimated group means and sd
vector<lower=0>[2] sigma; // Kruschke puts upper bound as well; ignored here
real<lower=0, upper=100> nu; // df for t distribution
}
transformed parameters { // none needed
}
model {
// priors
mu ~ normal(meany, 10); // note that there is a faster implementation of this for stan; sd here is more informative than in Kruschke paper
sigma ~ cauchy(0, 5);
nu ~ exponential(1.0/29); // Based on Kruschke; makes mean nu 29 (might consider upper bound, too large and might as well switch to normal)
// likelihood
for (n in 1:N){
y[n] ~ student_t(nu, mu[groupID[n]], sigma[groupID[n]]);
//y[n] ~ normal(mu[groupID[n]], sigma[groupID[n]]); // for comparison, remove all nu specifications if you do this
}
}
generated quantities {
vector[N] yRep; // posterior predictive distribution
real muDiff; // mean difference
real CohensD; // effect size; see footnote 1 in Kruschke paper
real CLES; // common language effect size
real CLES2; // a more explicit approach; the mean should roughly equal CLES
for (n in 1:N){
yRep[n] = student_t_rng(nu, mu[groupID[n]], sigma[groupID[n]]);
}
muDiff = mu[1] - mu[2];
CohensD = muDiff / sqrt(sum(sigma)/2);
CLES = normal_cdf(muDiff / sqrt(sum(sigma)), 0, 1);
CLES2 = student_t_rng(nu, mu[1], sigma[1]) - student_t_rng(nu, mu[2], sigma[2]) > 0;
}
'
#############################
### Run and inspect model ###
#############################
### Run model/examine basic diagnostic plots
library(rstan)
# you can ignore the informational message
fit = stan(model_code=stanmodelcode, data=standat, iter=12000, warmup=2000, cores=4, thin=10)
shinystan::launch_shinystan(fit)
### Print summary of model
print(fit, digits=3, pars=c('mu', 'sigma', 'muDiff', 'CohensD', 'CLES', 'CLES2','nu','lp__'))
### Extract quantities of interest for more processing/visualization.
yRep = extract(fit, par='yRep')$yRep
# compare population and observed data values to estimates in summary print
# mean difference
muDiff = extract(fit, par='muDiff')$muDiff
means = tapply(y, groupID, mean)
sds = tapply(y, groupID, sd)
mu1-mu2 # based on population values
abs(diff(means)) # observed in data
# Cohen's d
CohensD = extract(fit, par='CohensD')$CohensD
(mu1-mu2) / sqrt((sig1^2+sig2^2)/2) # population
(means[1]-means[2]) / sqrt(sum(sds^2)/2) # observed
# common language effect size is the probability that a randomly selected score from one
# population will be greater than a randomly sampled score from the other
CLES = extract(fit, par='CLES')$CLES
pnorm((mu1-mu2) / sqrt(sig1^2+sig2^2)) # population
pnorm((means[1]-means[2]) / sqrt(sum(sds^2))) # observed
########################
### Model Comparison ###
########################
### Compare to Welch's t-test
t.test(y1,y2)
### Compare to BEST; note that it requires coda, whose traceplot function will overwrite rstan's
library(BEST)
BESTout = BESTmcmc(y1, y2, numSavedSteps=12000, thinSteps=10, burnInSteps=2000)
summary(BESTout)
#####################
### Visualization ###
#####################
library(ggplot2); library(reshape2);
### plot posterior predictive distribution vs. observed data density
gdat = melt(yRep)
str(gdat)
colnames(gdat) = c('iteration', 'observation', 'value' )
gdat$groupID = factor(rep(groupID, e=2000)) # change this to match your sample size/chain length
gdat$observation = factor(gdat$observation)
ggplot(aes(x=value), data=gdat) +
geom_density(aes(group=groupID, fill=groupID), color=NA, alpha=.25) +
geom_line(aes(group=observation, color=groupID), stat='density', alpha=.05) +
geom_point(aes(x=y, y=0, color=factor(groupID)), alpha=.15, size=5, data=data.frame(y, groupID)) +
xlim(c(-8,8)) + # might get a warning if extreme values are cut out
geom_density(aes(group=groupID, color=groupID, x=y), alpha=.05, data.frame(groupID=factor(groupID),y))
### plot mean difference or other values of interest
ggplot(aes(x=muDiff), data=data.frame(muDiff=muDiff)) +
geom_density(alpha=.25) +
xlim(c(0,3.5)) +
geom_point(x=muDiff, y=0, alpha=.01, size=3) +
geom_path(aes(x=quantile(muDiff, c(.025, .975)), y=c(.2,.2)), size=2, alpha=.5, color='darkred', data=data.frame())
### BEST plots
par(mfrow=c(2,2))
sapply(c("mean", "sd", "effect", "nu"), function(p) plot(BESTout, which=p))
layout(1)