// smodel_fqlearning_group.stan
// Model 1: normal Q-learning
data {
  // Observed choice/reward sequences for a hierarchical (group-level) Q-learning model.
  int<lower=1> N ; // number of subjects (or sessions)
  int<lower=1> T ; // number of trials per subject (every subject contributes T trials)
  int<lower=1,upper=2> c[N,T]; // choice on each trial: option 1 or 2
  real r[N,T]; // reward received on each trial
  int WBICmode; // 0: ordinary Bayesian sampling; 1: tempered likelihood for WBIC computation
}
parameters {
  // Population-level location/scale on the unconstrained (logit) scale.
  real mu_p_alpha;                        // population mean of learning rate (logit scale)
  // FIX: upper bound added to match the uniform(0.0, 1.5) prior in the model block.
  // A uniform prior whose support is narrower than the declared constraint lets the
  // sampler propose values with zero density, causing rejections/divergences.
  real<lower=0,upper=1.5> sigma_p_alpha;  // population SD of learning rate (logit scale)
  real mu_p_beta;                         // population mean of inverse temperature (logit scale)
  real<lower=0,upper=1.5> sigma_p_beta;   // population SD of inverse temperature (logit scale)
  // Standardized subject-level deviations (non-centered parameterization).
  real eta_alpha[N];
  real eta_beta[N];
}
transformed parameters {
  real<lower=0.0,upper=1.0> alpha[N];        // subject-level learning rates
  real<lower=0.0> beta[N];                   // subject-level inverse temperatures (0..20)
  real<lower=0.0,upper=1.0> alpha_p;         // population-typical learning rate
  real<lower=0.0,upper=20.0> beta_p;         // population-typical inverse temperature
  // Non-centered parameterization: subject parameter = link(mu + sigma * eta).
  for (n in 1:N) {
    alpha[n] = inv_logit(mu_p_alpha + sigma_p_alpha * eta_alpha[n]);
    beta[n] = 20 * inv_logit(mu_p_beta + sigma_p_beta * eta_beta[n]);
  }
  alpha_p = inv_logit(mu_p_alpha);  // mirrors the alpha[n] transform at eta = 0
  // FIX: original used exp(mu_p_beta), which is inconsistent with the subject-level
  // transform 20 * inv_logit(.). Use the same link so beta_p is the population-typical
  // beta (the value a subject with eta_beta = 0 would have), matching alpha_p's logic.
  beta_p = 20 * inv_logit(mu_p_beta);
}
model {
  matrix[2,T] Q; // action values, Q[option, trial]

  // Population distribution (hierarchical priors, non-centered parameterization).
  mu_p_alpha ~ normal(0, 1.5);
  sigma_p_alpha ~ uniform(0.0, 1.5);
  mu_p_beta ~ normal(0, 1.5);
  // FIX: deprecated '#' comment replaced with '//' ( '#' is removed in modern Stan).
  sigma_p_beta ~ uniform(0.0, 1.5); // alternative considered: uniform(0.01, 10.0)
  eta_alpha ~ normal(0, 1);
  eta_beta ~ normal(0, 1);

  for (i in 1:N) {
    // Reset action values at the start of each subject's session.
    Q[1, 1] = 0;
    Q[2, 1] = 0;
    for (t in 1:T) {
      // Choice probability is a softmax over two options:
      // P(chosen) = 1 / (1 + exp(-beta * (Q_chosen - Q_unchosen))).
      // log_inv_logit(x) is the numerically stable form of log(1/(1+exp(-x))).
      if (WBICmode) {
        // WBIC: temper the log-likelihood by 1/log(number of data points).
        target += 1 / log(N * T)
                  * log_inv_logit(beta[i] * (Q[c[i,t], t] - Q[3 - c[i,t], t]));
      } else {
        target += log_inv_logit(beta[i] * (Q[c[i,t], t] - Q[3 - c[i,t], t]));
      }
      // Update action values for the next trial.
      if (t < T) {
        // Chosen option: standard delta-rule update toward the obtained reward.
        Q[c[i,t], t+1] = Q[c[i,t], t] + alpha[i] * (r[i,t] - Q[c[i,t], t]);
        // Unchosen option: decays toward zero ("forgetting" Q-learning).
        Q[3 - c[i,t], t+1] = (1 - alpha[i]) * Q[3 - c[i,t], t];
      }
    }
  }
}
generated quantities {
  // Per-trial log-likelihoods (one entry per subject x trial) for WAIC/LOO.
  vector[N*T] log_lik;
  {
    matrix[2,T] Q; // action values, Q[option, trial]
    int trial_count;
    trial_count = 0;
    for (i in 1:N) {
      // Reset action values at the start of each subject's session.
      Q[1, 1] = 0;
      Q[2, 1] = 0;
      for (t in 1:T) {
        trial_count = trial_count + 1;
        // Same softmax likelihood as the model block;
        // log_inv_logit(x) is the numerically stable log(1/(1+exp(-x))).
        log_lik[trial_count] =
          log_inv_logit(beta[i] * (Q[c[i,t], t] - Q[3 - c[i,t], t]));
        // Replay the identical value-update recursion used in the model block.
        if (t < T) {
          // Chosen option: delta-rule update toward the obtained reward.
          Q[c[i,t], t+1] = Q[c[i,t], t] + alpha[i] * (r[i,t] - Q[c[i,t], t]);
          // Unchosen option: decays toward zero ("forgetting" Q-learning).
          Q[3 - c[i,t], t+1] = (1 - alpha[i]) * Q[3 - c[i,t], t];
        }
      }
    }
  }
}