-
Notifications
You must be signed in to change notification settings - Fork 0
/
Code 02. data model development
160 lines (132 loc) · 7.17 KB
/
Code 02. data model development
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
### [discrete Data model]
#
# covariate w/o NA: ----------> X1,X2
#-> dbinom(x, size, prob=piparam)*dnorm(x, mean=muparam, sd=tau2param)
# covariate with NA in X1: ---> X2
#-> dnorm(x, mean=muparam, sd=tau2param)
# Outcome w/o NA: ------------> Sh|X1,X2
#-> dlogsknorm_log(y, x, beta=beta_j, sig2=sig2_j, xi=xi_j)
# outcome with NA: -----------> Sh|X1,X2 then marginalize w.r.t X1
#-> dlogsknorm_log(y, x, beta=beta_j, sig2=sig2_j, xi=xi_j)
### [continuous (Parameter-free) Data model]
#
# covariate model w/o NA: ----------> int X1,X2 w.r.t "piparam","muparam","tau2param"
f0x1 = function(x1) {
beta(x1+c0, 1-x1+d0)/beta(c0,d0)
}
f0x2 = function(x2) {
gam0^e0*gamma(e0+1/2)/(2*sqrt(pi)*gamma(e0))*(gam0+(x2-mu0)^2/4)^(-(e0+1/2))
}
#::: f0x = f0x1(X1)*f0x2(X2)
# covariate model with NA in X1: ---> int X2 w.r.t "muparam","tau2param"
f0x2 = function(x2) {
gam0^e0*gamma(e0+1/2)/(2*sqrt(pi)*gamma(e0))*(gam0+(x2-mu0)^2/4)^(-(e0+1/2))
}
#::: f0x = f0x2
# Outcome ...with+w/o NA: ------------------------------> MonteCarlo Integration
# Calculate Outcome and Covariate parameter free data model for each observation
n=length(Y)
f0y = numeric(n) #% param-free outcome model f0(y|x)
f0x = numeric(n) #% param-free covariate model
E0y = numeric(n) #% param-free E(Y|x) = Expected value of Y|x ~ f0(y|x)
M = 1000 # Number of Monte Carlo samples
sumy = numeric(M)
sumEy = numeric(M)
for(i in 1:n) {
if(!is.na(X1[i])) { # --------------------------------------------------------- # [ When no NA in X1 ] #
f0x[i] = f0x1(X1[i])*f0x2(X2[i]) #### Look at your covariate model #####
# Monte Carlo integration for Y (w/o NA) for ### outcome model ###
#sumy = numeric(M)
#sumEy = numeric(M)
for(j in 1:M) {
xi_samplej = rt(n = 1, df = nu0) # prior on xi
sig_samplej = rinvgamma(n = 1, shape = a0, scale = b0) # prior on sig2
beta_samplej = rmvn(n = 1, mu = beta0, sigma = SIG_b0) # prior on beta
betat_samplej = rmvn(n = 1, mu = betat0, sigma = SIG_bt0) # prior on tilde beta
if(Y[i]>1){ # Y>1, when Sh>0
sumy[j] =
(1-sigmoid( sum(matX[i,]*betat_samplej) ))* # P(Sh > 0) with complete
dlogsknorm( y=Y[i],
x=matX[i,],
beta=beta_samplej,
sig2=sig_samplej,
xi=xi_samplej )* # to outcome with complete
dmvn(X = beta_samplej, mu = beta0, sigma = SIG_b0)* # to joint beta
dinvgamma(x = sig_samplej, shape = a0, scale = b0)* # to joint sig2
dt(x = xi_samplej, df = nu0)* # to joint xi
dmvn(X = betat_samplej, mu = betat0, sigma = SIG_bt0) # to joint beta tilde
}
else { # Y=1, when Sh=0
sumy[j] =
(sigmoid( sum(matX[i,]*betat_samplej) ))* # P(Sh = 0) with complete
dmvn(X = betat_samplej, mu = betat0, sigma = SIG_bt0) # to joint beta tilde
}
sumEy[j] = (1-sigmoid( sum(matX[i,]*betat_samplej) ))*
2*exp(sum(matX[i,]*beta_samplej+sig_samplej/2))*
(1-pnorm(-xi_samplej*sqrt(sig_samplej)/sqrt(xi_samplej^2+1)))
#print(sumEy)
}
f0y[i] = sum(sumy)/M # Outcome model w/o NA in X1
E0y[i] = sum(sumEy)/M # param-free E[Outcome] value model w/o NA in X1
}
else if(is.na(X1[i])) { # ----------------------------------------------------- # [ When NA in X1 ] #
f0x[i] = f0x2(X2[i]) #### Look at your covariate model #####
# Monte Carlo integration for Y (with NA) ### outcome model ###
#sumy = numeric(M)
#sumEy = numeric(M)
for(j in 1:M) {
xi_samplej = rt(n = 1, df = nu0) # prior for xi
sig_samplej = rinvgamma(n = 1, shape = a0, scale = b0) # prior for sig2
beta_samplej = rmvn(n = 1, mu = beta0, sigma = SIG_b0) # prior for beta
betat_samplej = rmvn(n = 1, mu = betat0, sigma = SIG_bt0) # prior on tilde beta
# In addition.....
pi_samplej = rbeta(n = 1, shape1 = c0, shape2 = d0) # To integrate over the missing covariate!!!!
if(Y[i]>1){ # Y>1, when Sh>0
sumy[j] =
( (1-sigmoid( sum(c(matX[i,1], 1, matX[i,3])*betat_samplej) ))* # P(Sh > 0) with NA
dlogsknorm( y=Y[i],
x=c(matX[i,1], 1, matX[i,3]),
beta=beta_samplej,
sig2=sig_samplej,
xi=xi_samplej )*pi_samplej +
(1-sigmoid( sum(c(matX[i,1], 0, matX[i,3])*betat_samplej) ))*
dlogsknorm( y=Y[i],
x=c(matX[i,1], 0, matX[i,3]),
beta=beta_samplej,
sig2=sig_samplej,
xi=xi_samplej)*(1 - pi_samplej) )* # to outcome with NA
dmvn(X=beta_samplej, mu=beta0, sigma=SIG_b0)* # to joint beta
dinvgamma(x=sig_samplej, shape=a0, scale=b0)* # to joint sig2
dt(x=xi_samplej, df=nu0)* # to joint xi
dbeta(pi_samplej, shape1=c0, shape2=d0)* # to joint pi (for x1: NA)
dmvn(X=betat_samplej, mu=betat0, sigma=SIG_bt0) # to joint beta tilde
}
else {
sumy[j] =
( (sigmoid( sum(c(matX[i,1], 1, matX[i,3])*betat_samplej) ))* # P(Sh = 0) with NA
pi_samplej +
(sigmoid( sum(c(matX[i,1], 0, matX[i,3])*betat_samplej) ))*
(1 - pi_samplej) )*
dbeta(pi_samplej, shape1=c0, shape2=d0)* # to joint pi (for x1: NA)
dmvn(X = betat_samplej, mu = betat0, sigma = SIG_bt0) # to joint beta tilde
}
sumEy[j] = (1-sigmoid( sum(c(matX[i,1],0,matX[i,3])*betat_samplej) ))*
2*exp(sum(c(matX[i,1],0,matX[i,3])*beta_samplej+sig_samplej/2))*
(1-pnorm(-xi_samplej*sqrt(sig_samplej)/sqrt(xi_samplej^2+1)))*(1-pi_samplej) +
(1-sigmoid( sum(c(matX[i,1],1,matX[i,3])*betat_samplej) ))*
2*exp(sum(c(matX[i,1],1,matX[i,3])*beta_samplej+sig_samplej/2))*
(1-pnorm(-xi_samplej*sqrt(sig_samplej)/sqrt(xi_samplej^2+1)))*pi_samplej
#print(sumEy)
}
f0y[i] = sum(sumy)/M # param-free Outcome density model with NA in X1
E0y[i] = sum(sumEy)/M # param-free E[Outcome] value model with NA in X1
}
print(paste("i=",i))
#print(E0y)
}
plot( x=density(f0y) ) # param-free outcome density model by sampling param and plugging [obv, param] into "Monte Carlo Integration"
plot( x=density(f0x) ) # param-free covariate density model by plugging [obv] into "posterior density" analytically produced
plot( x=density(E0y) ) # param-free E[outcome] value model
boxplot(f0x ~ X1miss) # f0(X1,X2)
# False: if x1 is not missing, the values of f0x (covariate model) are much smaller than the other case (True).
# This makes sense coz..u r not multiplying the density function of x1 (when x1 has NA, we drop x1..).