In [None]:
# Code for varying parameters


In [9]:
model_dir <- "../../../tmp_models/"

In [1]:
m2_params_list <- list(
    "1" =list("gamma"=list(1,1), "beta"=list(1,1,1,1)),
    "2" =list("gamma"=list(1,1), "beta"=list(5,5,5,5)),
    "3" =list("gamma"=list(0.1, 0.1), "beta"=list(1,1,1,1)),
    "4" =list("gamma"=list(1,1), "beta"=list(5, 1, 1, 1)),
    "5" =list("gamma"=list(1,1), "beta"=list(5, 2, 2, 1)),
    "6" =list("gamma"=list(1,1), "beta"=list(2, 2, 2, 1)),
    "7" =list("gamma"=list(1,1), "beta"=list(1, 1, 1, 2)),
    "8" =list("gamma"=list(0.1, 0.1), "beta"=list(5,5,5,5))

)

m1_params_list <- list(
    "1"=list("tau"=2.5, "lkj"=2, "beta"=list(1,1)),
    "2"=list("tau"=2.5, "lkj"=2, "beta"=list(5,5)),
     "3"=list("tau"=2.5, "lkj"=2, "beta"=list(2,2)),
     "4"=list("tau"=2.5, "lkj"=2, "beta"=list(1,5)),
     "5"=list("tau"=2.5, "lkj"=2, "beta"=list(5,1)),
     "6"=list("tau"=2.5, "lkj"=0.5, "beta"=list(1,1)),
     "7"=list("tau"=2.5, "lkj"=1, "beta"=list(1,1)),
     "8"=list("tau"=2.5, "lkj"=1.5, "beta"=list(1,1)),
     "9"=list("tau"=2.5, "lkj"=2, "beta"=list(1,1)),
     "10"=list("tau"=2.5, "lkj"=2.5, "beta"=list(1,1)),
     "11"=list("tau"=2.5, "lkj"=3, "beta"=list(1,1)),
    "12"=list("tau"=0.5, "lkj"=2, "beta"=list(1,1)),
     "13"=list("tau"=25, "lkj"=2, "beta"=list(1,1))
)

In [55]:
write_m1_text <- function(params, param_id) {
    tau <- params$tau
    lkj <- params$lkj
    beta <- params$beta
    m1_text <- sprintf("data {
    int<lower=0> N; 
    int<lower=1> M; 
    matrix[N, M] B; 
    matrix[N, M] SE; 
    int<lower=1> K; 
}
transformed data{
    vector[M] zeros;
    matrix[M,M] SE_mat[N];
    vector[2] dl;

    zeros = rep_vector(0, M);

    dl[1] = %s;
    dl[2] = %s;
    for (n in 1:N) {
        SE_mat[n] = diag_matrix(to_vector(SE[n]));
    }
}

parameters {
    simplex[K] pi; // mixing proportions
    cholesky_factor_corr[M] L_Omega;
    vector<lower=0>[M] tau;
}

transformed parameters{
    matrix[M, M] Sigma;
    matrix[M, M] Sigmas[K];
    Sigma = diag_pre_multiply(tau, L_Omega)*diag_pre_multiply(tau, L_Omega)';

    Sigmas[1] = diag_matrix(rep_vector(0,M));
    Sigmas[2] = Sigma;
}

model {
    vector[K] ps; // contributions of each

    tau ~ cauchy(0, %s);
    L_Omega ~ lkj_corr_cholesky(%s);
    pi ~ dirichlet(dl);

    for (n in 1:N){
       // two components
       for (k in 1:K){
           ps[k] = log(pi[k]) + multi_normal_lpdf(B[n] | zeros, SE_mat[n] + Sigmas[k]);
       }
       target += log_sum_exp(ps);

     }
}

generated quantities {
    vector[K] ps;
    vector[N] log_lik;
    matrix[M,M] Omegacor;

    Omegacor = multiply_lower_tri_self_transpose(L_Omega);
    
    for (n in 1:N){
        for (k in 1:K){
           ps[k] = log(pi[k]) + multi_normal_lpdf(B[n] | zeros, SE_mat[n] + Sigmas[k]);
        }
        log_lik[n] = log_sum_exp(ps);
    }
}
",  beta[1], beta[2], tau, lkj)
    print(m1_text)
    sink(sprintf("%s/m1_%s.stan", model_dir, param_id))
    cat(m1_text)
    sink()
    }

In [51]:
write_m2_text <- function(params, param_id) {
    gamma <- params$gamma
    beta <- params$beta

    m2_text <- sprintf("data {
    int<lower=1> K; 
    int<lower=1> N; 
    int<lower=1> M; 

    matrix[N, M] B; 
    matrix[N, M] SE;
}

transformed data{
    vector[M] zeros; 
    vector[4] dl;

    zeros = rep_vector(0, M);
    dl[1] = %s;
    dl[2] = %s;
    dl[3] = %s;
    dl[4] = %s;
}


parameters {
    simplex[K] pi; 
    vector<lower=0>[4] sigmasq;

}
transformed parameters{

    matrix[M,M] Sigma[K];
    vector[2] a;
    vector[2] b;
    vector[2] c;

    a[1] = sigmasq[1];
    a[2] = 0.0;
    b[1] = 0.0;
    b[2] = sigmasq[2];
    c[1] = sigmasq[3];
    c[2] = sigmasq[4];

    Sigma[1] = diag_matrix(rep_vector(0,2));
    Sigma[2] = diag_matrix(a);
    Sigma[3] = diag_matrix(b);
    Sigma[4] = diag_matrix(c);



}


model {
    vector[K] ps; 


    sigmasq ~ inv_gamma(%s,%s);
    pi ~ dirichlet(dl); 

    for (n in 1:N){
        for (k in 1:K){
            ps[k]  = log(pi[k]) + multi_normal_lpdf(B[n] | zeros, diag_matrix(to_vector(SE[n])) + Sigma[k]);
        }
        target += log_sum_exp(ps);
    }
}
",  beta[1], beta[2], beta[3], beta[4], gamma[1], gamma[2])
    
    print(m2_text)
    
    sink(sprintf("%s/m2_%s.stan", model_dir, param_id))
    cat(m2_text, append=TRUE)
    sink()
    }

In [None]:
runM2 <- function(param_id){
    
    dat <- loadDat(trait)
    dat$dat$K <- 4
    params <- m2_param_list[param_id]
    write_m2(params, param_id)
    options(mc.cores = parallel::detectCores())
    rstan_options(auto_write = TRUE)
    fit2 <- stan(file=sprintf("%s/m2_%s.stan", model_dir, param_id),  
            data = dat$dat,    
            chains = 4, warmup =200, iter = 600, cores = 4, refresh = 200)
  
    print(fit2, pars=c("sigmasq", "pi", "Sigma"), probs=c(0.025, 0.5, 0.975), digits_summary=5)
    print("SAVING")
    rm(dat)
    save(fit2, file=sprintf("%s/m2/f_m2_%s_ll_%s.RData", out_dir, trait, param_id))
}

In [56]:
write_m1_text(m1_params_list[["3"]], "3")

[1] "data {\n    int<lower=0> N; \n    int<lower=1> M; \n    matrix[N, M] B; \n    matrix[N, M] SE; \n    int<lower=1> K; \n}\ntransformed data{\n    vector[M] zeros;\n    matrix[M,M] SE_mat[N];\n    vector[2] dl;\n\n    zeros = rep_vector(0, M);\n\n    dl[1] = 2;\n    dl[2] = 2;\n    for (n in 1:N) {\n        SE_mat[n] = diag_matrix(to_vector(SE[n]));\n    }\n}\n\nparameters {\n    simplex[K] pi; // mixing proportions\n    cholesky_factor_corr[M] L_Omega;\n    vector<lower=0>[M] tau;\n}\n\ntransformed parameters{\n    matrix[M, M] Sigma;\n    matrix[M, M] Sigmas[K];\n    Sigma = diag_pre_multiply(tau, L_Omega)*diag_pre_multiply(tau, L_Omega)';\n\n    Sigmas[1] = diag_matrix(rep_vector(0,M));\n    Sigmas[2] = Sigma;\n}\n\nmodel {\n    vector[K] ps; // contributions of each\n\n    tau ~ cauchy(0, 2.5);\n    L_Omega ~ lkj_corr_cholesky(2);\n    pi ~ dirichlet(dl);\n\n    for (n in 1:N){\n       // two components\n       for (k in 1:K){\n           ps[k] = log(pi[k]) + multi_normal_lpdf(B

In [35]:
m2_params_list[["3"]]

In [52]:
write_m2_text(m2_params_list[["3"]], "3")

[1] "data {\n    int<lower=1> K; \n    int<lower=1> N; \n    int<lower=1> M; \n\n    matrix[N, M] B; \n    matrix[N, M] SE;\n}\n\ntransformed data{\n    vector[M] zeros; \n    vector[4] dl;\n\n    zeros = rep_vector(0, M);\n    dl[1] = 1;\n    dl[2] = 1;\n    dl[3] = 1;\n    dl[4] = 1;\n}\n\n\nparameters {\n    simplex[K] pi; \n    vector<lower=0>[4] sigmasq;\n\n}\ntransformed parameters{\n\n    matrix[M,M] Sigma[K];\n    vector[2] a;\n    vector[2] b;\n    vector[2] c;\n\n    a[1] = sigmasq[1];\n    a[2] = 0.0;\n    b[1] = 0.0;\n    b[2] = sigmasq[2];\n    c[1] = sigmasq[3];\n    c[2] = sigmasq[4];\n\n    Sigma[1] = diag_matrix(rep_vector(0,2));\n    Sigma[2] = diag_matrix(a);\n    Sigma[3] = diag_matrix(b);\n    Sigma[4] = diag_matrix(c);\n\n\n\n}\n\n\nmodel {\n    vector[K] ps; \n\n\n    sigmasq ~ inv_gamma(0.1,0.1);\n    pi ~ dirichlet(dl); \n\n    for (n in 1:N){\n        for (k in 1:K){\n            ps[k]  = log(pi[k]) + multi_normal_lpdf(B[n] | zeros, diag_matrix(to_vector(SE[n]

In [15]:
load(sprintf("../../../data/toy_run/dat_Test_chr22.RData"))

In [17]:
require('rstan')

Loading required package: rstan

Loading required package: StanHeaders

Loading required package: ggplot2

rstan (Version 2.19.3, GitRev: 2e1f913d3ca3)

For execution on a local, multicore CPU with excess RAM we recommend calling
options(mc.cores = parallel::detectCores()).
To avoid recompilation of unchanged Stan programs, we recommend calling
rstan_options(auto_write = TRUE)



In [57]:
param_id <- "3"
 dat$dat$K <- 4
fit <- stan(file=sprintf("%s/m2_%s.stan", model_dir, param_id),  
            data = dat$dat,    
            chains = 1, warmup =200, iter = 600, cores = 4, refresh = 200)
rm(dat)
print("saving")
save(fit, file=sprintf("%s/m1_fit_%s.RData", out_dir, param_id))




SAMPLING FOR MODEL 'm1_3' NOW (CHAIN 1).
Chain 1: 
Chain 1: Gradient evaluation took 0.03 seconds
Chain 1: 1000 transitions using 10 leapfrog steps per transition would take 300 seconds.
Chain 1: Adjust your expectations accordingly!
Chain 1: 
Chain 1: 
Chain 1: Iteration:   1 / 600 [  0%]  (Warmup)
Chain 1: Iteration: 200 / 600 [ 33%]  (Warmup)
Chain 1: Iteration: 201 / 600 [ 33%]  (Sampling)
Chain 1: Iteration: 400 / 600 [ 66%]  (Sampling)
Chain 1: Iteration: 600 / 600 [100%]  (Sampling)
Chain 1: 
Chain 1:  Elapsed Time: 60.92 seconds (Warm-up)
Chain 1:                119.89 seconds (Sampling)
Chain 1:                180.81 seconds (Total)
Chain 1: 


“The largest R-hat is NA, indicating chains have not mixed.
Running the chains for more iterations may help. See
“Bulk Effective Samples Size (ESS) is too low, indicating posterior means and medians may be unreliable.
Running the chains for more iterations may help. See
“Tail Effective Samples Size (ESS) is too low, indicating posterior variances and tail quantiles may be unreliable.
Running the chains for more iterations may help. See


In [19]:
dim(dat$dat$B)

In [58]:
require('loo')
print("extracting loo")
log_lik1 <- extract_log_lik(fit)
rm(fit)
loo1 <- loo(log_lik1)
print(loo1)
save(loo1, file=sprintf("%s/loo_m1_%s.RData", out_dir, param_id))
waic1 <- waic(log_lik1) 
print(waic1)
save(waic1, file=sprintf("%s/waic_m1_%s.RData", out_dir, param_id))


Loading required package: loo

This is loo version 2.2.0

- Online documentation and vignettes at mc-stan.org/loo

- As of v2.0.0 loo defaults to 1 core but we recommend using as many as possible. Use the 'cores' argument or set options(mc.cores = NUM_CORES) for an entire session. 


Attaching package: ‘loo’


The following object is masked from ‘package:rstan’:

    loo




[1] "extracting loo"


“Relative effective sample sizes ('r_eff' argument) not specified.
For models fit with MCMC, the reported PSIS effective sample sizes and 
MCSE estimates will be over-optimistic.”



Computed from 400 by 5548 log-likelihood matrix

         Estimate    SE
elpd_loo  32305.9 153.3
p_loo         8.6   3.1
looic    -64611.8 306.6
------
Monte Carlo SE of elpd_loo is 0.2.

All Pareto k estimates are good (k < 0.5).
See help('pareto-k-diagnostic') for details.


ERROR: Error in sprintf("%s/loo_m1_%s.RData", out_dir, param_id): object 'out_dir' not found


In [59]:
load("../../../data/dat_Testosterone.RData")

In [60]:
dim(dat$dat$B)