### Installing cmdstanr

In [3]:
# we recommend running this is a fresh R session or restarting your current session
install.packages("cmdstanr", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))
library(cmdstanr)
set_cmdstan_path('/opt/conda/bin/cmdstan')
cmdstan_version()

also installing the dependencies ‘tensorA’, ‘distributional’, ‘posterior’


Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done

This is cmdstanr version 0.4.0

- Online documentation and vignettes at mc-stan.org/cmdstanr

- Use set_cmdstan_path() to set the path to CmdStan

- Use install_cmdstan() to install CmdStan

CmdStan path set to: /opt/conda/bin/cmdstan



In [4]:
packageVersion("cmdstanr")

[1] ‘0.4.0’

### Installing other packages

In [5]:
install.packages("mvShapiroTest")

Updating HTML index of packages in '.Library'

Making 'packages.html' ...
 done



### Runing codes

In [6]:
getwd()

In [7]:
##################################################
#  1. Tools > Terminal > New Terminal
#  Run in Unix Terminal Ctrl-Alt-Enter           #
##################################################
# we recommend running this is a fresh R session or restarting your current session
# install.packages("cmdstanr", repos = c("https://mc-stan.org/r-packages/", getOption("repos")))

library("cmdstanr")
library(mvShapiroTest)
library(tidyverse)

# Set unix directories, /mnt/c/ at beginning for c:/
dir_data <- "/home/sagemaker-user/cmdstanrTest/1_CBC_Recoding" # Where is R List with data
dir_model <- "/home/sagemaker-user/cmdstanrTest/99_Stan_Code" # Location of Stan Code
dir_draws <- "/home/sagemaker-user/cmdstanrTest/2_HB_Run/2_Stan_HB_Run/1_Output" # Where Stan stores draws.  Recommend a folder that does not sync
dir_out <- dir_draws # Location to put final output

start.time <- Sys.time()

# Load data and model
load(file.path(dir_data,"data_conjoint_RegularCoding.RData")) # Load data file
HB_model <- cmdstan_model(file.path(dir_model, "MNL_BartBlockCon_v1_6.stan"), quiet = TRUE, cpp_options = list(stan_threads = TRUE))
HB_model$print() # Just to verify

# Specify chains and threads
threads = list(parallel_chains = 2,
               threads_per_chain = 4)

# Specify constraints (sign only)
# For each parameter: 0 = no constraint, +1 = positive, -1 = negative
P <- data_conjoint$P
con_sign <- rep(0,P)
as.matrix(colnames(data_conjoint$code_master))
con_sign[21:39] <- -1 # Negative utilities for price slopes

# Modeling parameters. We include constraints above here.
# This overwrites/adds to the data file we pass to Stan.
data_model <- list(
  con_sign = con_sign,
  prior_cov = data_conjoint$prior_cov * 1, # Change cov scale here  
  df = 5, # Degrees of freedom
  prior_alpha = rep(0, P),
  a_sig = 10,
  cov_block = matrix(1, P, P),
  splitsize = round(.5 + data_conjoint$T/(4 * threads[[2]])),
  agg_model = NULL,
  tag = NULL
)

HB_model$sample(modifyList(data_conjoint, data_model),
                iter_warmup = 4,
                iter_sampling = 4,
                output_dir = dir_draws,
                chains = 2,
                parallel_chains = threads[[1]],
                threads_per_chain = threads[[2]],
                save_warmup = TRUE,
                refresh = 10,
                seed = 271,
                init = .1,
                show_messages = FALSE,
                validate_csv = FALSE
)

end.time <- Sys.time()
time.taken <- end.time - start.time
time_taken=time.taken
print(time_taken)

── [1mAttaching packages[22m ─────────────────────────────────────── tidyverse 1.3.1 ──

[32m✔[39m [34mggplot2[39m 3.3.5     [32m✔[39m [34mpurrr  [39m 0.3.4
[32m✔[39m [34mtibble [39m 3.1.4     [32m✔[39m [34mdplyr  [39m 1.0.7
[32m✔[39m [34mtidyr  [39m 1.1.3     [32m✔[39m [34mstringr[39m 1.4.0
[32m✔[39m [34mreadr  [39m 2.0.1     [32m✔[39m [34mforcats[39m 0.5.1

── [1mConflicts[22m ────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m    masks [34mstats[39m::lag()



// Kevin Lattery June 2020
// Conjoint Model in Stan
// Wishart prior with Barlett Decomposition (needs revision)
// CovBlock defines blocks of parameters
// Hinge function for constraints
// Parallel threads using reduce_sum

functions{
  matrix logistic_hinge(matrix x, matrix delta) {
    return delta .* log1p_exp(x ./ delta);
    // https://statmodeling.stat.columbia.edu/2017/05/19/continuous-hinge-function-bayesian-modeling/
  }
  
  real MNL_LL_par(int[] array_slice,
                  int a_beg, int a_end, // Stan determines values of these for specific core
                  matrix beta_ind,  matrix X,  vector dep,
                  int[] start, int[] end,
                  int[] task_individual
  ) {
    vector[end[a_end] - start[a_beg] +1] logprob; 
    int sub_adj = start[a_beg] -1; // logprob starts at 1, but first loop is start[a_beg]
    for (t in a_beg:a_end){
      logprob[(start[t]-sub_adj): (end[t]-sub_adj)]= 
        log_softmax(X[start[t]:end[t]] * 
                  

0
Att1_1
Att1_2
Att2_1
Att2_2
Att3_1
Att3_2
Att3_3
Att4_1
Att4_2
Att4_3


Running MCMC with 2 parallel chains, with 4 thread(s) per chain...

Chain 1          performed for num_warmup < 20 
Chain 2          performed for num_warmup < 20 
Chain 1 Iteration: 1 / 8 [ 12%]  (Warmup) 
Chain 2 Iteration: 1 / 8 [ 12%]  (Warmup) 
Chain 2 Iteration: 5 / 8 [ 62%]  (Sampling) 
Chain 1 Iteration: 5 / 8 [ 62%]  (Sampling) 
Chain 2 Iteration: 8 / 8 [100%]  (Sampling) 
Chain 1 Iteration: 8 / 8 [100%]  (Sampling) 
Chain 1 finished in 0.8 seconds.
Chain 2 finished in 0.7 seconds.

Both chains finished successfully.
Mean chain execution time: 0.8 seconds.
Total execution time: 1.5 seconds.


“NAs introduced by coercion”
“NAs introduced by coercion”


 variable      mean    median     sd    mad        q5       q95 rhat ess_bulk
 lp__     -16877.75 -16877.75 223.59 310.09 -17086.90 -16668.60   NA       NA
 alpha[1]     -0.45     -0.45   0.02   0.03     -0.46     -0.43  Inf       NA
 alpha[2]     -0.78     -0.78   0.01   0.02     -0.79     -0.77  Inf       NA
 alpha[3]     -0.16     -0.16   0.01   0.01     -0.16     -0.15   NA       NA
 alpha[4]      0.01      0.01   0.05   0.07     -0.04      0.05   NA       NA
 alpha[5]     -0.74     -0.74   0.03   0.04     -0.77     -0.72   NA       NA
 alpha[6]     -0.26     -0.26   0.04   0.06     -0.30     -0.22  Inf       NA
 alpha[7]     -0.25     -0.25   0.04   0.05     -0.28     -0.21   NA       NA
 alpha[8]     -0.06     -0.06   0.02   0.03     -0.09     -0.04  Inf       NA
 alpha[9]      0.35      0.35   0.01   0.01      0.35      0.36   NA       NA
 ess_tail
       NA
       NA
       NA
       NA
       NA
       NA
       NA
       NA
       NA
       NA

 # showing 10 of 56061 rows (ch

Time difference of 10.89821 secs


In [8]:
############################################################
##   Process Output
##########################################################
library("posterior")
csv_name <- c("MNL_BartBlockCon_v1_6-202107231212-1-498344.csv",
              "MNL_BartBlockCon_v1_6-202107231212-2-498344.csv"
) # You must specify names of output files in dir_draws

draws_upper <- read_cmdstan_csv(file.path(dir_draws, csv_name), variables = c("alpha"))
fit_stats <- summarize_draws(draws_upper$post_warmup_draws)

chain1_Alphas<- matrix(draws_upper$post_warmup_draws[,1,],
                       400,data_conjoint$P)

# Check normality
library('rstatix')
varChain<- as.data.frame(chain1_Alphas[,1])
shapTest<- shapiro_test(varChain[,1])

ggplot(varChain, aes(x=varChain[,1])) + 
  geom_histogram(aes(y=..density..), colour="black", fill="white")+
  geom_density(alpha=.5, fill="#FF6666") +
  geom_vline(xintercept =c(quantile(varChain[,1],.05), quantile(varChain[,1],.95)), color = "red") + 
  geom_text(aes(x = quantile(varChain[,1],.005), y = 1, 
                label = paste0("Shapiro p-value = ",round(shapTest$p.value,3))))


draws_beta <- read_cmdstan_csv(file.path(dir_draws, csv_name), variables = "beta_ind", sampler_diagnostics = "")
utilities <- matrix(colMeans(draws_beta$post_warmup_draws, dims = 2),
                    data_conjoint$I, data_conjoint$P, byrow = TRUE)
# Above assume draws were stored (P1,id1), (P2, id1), ..., (P1, id2)
# Otherwise byrow = FALSE
betas_final_r <- utilities %*% t(data_conjoint$code_master)
write.table(betas_final_r, file = file.path(dir_out, "betas_final_r.csv"), sep = ",", na = ".", row.names = FALSE)
write.table(fit_stats, file = file.path(dir_out, "fit_stats.csv"), sep = ",", na = ".", row.names = FALSE)


This is posterior version 1.1.0


Attaching package: ‘posterior’


The following objects are masked from ‘package:stats’:

    mad, sd, var




ERROR: Error in read_cmdstan_csv(file.path(dir_draws, csv_name), variables = c("alpha")): Assertion on 'files' failed: File does not exist: '/home/sagemaker-user/cmdstanrTest/2_HB_Run/2_Stan_HB_Run/1_Output/MNL_BartBlockCon_v1_6-202107231212-1-498344.csv'.
