In [2]:
import Pkg
# Use the project environment located in the notebook's working directory.
Pkg.activate(".")

# Packages this notebook loads with `using` further down.
dependencies = [
    "IJulia",
    "Revise",
    "Turing",
    "Bijectors",
    "ParetoSmooth",
    "LogExpFunctions",
    "StatsPlots",
    "DataFrames",
    "JLD2",
    "CSV",
]

# Add only the packages the active project does not list yet, so that a
# fully configured environment is left untouched (no re-resolve, no upgrades).
let missing_deps = setdiff(dependencies, keys(Pkg.project().dependencies))
    isempty(missing_deps) || Pkg.add(missing_deps)
end
# Download anything recorded in the project/manifest that is not installed
# locally (e.g. right after cloning); does nothing when all is in place.
Pkg.instantiate()
Pkg.status()

using Revise, Turing, Bijectors, ParetoSmooth, LogExpFunctions, StatsPlots, DataFrames, JLD2, CSV
using Random, LinearAlgebra

# Project source files, loaded in the order listed.
# NOTE(review): the order presumably matters (model files after the shared
# helpers) — confirm before reordering.
jlfiles = [
    "code/DataPreparation.jl",
    "code/Utils.jl",
    "code/StatePlot.jl",
    "code/MyModels.jl",
    "code/models/glm.jl",
    "code/models/glmhmm.jl",
    "code/models/iohmm.jl",
]
foreach(include, jlfiles)

# Report how many threads this Julia session was started with.
println("Running on ", Threads.nthreads(), " threads.")

# Seed shared by the cells below.
const SEED = 2026;

[32m[1m  Activating[22m[39m project at `c:\Users\michi\WorkSpace\gitwork\mich2000jp\IPD_IOHMM`


[32m[1mStatus[22m[39m `C:\Users\michi\WorkSpace\gitwork\mich2000jp\IPD_IOHMM\Project.toml`
  [90m[76274a88] [39mBijectors v0.15.14
  [90m[336ed68f] [39mCSV v0.10.15
  [90m[a93c6f00] [39mDataFrames v1.8.1
  [90m[7073ff75] [39mIJulia v1.33.0
  [90m[033835bb] [39mJLD2 v0.6.3
  [90m[2ab3a3ac] [39mLogExpFunctions v0.3.29
  [90m[a68b5a21] [39mParetoSmooth v0.7.16
  [90m[295af30f] [39mRevise v3.13.0
  [90m[f3b207a7] [39mStatsPlots v0.15.8
[32m⌃[39m [90m[fce5fe82] [39mTuring v0.40.5
[36m[1mInfo[22m[39m Packages marked with [32m⌃[39m have new versions available and may be upgradable.
Running on 12 threads.


In [3]:
## Data Preparation ----------------------------------------------------
# Raw CSV inputs.
CSV_FIX_PATH = "data/fix.csv"
CSV_RAND_PATH = "data/rand.csv"
# Processed JLD2 files produced from the CSVs above.
DATA_FIX_PATH = "data/fix.jld2"
DATA_RAND_PATH = "data/rand.jld2"

# IDs to drop when loading each dataset (none at the moment).
ID_EXCLUDED_FP = []
ID_EXCLUDED_SP = []

# Convert each CSV into its JLD2 counterpart: fixed first, then random.
for (csv_path, jld_path) in (
    (CSV_FIX_PATH, DATA_FIX_PATH),
    (CSV_RAND_PATH, DATA_RAND_PATH),
)
    prepare_data(csv_path, jld_path)
end

# Load the processed datasets; the trailing `;` suppresses cell output.
data_fp = load_data(DATA_FIX_PATH, ID_excluded=ID_EXCLUDED_FP);
data_sp = load_data(DATA_RAND_PATH, ID_excluded=ID_EXCLUDED_SP);

Reading CSV files...
Saving processed data to data/fix.jld2 ...
Data saved successfully.
Reading CSV files...
Saving processed data to data/rand.jld2 ...
Data saved successfully.
Data loaded successfully. Excluded IDs: Any[]
Data loaded successfully. Excluded IDs: Any[]


In [None]:
## MCMC Settings ----------------------------------------------------
# What to analyse. These four values are also interpolated into the
# output file stem built in the MCMC run cell.
trt        = "fp"        # condition key; "fp" selects `data_fp` downstream
model_name = "glmhmm"    # key passed to `model_selector` / `decode`
K_states   = 2           # number of hidden states
prefix     = "Norm15"    # free-form tag appended to output file names

# How much to sample.
n_iter   = 2000    # Number of samples
n_burnin = 2000    # Burn-in
n_chains = 6       # Number of chains

# NUTS with a target acceptance rate of 0.8, using ForwardDiff for gradients.
sampler = NUTS(0.8; adtype = AutoForwardDiff())

# Echo the configuration so it is recorded in the notebook output.
println("=== Analysis Settings ===")
println("MCMC: $n_iter samples, $n_burnin burn-in, $n_chains chains")
println("Model: $model_name, K=$K_states, Condition=$trt, Prefix=$prefix")

=== Analysis Settings ===
MCMC: 2000 samples, 2000 burn-in, 6 chains
Model: glmhmm, K=2, Condition=fp, Prefix=Norm15


In [63]:
## MCMC Run ----------------------------------------------------
# Every artefact of a run shares this stem: condition, model, K and prefix.
title = "$(trt)_$(model_name)_K$(K_states)_$prefix"
CHAIN_PATH    = "chain/$title.jld2"
SUMMARY_PATH = "output/$(title)_summary.csv"
LOO_PATH     = "output/$(title)_loo.csv"
PLOT_PATH     = "output/$(title)_plot.png"
PLOT_GQ_PATH  = "output/$(title)_plot_gq.png"
PARATO_PATH   = "output/$(title)_parato.png"

# Anything other than "fp" selects the SP dataset below. Surface that choice
# so a typo in `trt` (e.g. "FP") cannot silently analyse the wrong data while
# the output files are labelled with the mistyped condition.
if !(trt in ("fp", "sp"))
    @warn "Unrecognised `trt`; falling back to the SP dataset (data_sp)." trt
end
data = trt == "fp" ? data_fp : data_sp
model, model_gq = model_selector(model_name, data, K_states)

# Toggle: uncomment the next line to run the sampler; as written, the cell
# only reloads the variable `chain` stored in CHAIN_PATH.
# chain = RunMCMC(SEED, model, K_states, sampler, n_iter, n_chains, n_burnin, CHAIN_PATH)
@load CHAIN_PATH chain

1-element Vector{Symbol}:
 :chain

In [51]:
# Output files handed to `RunPostAnalysis`, in this fixed order:
# summary CSV, LOO CSV, trace plot, generated-quantities plot, Pareto-k plot.
# NOTE(review): the order is assumed to be positional inside RunPostAnalysis —
# confirm before rearranging.
OUTPUT_PATH = [
    SUMMARY_PATH,
    LOO_PATH,
    PLOT_PATH,
    PLOT_GQ_PATH,
    PARATO_PATH,
]
RunPostAnalysis(model_gq, chain, K_states, OUTPUT_PATH)

relabeling states...
generating quantities...
summarizing results...


Row,parameters,mean,std,mcse,ess_bulk,ess_tail,rhat,ess_per_sec,lower,upper
Unnamed: 0_level_1,Symbol,Float64,Float64,Float64,Float64,Float64,Float64,Missing,Float64?,Float64?
1,beta0[1],-1.11191,0.0887651,0.0100578,88.6617,523.129,1.48211,missing,-1.25978,-0.936221
2,beta0[2],-4.28045,0.512791,0.0647296,80.0866,508.961,1.58647,missing,-5.07937,-3.37143
3,beta1[1],1.04473,0.333957,0.0466909,79.6309,506.988,1.59187,missing,0.660967,1.60952
4,beta1[2],5.52453,2.63752,0.375635,79.6776,508.242,1.59239,missing,1.21814,8.18689
5,beta2[1],0.214581,0.0896829,0.000648906,19103.2,18497.0,1.00075,missing,0.0306083,0.384396
6,beta2[2],0.203137,0.417221,0.0031239,20016.2,3131.71,1.043,missing,-0.671972,1.011
7,beta3[1],0.94266,0.165532,0.0124139,180.639,760.475,1.17183,missing,0.616915,1.25865
8,beta3[2],1.30284,0.712833,0.0307233,538.866,3296.68,1.0525,missing,-0.0464795,2.76815
9,"trans[1, 1]",0.99294,0.00415693,0.000539336,79.8066,522.404,1.59113,missing,0.986903,0.999577
10,"trans[2, 1]",0.00705979,0.00415693,0.000539336,79.8066,522.404,1.59113,missing,0.000423271,0.0130968


Plotting MCMC Results...
PSIS-LOO Calculation...


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mNo source provided for samples; variables are assumed to be from a Markov Chain. If the samples are independent, specify this with keyword argument `source=:other`.


There are 0 subjects with pareto k > 0.7, and 3 subjects with 0.5 < pareto k ≤ 0.7.
Subject 14: pareto k = 0.5029229499858878
Subject 57: pareto k = 0.5259017967512254
Subject 77: pareto k = 0.5146205502068643
All done!


In [None]:
## Visualization of State Dynamics ----------------------------------------------------
chain_relabeled = relabel_chain(chain, K_states)

# Human-readable labels for the plot title.
bigtrt = trt == "fp" ? "FP" : "SP"

# The `else` branch keeps `bigmodelname` defined for every `model_name`;
# without it an unlisted model would raise an UndefVarError at the title
# interpolation below, i.e. only after the expensive decoding step.
bigmodelname = if model_name == "glmhmm"
    "GLM-HMM"
elseif model_name == "iohmm_mealy"
    "IO-HMM (Mealy)"
elseif model_name == "iohmm_moore"
    "IO-HMM (Moore)"
else
    model_name
end

# Decode the hidden states from the relabeled chain.
all_states = decode(model_name, data, chain_relabeled, K_states)

p = plot_transition(all_states,
    title = "Transition Dynamics & State Composition ($bigtrt, $bigmodelname, $K_states states)",)
savefig(p, "output/$(title)_stateplot.png")

Decoding IO-HMM for 92 subjects using 8000 posterior samples...
Total MCMC samples available: 8000
Progress: 8000/8000
Decoding completed!


"c:\\Users\\michi\\WorkSpace\\gitwork\\mich2000jp\\IPD_IOHMM\\output\\fp_glmhmm_K2_Norm15_B_stateplot.png"

In [64]:
# Reload the stored chain and relabel its states, then print the posterior
# mean of beta1[1] for each chain separately so that chains sitting in
# different modes can be spotted.
@load CHAIN_PATH chain
chain = relabel_chain(chain, K_states)
n_chn = size(chain, 3)  # third dimension indexes the chains
for i in 1:n_chn
    mean_1 = mean(chain[:, Symbol("beta1[1]"), i])
    println("Chain $i: posterior mean of beta1[1] = $mean_1")
end

Chain 1: posterior mean of beta1[1] = 2.562860455541359
Chain 2: posterior mean of beta1[1] = 1.2186995306059603
Chain 3: posterior mean of beta1[1] = 1.2174745358796621
Chain 4: posterior mean of beta1[1] = 1.2263221539347853
Chain 5: posterior mean of beta1[1] = 1.2171735963274037
Chain 6: posterior mean of beta1[1] = 2.560687616822229
Chain 7: posterior mean of beta1[1] = 1.2245615611566172
Chain 8: posterior mean of beta1[1] = 1.2188960383988563
Chain 9: posterior mean of beta1[1] = 2.559985685811137
Chain 10: posterior mean of beta1[1] = 1.2232342476914682
Chain 11: posterior mean of beta1[1] = 2.561934754475542
Chain 12: posterior mean of beta1[1] = 1.2217606718002187


In [65]:
# Chain indices grouped by hand into two subsets, A and B.
# FP GLMHMM
chn_A = [1,6,9,11]
chn_B = [2,3,4,5]

# # SP GLMHMM
# chn_A = [1,5,6,7]
# chn_B = [2,3,4,9]

# Output files for one subset, in the same order as the full-chain analysis:
# summary, LOO, plot, generated-quantities plot, Pareto-k plot.
subset_output_paths(tag) = [
    "output/$(title)_$(tag)_$(suffix)" for
    suffix in ("summary.csv", "loo.csv", "plot.png", "plot_gq.png", "parato.png")
]

CHAIN_PATH_A = "chain/$(title)_A.jld2"
CHAIN_PATH_B = "chain/$(title)_B.jld2"
OUTPUT_PATH_A = subset_output_paths("A")
OUTPUT_PATH_B = subset_output_paths("B")

# Subset A: analyse, then store the sub-chain under the name `chain_A`.
chain_A = chain[:, :, chn_A]
RunPostAnalysis(model_gq, chain_A, K_states, OUTPUT_PATH_A)
@save CHAIN_PATH_A chain_A

# Subset B: same steps, stored under the name `chain_B`.
chain_B = chain[:, :, chn_B]
RunPostAnalysis(model_gq, chain_B, K_states, OUTPUT_PATH_B)
@save CHAIN_PATH_B chain_B

relabeling states...
generating quantities...
summarizing results...


Row,parameters,mean,std,mcse,ess_bulk,ess_tail,rhat,ess_per_sec,lower,upper
Unnamed: 0_level_1,Symbol,Float64,Float64,Float64,Float64,Float64,Float64,Missing,Float64?,Float64?
1,beta0[1],-3.06495,0.116774,0.00163913,5105.37,5453.04,1.00004,missing,-3.29296,-2.83964
2,beta0[2],-0.681435,0.109244,0.00164198,4423.65,5185.25,1.00059,missing,-0.90886,-0.479288
3,beta1[1],2.56137,0.18514,0.00263852,4940.64,5290.7,0.999894,missing,2.20163,2.91488
4,beta1[2],1.49605,0.137748,0.00204561,4522.88,5019.35,1.00026,missing,1.21732,1.75888
5,beta2[1],2.54243,0.174817,0.0024291,5181.11,5452.58,0.999762,missing,2.20153,2.88938
6,beta2[2],-0.158769,0.145608,0.00220854,4342.66,5554.15,1.00071,missing,-0.446102,0.125066
7,beta3[1],3.87997,0.498073,0.00587204,7428.61,5516.7,1.00024,missing,2.96817,4.8715
8,beta3[2],0.54162,0.184863,0.00270902,4658.07,5155.01,1.0003,missing,0.18312,0.897572
9,"trans[1, 1]",0.994448,0.00117124,1.35085e-05,7328.19,5357.58,1.00026,missing,0.992116,0.996656
10,"trans[2, 1]",0.00555203,0.00117124,1.35085e-05,7328.19,5357.58,1.00026,missing,0.00334399,0.00788415


Plotting MCMC Results...
PSIS-LOO Calculation...
There are 1 subjects with pareto k > 0.7, and 1 subjects with 0.5 < pareto k ≤ 0.7.
Subject 16: pareto k = 0.9891117424431128
Subject 83: pareto k = 0.6922904057004422
All done!
relabeling states...
generating quantities...
summarizing results...

[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mNo source provided for samples; variables are assumed to be from a Markov Chain. If the samples are independent, specify this with keyword argument `source=:other`.





Row,parameters,mean,std,mcse,ess_bulk,ess_tail,rhat,ess_per_sec,lower,upper
Unnamed: 0_level_1,Symbol,Float64,Float64,Float64,Float64,Float64,Float64,Missing,Float64?,Float64?
1,beta0[1],-1.52211,0.11407,0.00259717,2083.13,2376.11,1.00085,missing,-1.74277,-1.29839
2,beta0[2],-4.22825,0.330965,0.00607625,2901.79,3859.99,1.00115,missing,-4.88183,-3.61791
3,beta1[1],1.21992,0.146353,0.00309959,2315.34,2851.18,1.00094,missing,0.931378,1.50552
4,beta1[2],7.086,0.843889,0.0185696,2080.17,2249.4,1.00157,missing,5.37502,8.67967
5,beta2[1],1.06991,0.131545,0.00276315,2380.7,3059.18,1.0018,missing,0.81122,1.3217
6,beta2[2],1.26381,0.52303,0.00734082,5131.41,5549.96,1.00023,missing,0.223248,2.25554
7,beta3[1],0.92031,0.160763,0.00240428,4478.74,5583.08,1.00056,missing,0.59952,1.22839
8,beta3[2],3.49739,0.909994,0.0120232,5761.33,5560.84,1.00067,missing,1.72918,5.3178
9,"trans[1, 1]",0.997266,0.0012557,1.37635e-05,7337.76,4624.35,0.999785,missing,0.99486,0.999491
10,"trans[2, 1]",0.00273355,0.0012557,1.37635e-05,7337.76,4624.35,0.999784,missing,0.000508539,0.00513982


Plotting MCMC Results...
PSIS-LOO Calculation...
There are 4 subjects with pareto k > 0.7, and 6 subjects with 0.5 < pareto k ≤ 0.7.
Subject 11: pareto k = 2.283730033590291
Subject 15: pareto k = 0.5272623150012726
Subject 16: pareto k = 2.560175543356576
Subject 20: pareto k = 0.6450631973114088
Subject 24: pareto k = 0.5090586019264037
Subject 43: pareto k = 0.5527936186208346
Subject 58: pareto k = 0.6568653948659092
Subject 68: pareto k = 1.1597019124514094
Subject 71: pareto k = 0.6215205197900161
Subject 83: pareto k = 1.5071208359033808
All done!


[36m[1m[ [22m[39m[36m[1mInfo: [22m[39mNo source provided for samples; variables are assumed to be from a Markov Chain. If the samples are independent, specify this with keyword argument `source=:other`.
