julia EM model fitting example, Nathaniel Daw 8/2020

####### NOTE NOTE NOTE: PARALLEL COMPUTATION IS NOW AUTOMATIC IN THIS VERSION 
####### BUT TO RUN PARALLEL YOU MUST SET ENVIRONMENT VARIABLE JULIA_NUM_THREADS  
####### BEFORE STARTING JULIA OR JUPYTER-NOTEBOOK

e.g. in Linux/bash:
      export JULIA_NUM_THREADS=`nproc`; julia

or just start Julia with --threads=auto

In [1]:
# Install the EM package from its GitHub repository (only needs to be done once).
# Note: this fails with a weird error if you run this notebook from this directory.
# NOTE(review): presumably "this directory" is the package checkout itself — confirm.
import Pkg
Pkg.add(url="https://github.com/ndawlab/em.git/")

[32m[1m    Updating[22m[39m git-repo `https://github.com/ndawlab/em.git`
[32m[1m   Resolving[22m[39m package versions...
[32m[1m  No Changes[22m[39m to `C:\Users\ndaw\.julia\environments\v1.9\Project.toml`
[32m[1m  No Changes[22m[39m to `C:\Users\ndaw\.julia\environments\v1.9\Manifest.toml`


In [1]:
###### setup: options passed to the model fits below

full = false    # maintain a full covariance matrix (vs. a diagonal one) at the group level
emtol = 1e-3    # stopping condition (relative change) for EM

# load EM package
using EM

# this loads additional packages used in the examples below
# (install any that are missing with Pkg.add, e.g. Pkg.add("GLM"))

using Statistics
using Random
using GLM
using DataFrames


In [2]:
###### Q-learning example: simulate a synthetic dataset

Random.seed!(1234);  # fixed seed so the simulation is repeatable

NS = 250;  # number of simulated subjects
NT = 200;  # entries simulated per subject
NP = 2;    # model parameters per subject

# two simulated between-subject variables (e.g. age or IQ), each mean-centered
cov = randn(NS);
cov .-= mean(cov);

cov2 = randn(NS);
cov2 .-= mean(cov2);

# subject-level parameters: one row per subject, one column per parameter
params = zeros(NS, NP);
params[:, 1] = 1 .+ 0.5 * randn(NS) + cov;  # softmax temp: mean 1, effect of cov
params[:, 2] = 0 .+ 1 * randn(NS) + cov2;   # learning rate (pre sigmoidal transform): mean 0, effect of cov2

# long-format storage: NT consecutive entries for each subject in turn
subs = 1:NS;
c = zeros(Int64, NS * NT);
r = zeros(Int64, NS * NT);
s = repeat(subs; inner=NT);  # subject index for every entry

for sub in subs
    # slice of the long vectors that belongs to this subject
    rows = ((sub - 1) * NT + 1):(sub * NT)
    # simq returns a pair of length-NT vectors for this subject's parameters
    # (presumably choices and rewards, going by the names c and r)
    (c[rows], r[rows]) = simq(params[sub, :], NT)
end

data = DataFrame(sub=s, c=c, r=r);

In [3]:
# set up the fit

# Group-level design matrix X (replicated once for each model parameter).
#
# Each subject-level parameter x_ij (subject i, parameter j) is modeled as
#
#     x_ij ~ Normal(X beta_j, Sigma)
#
# so X has one row per subject and one column per predictor. With an
# intercept as the only predictor, X = ones(NS) and beta_j is simply the
# group-level mean of parameter j.
#
# This example also has two covariates that vary by subject, giving
#
#     x_ij = beta_1j + beta_2j * cov_i + beta_3j * cov2_i
#
# and the slopes are inferred for every parameter j along with the intercept.
# The design matrix therefore has 3 columns and a row per subject:

X = hcat(ones(NS), cov, cov2);

# With no covariates (intercept only), pass a plain column vector rather than
# a bracketed matrix:
#
# X = ones(NS)

# Starting values for the group-level betas: one row per regressor and one
# column per model parameter (here 3 rows, 2 columns). Entries must be floats.
# With a single predictor, betas must be a row vector (length: # params),
# e.g. betas = [0. 0.];
# with a single model parameter and no covariates, betas is a scalar,
# e.g. betas = 0.

startbetas = [1.0 0.0
              0.0 0.0
              0.0 0.0]

# Starting group-level variance for each model parameter (variance, not SD).
# With only one model parameter this must still be a length-one vector,
# e.g. sigma = [5.]

startsigma = [5.0, 1.0];



Estimation & significance tests

In [4]:

# Fit the model.
# Inputs: the data frame, the list of subjects, the group-level design matrix,
# starting group-level betas, starting group-level variance (or covariance),
# the likelihood function, and some optional keyword settings.
#
# Outputs:
#   betas - group-level means and slopes
#   sigma - group-level *variance* or covariance
#   x     - matrix of MAP / empirical Bayes per-subject parameters
#   l     - per-subject negative log likelihoods
#   h     - *inverse* per-subject hessians

betas, sigma, x, l, h = em(data, subs, X, startbetas, startsigma, qlik; emtol, full);

betas  # ground truth would be [1 0; 1 0; 0 1], so this is close


iter: 12
betas: [0.95 -0.08; 0.94 0.01; -0.02 0.85]
sigma: [0.29, 0.64]
free energy: -22912.710021
change: [4.0e-5, -0.000122, 6.3e-5, 0.00053, -0.000175, 2.0e-6, 0.000114, 9.0e-6]
max: 0.00053


3×2 Matrix{Float64}:
  0.947158  -0.0813925
  0.940341   0.0115734
 -0.016061   0.850036

In [5]:
# standard errors on the group-level betas (means and slopes), based on an
# asymptotic Gaussian approx (these may be inflated for small n)
# returns standard errors, pvalues, and a covariance matrix
# the standard errors and pvalues are vectors with one entry per element of betas
# NOTE(review): this was described as a column-wise readout of betas (parameter 1:
# intercept, covariate, covariate; then parameter 2), but the reshape below fills a
# (params x regressors) matrix and transposes it, i.e. it treats the vector as
# regressor-major (intercept: param 1, param 2; then cov; then cov2), and the
# printed p-values only match the simulated effects under that ordering -- confirm
# against emerrors before indexing these vectors directly

(standarderrors,pvalues,covmtx) = emerrors(data,subs,x,X,h,betas,sigma,qlik)
reshape(pvalues,size(betas'))'  # cov (2nd row) is significant for beta (first column)
# & cov2 (3rd row) is significant for alpha (second column)

3×2 adjoint(::Matrix{Float64}) with eltype Float64:
 4.8045e-87   0.16772
 2.30126e-78  0.827098
 0.658954     3.8568e-36

In [6]:
# Alternative p value for a covariate: leave it out of the group-level model,
# fit, and then regress the per-subject estimates on it.
# This seems to work better when full=false.
# In general it is not super well justified and can clearly be biased in some
# cases, but it works well in practice as long as you avoid the bias cases
# (which are pretty obvious).

X2 = ones(NS);  # intercept-only design
startbetas2 = [0.0 0.0];
startsigma2 = [5.0, 1.0];
betas2, sigma2, x2, l2, h2 = em(data, subs, X2, startbetas2, startsigma2, qlik; emtol=1e-5, full);

# regress each column of per-subject estimates on both covariates
lm(@formula(beta ~ cov + cov2), DataFrame(beta=x2[:, 1], cov=cov, cov2=cov2)) |> display
lm(@formula(alpha ~ cov + cov2), DataFrame(alpha=x2[:, 2], cov=cov, cov2=cov2)) |> display

# again the first covariate is significant for beta and the second for alpha


iter: 12
betas: [0.9 -0.04]
sigma: [1.17, 1.11]
free energy: -23783.131705
change: [0.0, -4.0e-6, 1.0e-6, 0.0]
max: 4.0e-6


StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

beta ~ 1 + cov + cov2

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.898618     0.0364278  24.67    <1e-67   0.82687    0.970367
cov           0.82229      0.034146   24.08    <1e-66   0.755036   0.889545
cov2         -0.00558035   0.0357861  -0.16    0.8762  -0.0760652  0.0649045
────────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, LinearAlgebra.CholeskyPivoted{Float64, Matrix{Float64}, Vector{Int64}}}}, Matrix{Float64}}

alpha ~ 1 + cov + cov2

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)  -0.0436486    0.0493965  -0.88    0.3778  -0.140941   0.0536435
cov          -0.00676074   0.0463024  -0.15    0.8840  -0.0979586  0.0844371
cov2          0.572827     0.0485264  11.80    <1e-25   0.477249   0.668405
────────────────────────────────────────────────────────────────────────────

Model comparison metrics

In [7]:
# laplace approximation to the aggregate log marginal likelihood of the whole dataset
# marginalized over the individual params
# takes the per-subject estimates x, negative log likelihoods l, and inverse hessians h
# returned by em
# NOTE(review): the value printed for this fit is positive and l holds *negative* log
# likelihoods, so this looks like it is on the negative-log scale (smaller = better) -- confirm

ll1 = lml(x,l,h)

22331.093777006565

In [8]:
# to compare these between models you need to correct for the group-level free parameters
# using either aic or bic (this is Quentin Huys' IBIC or IAIC, i.e. the subject-level
# params are marginalized by laplace approx and aggregated, and the group-level
# params are corrected by AIC or BIC)
# the last argument is NS*NT, the total number of rows in the simulated data
# (presumably the observation count used in the BIC penalty -- confirm)

ibic(x,l,h,betas,sigma,NS*NT)


22374.372890144205

In [9]:
# IAIC: the AIC-corrected variant; takes the same fit outputs as ibic but no observation count
iaic(x,l,h,betas,sigma)

22339.093777006565

In [10]:
# Alternatively, compute unbiased per-subject marginal likelihoods via
# cross validation. You can do paired t tests on these between models,
# and they are also appropriate for SPM_BMS etc.

liks = loocv(data, subs, x, X, betas, sigma, qlik; emtol, full)
sum(liks)

# Note that iaic does an excellent job of predicting the aggregate held-out
# likelihood, but importantly these are per-subject scores that you can compare
# in paired tests across models, as per Stephan et al. random effects model
# comparison.


iter: 4
betas: [0.94 -0.07; 0.94 0.02; -0.02 0.85]
sigma: [0.29, 0.62]
free energy: -22894.045173
change: [9.9e-5, -0.000891, 0.000159, 7.4e-5, -0.000796, 7.0e-6, 0.000266, 0.000403]
max: 0.000891


22341.82677834569