In [1]:
# load some required packages
# (you need to install these first, e.g. via `import Pkg; Pkg.add(...)`)

using CSV
using DataFrames
using LinearAlgebra
using SpecialFunctions:erf
using StatsFuns:logsumexp
using GLM
using Statistics
using StatsBase

In [2]:
# load the EM model fitting code
# get this from https://github.com/ndawlab/em

# Directory containing the local checkout of the EM package. Set the EM_DIR
# environment variable to point at your own copy; when it is unset, the
# original hard-coded location is used.
directory = get(ENV, "EM_DIR", "/home/daw/winhome/Dropbox/expts/julia em/git/em")

# Extend LOAD_PATH only once, so re-running this cell does not add duplicates.
directory in LOAD_PATH || push!(LOAD_PATH, directory)
using EM

In [3]:
# load the model code (likelihood function)
# NOTE(review): presumably this file defines the `ewalik` and `ewalik_valenced`
# likelihood functions that are passed to `em` in later cells — confirm.
# The path is relative, so the file is expected next to this notebook — TODO confirm.

include("patent likfuns.jl")

#1 (generic function with 1 method)

In [4]:
# load the data

alldata = CSV.File("patent_investments_743subs.csv") |> DataFrame

# Subject identifiers in order of first appearance. Computed once so that every
# cohort vector below is derived from the same ordering.
allsubs = unique(alldata.sub)

# Cohort split: the first 412 subjects form experiment 1, the rest experiment 2.
exp1subs = allsubs[1:412]
exp1NS = length(exp1subs)

exp2subs = allsubs[413:end]
exp2NS = length(exp2subs)

# Total number of subjects (kept as the last expression so the cell displays it).
allNS = length(allsubs)

743

In [5]:
# load the covariates
# make sure the covariates are z scored separately within cohorts

mergedcovars = CSV.File("mergedcovars.csv") |> DataFrame

# One covariate row per subject is required: the cohort split below and the
# later assignment of fitted parameters as new columns both rely on it.
# NOTE(review): this also assumes the rows are in the same order as
# unique(alldata.sub) — confirm against how mergedcovars.csv was produced.
nrow(mergedcovars) == allNS || throw(DimensionMismatch(
    "mergedcovars.csv has $(nrow(mergedcovars)) rows but the data has $allNS subjects"))

# Split at the cohort boundary defined when the data were loaded (exp1NS)
# rather than repeating the literal 412. Row-range indexing copies, so
# mergedcovars itself is left untouched by the z-scoring below.
exp1covars = mergedcovars[1:exp1NS, :]
exp2covars = mergedcovars[(exp1NS + 1):end, :]

# z-score IQ and LSAS within each cohort separately
for cohort in (exp1covars, exp2covars), col in (:iqZ, :lsasZ)
    cohort[!, col] = zscore(cohort[!, col])
end

In [14]:
# Expt 1, basic model (likelihood function: ewalik)

# Design matrix for the fit: a vector of ones, one entry per exp 1 subject
X = fill(1.0, exp1NS)

# Start points: a 1x8 row of betas (-1 followed by zeros) and an
# 8-element sigma vector (10 followed by ones)
betas = hcat(-1.0, zeros(1, 7))
sigma = vcat(10.0, ones(7))

# Fit the model; x receives the per-subject parameters
betas, sigma, x, l, h = em(
    alldata, exp1subs, X, betas, sigma, ewalik;
    emtol=1e-3, full=false, maxiter=1000,
);


iter: 682
betas: [1.24 1.25 -0.94 0.01 -0.33 -0.1 0.08 0.13]
sigma: [0.25, 0.56, 0.82, 0.0, 0.02, 0.02, 0.01, 0.01]
free energy: -31959.845135
change: [2.0e-6, 1.0e-6, -4.0e-6, 1.3e-5, -6.0e-6, -5.0e-6, 1.9e-5, 1.2e-5, 1.1e-5, 2.0e-6, 0.0, 0.001, 0.000856, 2.6e-5, 0.000452, 0.000648]
max: 0.001


In [15]:
# x holds the per-subject parameters; columns 2 and 3 are squashed into the
# 0-1 range before use. Store them next to the covariates and regress each
# on the covariates (originally we saved these and did regressions in Matlab)

let squash = v -> 0.5 + 0.5 * erf(v / sqrt(2))
    exp1covars.beta = x[:, 1]
    exp1covars.alpha = 1 .- squash.(x[:, 2])  # alpha = 1-phi, squashed
    exp1covars.w = squash.(x[:, 3])
end

# One linear model per fitted parameter, each displayed as soon as it is fit
for model in (
    @formula(beta ~ lsasZ + iqZ),
    @formula(alpha ~ lsasZ + iqZ),
    @formula(w ~ lsasZ + iqZ),
)
    display(lm(model, exp1covars))
end

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

beta ~ 1 + lsasZ + iqZ

Coefficients:
─────────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)   Lower 95%    Upper 95%
─────────────────────────────────────────────────────────────────────────────
(Intercept)   1.24089     0.0219799  56.46    <1e-99   1.19768     1.2841
lsasZ        -0.0471625   0.0220107  -2.14    0.0327  -0.0904308  -0.00389423
iqZ           0.037468    0.0220107   1.70    0.0895  -0.0058003   0.0807363
─────────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

alpha ~ 1 + lsasZ + iqZ

Coefficients:
─────────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)    Lower 95%   Upper 95%
─────────────────────────────────────────────────────────────────────────────
(Intercept)   0.150243   0.0077461   19.40    <1e-59   0.135016    0.16547
lsasZ        -0.0060945  0.00775697  -0.79    0.4325  -0.021343    0.00915401
iqZ           0.0125458  0.00775697   1.62    0.1066  -0.00270272  0.0277943
─────────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

w ~ 1 + lsasZ + iqZ

Coefficients:
──────────────────────────────────────────────────────────────────────────
                 Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────────────────
(Intercept)  0.219201   0.00966949  22.67    <1e-73  0.200193    0.23821
lsasZ        0.0253681  0.00968306   2.62    0.0091  0.00633336  0.0444029
iqZ          0.0232599  0.00968306   2.40    0.0167  0.00422511  0.0422946
──────────────────────────────────────────────────────────────────────────

In [8]:
# Expt 1 again, this time with the valenced model (ewalik_valenced)

# Design matrix: a vector of ones, one entry per exp 1 subject
X = fill(1.0, exp1NS)

# Start points: a 1x9 row of betas (-1 followed by zeros) and a
# 9-element sigma vector (10 followed by ones)
betas = hcat(-1.0, zeros(1, 8))
sigma = vcat(10.0, ones(8))

# Fit the model; x receives the per-subject parameters
betas, sigma, x, l, h = em(
    alldata, exp1subs, X, betas, sigma, ewalik_valenced;
    emtol=1e-3, full=false, maxiter=1000,
);


iter: 680
betas: [1.29 1.34 -0.64 -1.47 0.02 -0.34 -0.1 0.08 0.13]
sigma: [0.22, 0.59, 0.85, 1.25, 0.0, 0.02, 0.02, 0.01, 0.0]
free energy: -31662.566504
change: [2.0e-6, 1.0e-6, -4.0e-6, -5.0e-6, 3.8e-5, -4.0e-6, -1.0e-5, 2.2e-5, 1.4e-5, 8.0e-6, 2.2e-5, 3.0e-6, 3.0e-6, 0.000999, 0.000636, 0.000204, 0.00049, 0.000642]
max: 0.000999


In [9]:
# x holds the per-subject parameters; columns 2-4 are squashed into the
# 0-1 range before use. Store them next to the covariates and regress each
# on the covariates (originally we saved these and did regressions in Matlab)

let squash = v -> 0.5 + 0.5 * erf(v / sqrt(2))
    exp1covars.betavalenced = x[:, 1]
    exp1covars.alphavalenced = 1 .- squash.(x[:, 2])  # alpha = 1-phi, squashed
    exp1covars.wplus = squash.(x[:, 3])
    exp1covars.wminus = squash.(x[:, 4])
end

# One linear model per fitted parameter, each displayed as soon as it is fit
for model in (
    @formula(betavalenced ~ lsasZ + iqZ),
    @formula(alphavalenced ~ lsasZ + iqZ),
    # (the lsas effect comes up not quite signif in this version for wplus,
    # python optimizer gives answer on other side of .05)
    @formula(wplus ~ lsasZ + iqZ),
    @formula(wminus ~ lsasZ + iqZ),
)
    display(lm(model, exp1covars))
end

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

betavalenced ~ 1 + lsasZ + iqZ

Coefficients:
────────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)   Lower 95%   Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   1.29049     0.0205433  62.82    <1e-99   1.25011    1.33087
lsasZ        -0.036237    0.0205722  -1.76    0.0789  -0.0766774  0.00420333
iqZ           0.0285918   0.0205722   1.39    0.1653  -0.0118486  0.0690321
────────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

alphavalenced ~ 1 + lsasZ + iqZ

Coefficients:
─────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)    Lower 95%  Upper 95%
─────────────────────────────────────────────────────────────────────────────
(Intercept)   0.135543    0.00745662  18.18    <1e-53   0.120885    0.150201
lsasZ        -0.00298325  0.00746708  -0.40    0.6897  -0.0176619   0.0116954
iqZ           0.0123427   0.00746708   1.65    0.0991  -0.00233597  0.0270213
─────────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

wplus ~ 1 + lsasZ + iqZ

Coefficients:
───────────────────────────────────────────────────────────────────────────
                 Coef.  Std. Error      t  Pr(>|t|)    Lower 95%  Upper 95%
───────────────────────────────────────────────────────────────────────────
(Intercept)  0.306345    0.0111714  27.42    <1e-93   0.284385    0.328305
lsasZ        0.0194031   0.011187    1.73    0.0836  -0.00258815  0.0413944
iqZ          0.0422159   0.011187    3.77    0.0002   0.0202246   0.0642072
───────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

wminus ~ 1 + lsasZ + iqZ

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.113346    0.00845688  13.40    <1e-33   0.0967212  0.12997
lsasZ         0.00553211  0.00846874   0.65    0.5140  -0.0111156  0.0221798
iqZ          -0.00127264  0.00846874  -0.15    0.8806  -0.0179203  0.0153751
────────────────────────────────────────────────────────────────────────────

In [10]:
# Expt 2, basic model (likelihood function: ewalik)

# Design matrix for the fit: a vector of ones, one entry per exp 2 subject
X = fill(1.0, exp2NS)

# Initial conditions: a 1x8 row of betas (-1 followed by zeros) and an
# 8-element sigma vector (10 followed by ones)
betas = hcat(-1.0, zeros(1, 7))
sigma = vcat(10.0, ones(7))

# Fit the model; x receives the per-subject parameters
betas, sigma, x, l, h = em(
    alldata, exp2subs, X, betas, sigma, ewalik;
    emtol=1e-3, full=false, maxiter=1000,
);


iter: 920
betas: [1.15 1.31 -1.08 -0.05 -0.26 -0.17 0.09 0.15]
sigma: [0.43, 0.63, 1.29, 0.01, 0.01, 0.01, 0.02, 0.01]
free energy: -27656.097438
change: [1.0e-6, 0.0, -1.0e-6, -0.000113, -2.3e-5, -4.1e-5, 6.5e-5, 3.8e-5, 3.0e-6, 1.0e-6, 0.0, 6.6e-5, 0.000999, 0.000915, 7.7e-5, 0.000407]
max: 0.000999


In [11]:
# x holds the per-subject parameters; columns 2 and 3 are squashed into the
# 0-1 range before use. Store them next to the covariates and regress each
# on the covariates (originally we saved these and did regressions in Matlab)

let squash = v -> 0.5 + 0.5 * erf(v / sqrt(2))
    exp2covars.beta = x[:, 1]
    exp2covars.alpha = 1 .- squash.(x[:, 2])  # alpha = 1-phi, squashed
    exp2covars.w = squash.(x[:, 3])
end

# One linear model per fitted parameter, each displayed as soon as it is fit
for model in (
    @formula(beta ~ lsasZ + iqZ),
    @formula(alpha ~ lsasZ + iqZ),
    @formula(w ~ lsasZ + iqZ),
)
    display(lm(model, exp2covars))
end

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

beta ~ 1 + lsasZ + iqZ

Coefficients:
──────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)  Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────────────────
(Intercept)  1.14559      0.0334837  34.21    <1e-99   1.07972   1.21146
lsasZ        0.00468974   0.0335456   0.14    0.8889  -0.061302  0.0706814
iqZ          0.11526      0.0335456   3.44    0.0007   0.049268  0.181251
──────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

alpha ~ 1 + lsasZ + iqZ

Coefficients:
───────────────────────────────────────────────────────────────────────────
                 Coef.  Std. Error      t  Pr(>|t|)    Lower 95%  Upper 95%
───────────────────────────────────────────────────────────────────────────
(Intercept)  0.145498   0.00855549  17.01    <1e-46   0.128667    0.162328
lsasZ        0.0114415  0.00857131   1.33    0.1828  -0.00542017  0.0283032
iqZ          0.0101638  0.00857131   1.19    0.2366  -0.00669783  0.0270255
───────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

w ~ 1 + lsasZ + iqZ

Coefficients:
────────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)    Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)  0.203427     0.0131672  15.45    <1e-40   0.177524    0.22933
lsasZ        0.0296758    0.0131916   2.25    0.0251   0.00372503  0.0556266
iqZ          0.00698428   0.0131916   0.53    0.5969  -0.0189665   0.0329351
────────────────────────────────────────────────────────────────────────────

In [12]:
# Expt 2 again, this time with the valenced model (ewalik_valenced)

# Design matrix: a vector of ones, one entry per exp 2 subject
X = fill(1.0, exp2NS)

# Start points: a 1x9 row of betas (-1 followed by zeros) and a
# 9-element sigma vector (10 followed by ones)
betas = hcat(-1.0, zeros(1, 8))
sigma = vcat(10.0, ones(8))

# Fit the model; x receives the per-subject parameters
betas, sigma, x, l, h = em(
    alldata, exp2subs, X, betas, sigma, ewalik_valenced;
    emtol=1e-3, full=false, maxiter=1000,
);


iter: 592
betas: [1.23 1.38 -0.91 -1.23 -0.04 -0.27 -0.17 0.12 0.16]
sigma: [0.31, 0.63, 1.07, 1.23, 0.01, 0.02, 0.01, 0.01, 0.01]
free energy: -27300.340964
change: [3.0e-6, 1.0e-6, -4.0e-6, -6.0e-6, -4.0e-5, -1.3e-5, -2.0e-5, 1.1e-5, 4.0e-6, 1.2e-5, 3.0e-6, 1.0e-6, 2.0e-6, 0.000654, 0.000999, 0.00095, 0.00051, 0.000651]
max: 0.000999


In [13]:
# x holds the per-subject parameters; columns 2-4 are squashed into the
# 0-1 range before use. Store them next to the covariates and regress each
# on the covariates (originally we saved these and did regressions in Matlab)

let squash = v -> 0.5 + 0.5 * erf(v / sqrt(2))
    exp2covars.betavalenced = x[:, 1]
    exp2covars.alphavalenced = 1 .- squash.(x[:, 2])  # alpha = 1-phi, squashed
    exp2covars.wplus = squash.(x[:, 3])
    exp2covars.wminus = squash.(x[:, 4])
end

# Regress each fitted parameter on social anxiety (LSAS) and IQ
for model in (
    @formula(betavalenced ~ lsasZ + iqZ),
    @formula(alphavalenced ~ lsasZ + iqZ),
    @formula(wplus ~ lsasZ + iqZ),
    @formula(wminus ~ lsasZ + iqZ),
)
    display(lm(model, exp2covars))
end

# also regress on additional psychiatric symptom factors.
# f3 is the one measuring social anxiety, f1 & f2 control for other symptom dimensions
for model in (
    @formula(betavalenced ~ f1 + f2 + f3 + iqZ),
    @formula(alphavalenced ~ f1 + f2 + f3 + iqZ),
    @formula(wplus ~ f1 + f2 + f3 + iqZ),
    @formula(wminus ~ f1 + f2 + f3 + iqZ),
)
    display(lm(model, exp2covars))
end

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

betavalenced ~ 1 + lsasZ + iqZ

Coefficients:
─────────────────────────────────────────────────────────────────────────────
                    Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
─────────────────────────────────────────────────────────────────────────────
(Intercept)   1.233         0.0276095  44.66    <1e-99   1.17868    1.28731
lsasZ        -0.000650693   0.0276605  -0.02    0.9812  -0.0550651  0.0537637
iqZ           0.100229      0.0276605   3.62    0.0003   0.0458147  0.154643
─────────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

alphavalenced ~ 1 + lsasZ + iqZ

Coefficients:
────────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)    Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)  0.133254    0.00826289  16.13    <1e-42   0.116999    0.149509
lsasZ        0.00873378  0.00827817   1.06    0.2922  -0.00755122  0.0250188
iqZ          0.0104969   0.00827817   1.27    0.2057  -0.0057881   0.0267819
────────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

wplus ~ 1 + lsasZ + iqZ

Coefficients:
───────────────────────────────────────────────────────────────────────────
                 Coef.  Std. Error      t  Pr(>|t|)    Lower 95%  Upper 95%
───────────────────────────────────────────────────────────────────────────
(Intercept)  0.245541    0.0130666  18.79    <1e-53   0.219836    0.271246
lsasZ        0.0390789   0.0130908   2.99    0.0030   0.0133264   0.0648313
iqZ          0.0162201   0.0130908   1.24    0.2162  -0.00953233  0.0419725
───────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

wminus ~ 1 + lsasZ + iqZ

Coefficients:
────────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)   Lower 95%   Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   0.159701    0.0111718  14.29    <1e-35   0.137724   0.181679
lsasZ         0.0120764   0.0111925   1.08    0.2814  -0.0099418  0.0340945
iqZ          -0.0207162   0.0111925  -1.85    0.0651  -0.0427344  0.00130194
────────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

betavalenced ~ 1 + f1 + f2 + f3 + iqZ

Coefficients:
────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)   1.233        0.0276052  44.67    <1e-99   1.17869    1.2873
f1           -0.0380977    0.0306472  -1.24    0.2147  -0.0983888  0.0221935
f2            0.0272523    0.028048    0.97    0.3320  -0.0279256  0.0824302
f3            0.00243292   0.0308435   0.08    0.9372  -0.0582446  0.0631104
iqZ           0.100184     0.0276564   3.62    0.0003   0.045777   0.154592
────────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

alphavalenced ~ 1 + f1 + f2 + f3 + iqZ

Coefficients:
────────────────────────────────────────────────────────────────────────────
                  Coef.  Std. Error      t  Pr(>|t|)    Lower 95%  Upper 95%
────────────────────────────────────────────────────────────────────────────
(Intercept)  0.133254    0.00826647  16.12    <1e-42   0.116992    0.149517
f1           0.00935006  0.00917738   1.02    0.3090  -0.00870431  0.0274044
f2           0.00250044  0.00839905   0.30    0.7661  -0.0140227   0.0190236
f3           0.00313237  0.00923619   0.34    0.7347  -0.0150377   0.0213024
iqZ          0.010781    0.00828178   1.30    0.1939  -0.0055115   0.0270734
────────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

wplus ~ 1 + f1 + f2 + f3 + iqZ

Coefficients:
──────────────────────────────────────────────────────────────────────────────
                    Coef.  Std. Error      t  Pr(>|t|)    Lower 95%  Upper 95%
──────────────────────────────────────────────────────────────────────────────
(Intercept)   0.245541      0.0130901  18.76    <1e-53   0.219789    0.271292
f1            0.000974391   0.0145326   0.07    0.9466  -0.027615    0.0295638
f2           -0.00407424    0.0133001  -0.31    0.7595  -0.030239    0.0220905
f3            0.0388603     0.0146257   2.66    0.0083   0.0100876   0.0676329
iqZ           0.0165709     0.0131144   1.26    0.2073  -0.00922854  0.0423704
──────────────────────────────────────────────────────────────────────────────

StatsModels.TableRegressionModel{LinearModel{GLM.LmResp{Vector{Float64}}, GLM.DensePredChol{Float64, CholeskyPivoted{Float64, Matrix{Float64}}}}, Matrix{Float64}}

wminus ~ 1 + f1 + f2 + f3 + iqZ

Coefficients:
─────────────────────────────────────────────────────────────────────────────
                   Coef.  Std. Error      t  Pr(>|t|)   Lower 95%   Upper 95%
─────────────────────────────────────────────────────────────────────────────
(Intercept)   0.159701     0.0111838  14.28    <1e-35   0.1377     0.181703
f1           -0.00116892   0.0124162  -0.09    0.9251  -0.025595   0.0232571
f2            0.011598     0.0113632   1.02    0.3082  -0.0107564  0.0339525
f3            0.0095576    0.0124958   0.76    0.4449  -0.015025   0.0341402
iqZ          -0.020426     0.0112046  -1.82    0.0692  -0.0424684  0.00161631
─────────────────────────────────────────────────────────────────────────────