Importing data

In [2]:
# uncomment if using this code in RStudio
# setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

library(coin)
library(lme4)
library(lmerTest)
library(plyr)
library(xtable)
library(LMERConvenienceFunctions)
library(car)
# library(lmerTest) see also https://link.springer.com/article/10.3758/s13428-016-0809-y 

source("r_utils/mer-utils.R")
source("r_utils/regression-utils.R")

# NOTE: both paths below are absolute and specific to the original author's
# machine; edit them before running this notebook anywhere else.

# Tab-separated text file with the processed choice data.
data_path <- "C:/Users/azgonnikov/Google Drive/data/CoM_fixed_duration/processed/choices.txt"
# Directory prefix (note the trailing slash) for the generated LaTeX tables;
# the run_analysis_* functions below read this global when building file names.
output_table_path <- "C:/Users/azgonnikov/Dropbox/Research/Journal papers/2019 Changes-of-mind/v 0.2/tables/"

# header = TRUE (spelled out; `T` is an ordinary variable that can be
# reassigned): the first line of the file holds the column names.
choice.data <- read.table(data_path, sep = "\t", header = TRUE)

Loading required package: survival
Loading required package: Matrix

Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

Loading required package: carData
Registered S3 methods overwritten by 'car':
  method                          from
  influence.merMod                lme4
  cooks.distance.influence.merMod lme4
  dfbeta.influence.merMod         lme4
  dfbetas.influence.merMod        lme4


A bit of preprocessing to create variables for analyzing sequential effects

In [3]:
# Prepare the raw choice data for the mixed-effects analyses.
#
# Args:
#   choice.data: data frame of trials. Must contain the columns subj_id,
#                session_no, block_no, trial_no and is_com.
#
# Returns:
#   A copy of choice.data in which
#     * subj_id is a factor,
#     * is_com is a factor carrying a -0.5 / +0.5 sum contrast
#       (first level negative, second level positive),
#     * all_trial_no is a new column with a running trial index computed
#       from session_no, block_no and trial_no.
#
# Side effect: prints the structure of the result with str().
preprocess_data <- function(choice.data) {
    # any exclusions?
    stats.df <- choice.data
    stats.df$subj_id <- as.factor(stats.df$subj_id)

    # generate trial number
    max.trial <- max(stats.df$trial_no)
    # Use the full column name: the previous `block_n` only worked through
    # `$` partial matching, which breaks silently as soon as another column
    # starts with "block_n" or the data is a tibble.
    max.block <- max(stats.df$block_no)
    stats.df$all_trial_no <- (stats.df$session_no - 1) * (max.block * max.trial) +
      (stats.df$block_no - 1) * max.trial +
      stats.df$trial_no

    # `contrasts<-` accepts only factors (or logicals). read.table() returns
    # character columns by default since R 4.0, so convert when necessary.
    if (!is.factor(stats.df$is_com) && !is.logical(stats.df$is_com)) {
        stats.df$is_com <- as.factor(stats.df$is_com)
    }
    # contr.sum(2) is (1, -1); halving and negating gives (-0.5, +0.5),
    # i.e. the second level ("True" when levels sort alphabetically) is positive.
    contrasts(stats.df$is_com) <- contr.sum(n = 2) / 2 * -1

#     stats.df = stats.df[(stats.df$RT < 1.5),]

    str(stats.df)

    stats.df
}

stats.df = preprocess_data(choice.data)

'data.frame':	31517 obs. of  29 variables:
 $ subj_id        : Factor w/ 13 levels "196","216","247",..: 1 1 1 1 1 1 1 1 1 1 ...
 $ session_no     : int  1 1 1 1 1 1 1 1 1 1 ...
 $ block_no       : int  1 1 1 1 1 1 1 1 1 1 ...
 $ trial_no       : int  1 2 3 4 5 6 7 8 9 10 ...
 $ is_practice    : Factor w/ 2 levels "False","True": 2 2 2 2 2 2 2 2 2 2 ...
 $ direction      : num  0 180 180 0 180 0 180 180 180 180 ...
 $ coherence      : num  0.032 0.256 0 0.032 0.512 0.128 0 0.512 0.064 0 ...
 $ duration       : int  800 800 800 800 800 800 800 800 800 800 ...
 $ response       : int  0 0 180 180 180 0 0 180 0 180 ...
 $ trial_time     : num  1.31 1.99 2.24 2.33 3.32 ...
 $ is_correct     : Factor w/ 2 levels "False","True": 2 1 2 1 2 2 1 2 1 2 ...
 $ xflips         : int  1 0 0 3 0 1 2 2 0 1 ...
 $ max_d          : num  110.3 219 -77 335.4 62.1 ...
 $ idx_max_d      : num  92 121 100 108 72 165 165 159 53 48 ...
 $ midline_d      : num  0 0 0 190 0 348 0 0 0 0 ...
 $ idx_midline_d  : nu

# Analysis 1. Accuracy as a function of coherence

In [8]:
# Compare candidate random-effects structures for the accuracy analysis.
#
# Fits five binomial GLMMs of is_correct that contain only a random-effects
# term grouped by subj_id, using the trials with non-zero coherence. Prints
# the anova() comparison table and the name(s) of the model(s) with the
# lowest AIC and the lowest BIC.
#
# Args:
#   stats.df: data frame with columns is_correct, is_com, coherence, subj_id.
#
# Note: c.() is defined in the sourced r_utils scripts (not shown here);
# presumably it centres the predictor -- confirm there.
rnd_effects_analysis_1 <- function(stats.df){
    # Every model uses the same subset, so compute it once.
    nonzero.df <- stats.df[stats.df$coherence != 0, ]

    # The rndN.lmer names are kept as-is: anova() uses them as row labels.

    # rnd intercept for each participant
    rnd1.lmer <- glmer(is_correct ~ (1|subj_id),
                       nonzero.df,
                       family = binomial)

    # rnd intercept for each participant and random slope of coherence
    # diff avg acc, diff coherence effect for each p
    rnd2.lmer <- glmer(is_correct ~ (c.(coherence)|subj_id),
                       nonzero.df,
                       family = binomial)

    # rnd intercept for each participant and random linear + quadratic
    # slopes of (raw, uncentred) coherence
    rnd3.lmer <- glmer(is_correct ~ (poly(coherence, 2, raw = TRUE)|subj_id),
                       nonzero.df,
                       family = binomial)

    # rnd.lmer with com and coherence
    rnd4.lmer <- glmer(is_correct ~ ((is_com + c.(coherence))|subj_id),
                       nonzero.df,
                       family = binomial)

    # rnd.lmer with com by coherence
    rnd5.lmer <- glmer(is_correct ~ ((is_com*c.(coherence))|subj_id),
                       nonzero.df,
                       family = binomial)

    rnd.anova <- anova(rnd1.lmer, rnd2.lmer, rnd3.lmer, rnd4.lmer, rnd5.lmer)
    print(rnd.anova)

    # Print the row name(s) of the model(s) minimising the given criterion.
    report_best <- function(criterion) {
        print(paste("Best model according to", criterion))
        scores <- rnd.anova[[criterion]]
        print(row.names(rnd.anova[scores == min(scores), ]))
    }
    report_best("AIC")
    report_best("BIC")
}

In [9]:
rnd_effects_analysis_1(stats.df)

boundary (singular) fit: see ?isSingular


Data: stats.df[stats.df$coherence != 0, ]
Models:
rnd1.lmer: is_correct ~ (1 | subj_id)
rnd2.lmer: is_correct ~ (c.(coherence) | subj_id)
rnd3.lmer: is_correct ~ (poly(coherence, 2, raw = T) | subj_id)
rnd4.lmer: is_correct ~ ((is_com + c.(coherence)) | subj_id)
rnd5.lmer: is_correct ~ ((is_com * c.(coherence)) | subj_id)
          Df   AIC   BIC logLik deviance    Chisq Chi Df Pr(>Chisq)    
rnd1.lmer  2 28077 28094 -14037    28073                               
rnd2.lmer  4 23747 23779 -11869    23739 4334.713      2  < 2.2e-16 ***
rnd3.lmer  7 23719 23776 -11852    23705   34.105      3  1.882e-07 ***
rnd4.lmer  7 23703 23760 -11844    23689   15.595      0  < 2.2e-16 ***
rnd5.lmer 11 23639 23729 -11808    23617   72.009      4  8.545e-15 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "Best model according to AIC"
[1] "rnd5.lmer"
[1] "Best model according to BIC"
[1] "rnd5.lmer"


rnd5 is the best

In [12]:
# Fit the accuracy model and export its fixed-effects table to LaTeX.
#
# Fits a binomial GLMM of is_correct on is_com, c.(coherence) and their
# interaction, with the same terms as random slopes per subj_id, using the
# trials with non-zero coherence. Prints the model summary, then prints the
# coefficient table as LaTeX and writes it to
# <output_table_path>is_correct_vs_coh.tex.
#
# Args:
#   stats.df: data frame with columns is_correct, is_com, coherence, subj_id.
#
# Reads the global output_table_path.
run_analysis_1 <- function(stats.df){
    choice.mer <- glmer(is_correct ~ ((is_com*c.(coherence))|subj_id) + is_com*c.(coherence),
                        stats.df[stats.df$coherence!=0,],
                        family = binomial)
    # Summarise once and reuse for both the printout and the table.
    choice.summary <- summary(choice.mer)
    print(choice.summary)

    choice.output <- choice.summary$coefficients
    # Order follows the formula is_com * c.(coherence):
    # intercept, is_com, coherence, is_com:coherence.
    row.names(choice.output) <- c("Intercept", "Is CoM", "Coherence", "Is CoM by Coherence")

    file_name <- paste0(output_table_path, "is_correct_vs_coh.tex")
    # The caption now matches the fitted model: a binomial GLMM with no
    # trial-number term and with random slopes for both predictors and their
    # interaction (the previous text mentioned trial number and only a random
    # slope for coherence).
    table.caption <- paste(
        "Parameters of a generalized linear mixed-effects model analysing choice accuracy",
        "as a function of coherence and presence or absence of a change-of-mind.",
        "The model included random intercept and random slopes for coherence,",
        "change-of-mind, and their interaction."
    )
    print(xtable(choice.output, digits = c(4,4,4,4,4),# display = c("g","g","g","g","g"),
                 label = "tab:is_correct_vs_coh",
                 caption = table.caption),
          caption.placement = "top", table.placement="t", floating.environment = "table*",
          math.style.exponents = TRUE, type = "latex", booktabs = TRUE, file = file_name)
}

In [13]:
run_analysis_1(stats.df)

"Model failed to converge with max|grad| = 0.00343225 (tol = 0.001, component 1)"

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: is_correct ~ ((is_com * c.(coherence)) | subj_id) + is_com *  
    c.(coherence)
   Data: stats.df[stats.df$coherence != 0, ]

     AIC      BIC   logLik deviance df.resid 
 23613.8  23728.3 -11792.9  23585.8    26241 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-66.302   0.023   0.292   0.652   1.192 

Random effects:
 Groups  Name                  Variance Std.Dev. Corr          
 subj_id (Intercept)            0.5040  0.7099                 
         is_com1                0.5371  0.7329   0.58          
         c.(coherence)         17.2470  4.1530   0.97 0.51     
         is_com1:c.(coherence) 18.2112  4.2675   0.72 0.98 0.66
Number of obs: 26255, groups:  subj_id, 13

Fixed effects:
                      Estimate Std. Error z value Pr(>|z|)    
(Intercept)             1.4084     0.1527   9.220  < 2e-16 ***
is_com1                -0

# Analysis 2. Probability of CoM as a function of initiation time

In [7]:
# Compare candidate random-effects structures for the change-of-mind analysis.
#
# Fits five binomial GLMMs of is_com that contain only a random-effects term
# grouped by subj_id, on all rows of stats.df. Prints the anova() comparison
# table and the name(s) of the model(s) with the lowest AIC and lowest BIC.
#
# Args:
#   stats.df: data frame with columns is_com, coherence, RT, subj_id.
#
# Note: c.() is defined in the sourced r_utils scripts (not shown here);
# presumably it centres the predictor -- confirm there.
rnd_effects_analysis_2 <- function(stats.df){
    # The rndN.lmer names are kept as-is: anova() uses them as row labels.

    # random intercept only
    rnd1.lmer <- glmer(is_com ~ (1|subj_id), stats.df, family = binomial)

    # random intercept and random slope of coherence
    rnd2.lmer <- glmer(is_com ~ (c.(coherence)|subj_id), stats.df, family = binomial)

    # random intercept and random linear + quadratic slopes of coherence
    # (TRUE spelled out: `T` is an ordinary, reassignable variable)
    rnd3.lmer <- glmer(is_com ~ (poly(c.(coherence), 2, raw = TRUE)|subj_id), stats.df, family = binomial)

    # random intercept and random slope of RT
    rnd4.lmer <- glmer(is_com ~ (c.(RT)|subj_id), stats.df, family = binomial)

    # random intercept and random slopes of RT, coherence and their interaction
    rnd5.lmer <- glmer(is_com ~ (c.(RT)*c.(coherence)|subj_id), stats.df, family = binomial)

    rnd.anova <- anova(rnd1.lmer, rnd2.lmer, rnd3.lmer, rnd4.lmer, rnd5.lmer)
    print(rnd.anova)

    # Print the row name(s) of the model(s) minimising the given criterion.
    report_best <- function(criterion) {
        print(paste("Best model according to", criterion))
        scores <- rnd.anova[[criterion]]
        print(row.names(rnd.anova[scores == min(scores), ]))
    }
    report_best("AIC")
    report_best("BIC")
}

In [8]:
rnd_effects_analysis_2(stats.df)

boundary (singular) fit: see ?isSingular


Data: stats.df
Models:
rnd1.lmer: is_com ~ (1 | subj_id)
rnd2.lmer: is_com ~ (c.(coherence) | subj_id)
rnd4.lmer: is_com ~ (c.(RT) | subj_id)
rnd3.lmer: is_com ~ (poly(c.(coherence), 2, raw = T) | subj_id)
rnd5.lmer: is_com ~ (c.(RT) * c.(coherence) | subj_id)
          Df   AIC   BIC  logLik deviance   Chisq Chi Df Pr(>Chisq)    
rnd1.lmer  2 12690 12707 -6343.2    12686                              
rnd2.lmer  4 12479 12512 -6235.4    12471 215.468      2  < 2.2e-16 ***
rnd4.lmer  4 12499 12533 -6245.6    12491   0.000      0          1    
rnd3.lmer  7 12470 12528 -6227.9    12456  35.354      3  1.026e-07 ***
rnd5.lmer 11 12343 12435 -6160.6    12321 134.701      4  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "Best model according to AIC"
[1] "rnd5.lmer"
[1] "Best model according to BIC"
[1] "rnd5.lmer"


rnd5 is the best converging model

In [4]:
# Fit the change-of-mind model and export its fixed-effects table to LaTeX.
#
# Fits a binomial GLMM of is_com on c.(RT), c.(coherence) and their
# interaction, with the same terms as random slopes per subj_id. Prints the
# model summary, then prints the coefficient table as LaTeX and writes it to
# <output_table_path>is_com_vs_RT.tex.
#
# Args:
#   stats.df: data frame with columns is_com, RT, coherence, subj_id.
#   exp_name: accepted for compatibility with existing calls; never used in
#             the body, so it may be omitted.
#
# Reads the global output_table_path.
run_analysis_2 <- function(stats.df, exp_name){
    choice.mer <- glmer(is_com ~ c.(RT)*c.(coherence) + (c.(RT)*c.(coherence)|subj_id), stats.df, family = binomial)
    # Summarise once and reuse for both the printout and the table.
    choice.summary <- summary(choice.mer)
    print(choice.summary)

    choice.output <- choice.summary$coefficients
    # The coefficient rows follow the formula c.(RT) * c.(coherence), i.e.
    # (Intercept), c.(RT), c.(coherence), c.(RT):c.(coherence). The labels
    # must use the same order; previously "Coherence" and "RT" were swapped,
    # so the exported table attributed each main effect to the other predictor.
    row.names(choice.output) <- c("Intercept", "RT", "Coherence", "RT by Coherence")

    file_name <- paste0(output_table_path, "is_com_vs_RT.tex")
    # The caption describes the model as fitted: a binomial GLMM with random
    # slopes for both predictors and their interaction (the previous text
    # listed only a random slope for response time).
    table.caption <- paste(
        "Parameters of a generalized linear mixed-effects model analysing probability of a change-of-mind",
        "as a function of coherence and response time.",
        "The model included random intercept and random slopes for response time,",
        "coherence, and their interaction."
    )
    print(xtable(choice.output, digits = c(4,4,4,4,4),
                 label = "tab:is_com_vs_RT",
                 caption = table.caption),
          caption.placement = "top", table.placement="t", floating.environment = "table*",
          math.style.exponents = TRUE, type = "latex", booktabs = TRUE, file = file_name)
}

In [5]:
run_analysis_2(stats.df)

"Model failed to converge with max|grad| = 0.00405055 (tol = 0.001, component 1)"

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: binomial  ( logit )
Formula: is_com ~ c.(RT) * c.(coherence) + (c.(RT) * c.(coherence) | subj_id)
   Data: stats.df

     AIC      BIC   logLik deviance df.resid 
 12329.2  12446.2  -6150.6  12301.2    31503 

Scaled residuals: 
   Min     1Q Median     3Q    Max 
-2.627 -0.269 -0.169 -0.096 32.770 

Random effects:
 Groups  Name                 Variance Std.Dev. Corr             
 subj_id (Intercept)          2.1164   1.4548                    
         c.(RT)               0.8508   0.9224   -0.57            
         c.(coherence)        3.3324   1.8255    0.93 -0.35      
         c.(RT):c.(coherence) 5.4914   2.3434    0.34 -0.40  0.00
Number of obs: 31517, groups:  subj_id, 13

Fixed effects:
                     Estimate Std. Error z value Pr(>|z|)    
(Intercept)           -3.5386     0.4059  -8.719  < 2e-16 ***
c.(RT)                 1.1659     0.2840   4.105 4.04e-05 ***
c.(c

# Analysis 3. Response time as a function of coherence

In [11]:
# Compare candidate random-effects structures for the response-time analysis.
#
# Fits five linear mixed models of RT that contain only a random-effects term
# grouped by subj_id, prints their anova() comparison, and then prints the
# name(s) of the model(s) with the smallest AIC and the smallest BIC.
#
# Args:
#   stats.df: data frame with columns RT, coherence, is_correct, subj_id.
rnd_effects_analysis_3 <- function(stats.df){
    # Model object names must stay rnd1.lmer ... rnd5.lmer because anova()
    # labels the rows of its table with them.

    # intercept only
    rnd1.lmer <- lmer(RT ~ (1|subj_id), stats.df)
    # + slope of coherence
    rnd2.lmer <- lmer(RT ~ (c.(coherence)|subj_id), stats.df)
    # + slope of correctness (no coherence slope)
    rnd3.lmer <- lmer(RT ~ (is_correct|subj_id), stats.df)
    # + additive slopes of coherence and correctness
    rnd4.lmer <- lmer(RT ~ ((c.(coherence)+is_correct)|subj_id), stats.df)
    # + slopes of coherence, correctness and their interaction
    rnd5.lmer <- lmer(RT ~ ((c.(coherence)*is_correct)|subj_id), stats.df)

    # (alternative kept for reference: anova(rnd4.lmer, rnd5.lmer))
    rnd.anova <- anova(rnd1.lmer, rnd2.lmer, rnd3.lmer, rnd4.lmer, rnd5.lmer)
    print(rnd.anova)

    # Announce the criterion, then print the row name(s) attaining its minimum.
    announce_winner <- function(criterion) {
        print(paste("Best model according to", criterion))
        is.lowest <- rnd.anova[[criterion]] == min(rnd.anova[[criterion]])
        print(row.names(rnd.anova[is.lowest, ]))
    }
    announce_winner("AIC")
    announce_winner("BIC")
}

In [12]:
rnd_effects_analysis_3(stats.df)

"Model failed to converge with max|grad| = 0.173306 (tol = 0.002, component 1)"boundary (singular) fit: see ?isSingular
refitting model(s) with ML (instead of REML)


Data: stats.df
Models:
rnd1.lmer: RT ~ (1 | subj_id)
rnd2.lmer: RT ~ (c.(coherence) | subj_id)
rnd3.lmer: RT ~ (is_correct | subj_id)
rnd4.lmer: RT ~ ((c.(coherence) + is_correct) | subj_id)
rnd5.lmer: RT ~ ((c.(coherence) * is_correct) | subj_id)
          Df   AIC   BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)    
rnd1.lmer  3 18551 18576 -9272.6    18545                             
rnd2.lmer  5 13271 13313 -6630.5    13261 5284.3      2     <2e-16 ***
rnd3.lmer  5 16778 16820 -8384.0    16768    0.0      0          1    
rnd4.lmer  8 12929 12996 -6456.4    12913 3855.3      3     <2e-16 ***
rnd5.lmer 12 12542 12642 -6259.1    12518  394.5      4     <2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
[1] "Best model according to AIC"
[1] "rnd5.lmer"
[1] "Best model according to BIC"
[1] "rnd5.lmer"


rnd5 is the best

In [13]:
# Fit the response-time model and export its fixed-effects table to LaTeX.
#
# Fits a linear mixed model of RT on c.(coherence), is_correct and their
# interaction, with the same terms as random slopes per subj_id. Prints the
# model summary, then prints the coefficient table as LaTeX and writes it to
# <output_table_path>RT_vs_coh.tex.
#
# Args:
#   stats.df: data frame with columns RT, coherence, is_correct, subj_id.
#   exp_name: accepted but never used in the body.
#
# Reads the global output_table_path.
run_analysis_3 <- function(stats.df, exp_name){
    rt.model <- lmer(RT ~ ((c.(coherence)*is_correct) | subj_id) + c.(coherence)*is_correct, stats.df)
    rt.summary <- summary(rt.model)
    print(rt.summary)

    # Fixed-effects table, relabelled in formula order for the paper.
    coef.table <- rt.summary$coefficients
    rownames(coef.table) <- c("Intercept", "Coherence", "Is correct", "Coherence by Is correct")

    tex.file <- paste0(output_table_path, "RT_vs_coh.tex")
    tex.caption <- "Parameters of a linear mixed-effects model analysing response time as
                            a function of coherence and choice correctness. The model included random intercept and random 
                            slopes for coherence, correctness, and their interaction."

    # One digits entry per table column plus one for the row-name column.
    tex.table <- xtable(coef.table,
                        digits = rep(4, 6),
                        label = "tab:RT_vs_coh",
                        caption = tex.caption)
    print(tex.table,
          caption.placement = "top",
          table.placement = "t",
          floating.environment = "table*",
          math.style.exponents = TRUE,
          type = "latex",
          booktabs = TRUE,
          file = tex.file)
}

In [14]:
run_analysis_3(stats.df)

"Model failed to converge with max|grad| = 0.0561677 (tol = 0.002, component 1)"

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: RT ~ ((c.(coherence) * is_correct) | subj_id) + c.(coherence) *  
    is_correct
   Data: stats.df

REML criterion at convergence: 12521.3

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-4.6172 -0.4711 -0.0474  0.4122 13.8346 

Random effects:
 Groups   Name                         Variance Std.Dev. Corr             
 subj_id  (Intercept)                  0.077363 0.27814                   
          c.(coherence)                0.414880 0.64411  -0.13            
          is_correctTrue               0.009093 0.09536  -0.83 -0.29      
          c.(coherence):is_correctTrue 0.297843 0.54575  -0.58 -0.66  0.91
 Residual                              0.086563 0.29422                   
Number of obs: 31517, groups:  subj_id, 13

Fixed effects:
                             Estimate Std. Error       df t value Pr(>|t|)    
(Intercept)                   0.20311    0.07738 11.96332  

# Difference between initial and final decisions in CoM trials

In [15]:
# One-sided exact binomial test of accuracy against chance at one coherence level.
#
# Args:
#   data:      data frame with columns is_correct (values compared against
#              the string "True") and coherence (numeric).
#   coherence: the coherence level whose trials are tested.
#
# Returns:
#   The "htest" object from binom.test(), testing whether the proportion of
#   rows with is_correct == "True" among the rows at this coherence level is
#   greater than 0.5.
#
# Rows with NA in either column are ignored. Stops with an error if no row
# matches the requested coherence level.
run_binom_test <- function(data, coherence){
    # Match coherence with a small tolerance instead of exact `==`, so that
    # values differing only by floating-point rounding still count as equal.
    tol <- sqrt(.Machine$double.eps)

    # Build an NA-free mask: indexing a data frame with NA in a logical row
    # index yields all-NA rows that nrow() would count as trials.
    in_condition <- !is.na(data$coherence) & !is.na(data$is_correct) &
        abs(data$coherence - coherence) < tol

    n_trials <- sum(in_condition)
    if (n_trials == 0) {
        stop("no trials found with coherence = ", coherence, call. = FALSE)
    }
    n_correct <- sum(in_condition & data$is_correct == "True")

    # Passing named counts also gives the printed test a readable
    # "data:" line (n_correct and n_trials).
    binom.test(x = n_correct, n = n_trials,
               p = 0.5, alternative = "greater", conf.level = 0.95)
}

In [19]:
run_binom_test(stats.df[stats.df$is_com=='True',], coherence=0.032)


	Exact binomial test

data:  nrow(data[(data$is_correct == "True") & (data$coherence == coherence),  and nrow(data[data$coherence == coherence, ])    ]) and nrow(data[data$coherence == coherence, ])
number of successes = 214, number of trials = 387, p-value = 0.02094
alternative hypothesis: true probability of success is greater than 0.5
95 percent confidence interval:
 0.5099114 1.0000000
sample estimates:
probability of success 
             0.5529716 


In [18]:
run_binom_test(stats.df[stats.df$is_com=='True',], coherence=0.064)


	Exact binomial test

data:  nrow(data[(data$is_correct == "True") & (data$coherence == coherence),  and nrow(data[data$coherence == coherence, ])    ]) and nrow(data[data$coherence == coherence, ])
number of successes = 223, number of trials = 376, p-value = 0.0001801
alternative hypothesis: true probability of success is greater than 0.5
95 percent confidence interval:
 0.5496429 1.0000000
sample estimates:
probability of success 
             0.5930851 


In [16]:
run_binom_test(stats.df[stats.df$is_com=='True',], coherence=0.128)


	Exact binomial test

data:  nrow(data[(data$is_correct == "True") & (data$coherence == coherence),  and nrow(data[data$coherence == coherence, ])    ]) and nrow(data[data$coherence == coherence, ])
number of successes = 252, number of trials = 352, p-value < 2.2e-16
alternative hypothesis: true probability of success is greater than 0.5
95 percent confidence interval:
 0.6736846 1.0000000
sample estimates:
probability of success 
             0.7159091 


In [17]:
run_binom_test(stats.df[stats.df$is_com=='True',], coherence=0.256)


	Exact binomial test

data:  nrow(data[(data$is_correct == "True") & (data$coherence == coherence),  and nrow(data[data$coherence == coherence, ])    ]) and nrow(data[data$coherence == coherence, ])
number of successes = 206, number of trials = 246, p-value < 2.2e-16
alternative hypothesis: true probability of success is greater than 0.5
95 percent confidence interval:
 0.793655 1.000000
sample estimates:
probability of success 
             0.8373984 


In [20]:
run_binom_test(stats.df[stats.df$is_com=='True',], coherence=0.512)


	Exact binomial test

data:  nrow(data[(data$is_correct == "True") & (data$coherence == coherence),  and nrow(data[data$coherence == coherence, ])    ]) and nrow(data[data$coherence == coherence, ])
number of successes = 151, number of trials = 165, p-value < 2.2e-16
alternative hypothesis: true probability of success is greater than 0.5
95 percent confidence interval:
 0.8705371 1.0000000
sample estimates:
probability of success 
             0.9151515 
