In [1]:
# render plots with matplotlib's interactive notebook backend
%matplotlib notebook

# imports
import os
import numpy as np
import pandas as pd
import pymc3 as pm
from bambi import Model, Prior
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats
import pymc3_utils as pmu

# suppress system warnings for legibility
# (no category is given, so every warning category is ignored for the rest
# of the session, including deprecation warnings)
import warnings
warnings.filterwarnings('ignore')

# resize plots to fit labels inside bounding box
# (figure.autolayout makes matplotlib apply tight_layout to every figure)
from matplotlib import rcParams
rcParams.update({'figure.autolayout': True})

# MPI color scheme
# (seaborn 'white' style with the 'Set2' palette, applied to all later plots)
sns.set(style='white', palette='Set2')

  PANDAS_TYPES = (pd.Series, pd.DataFrame, pd.Panel)
  import pandas.util.testing as tm


# Cambridge Recognition Memory Tasks
## Adjusting the CRMT scores first
An anonymous reviewer asked whether it would make a difference to adjust the CRMT scores for general cognitive ability first, and then regress reading scores onto the adjusted CRMT scores. In this notebook, we perform the analysis in this order to assess whether it does make a difference.

Note that throughout the notebook, this order of doing things creates statistical issues that are not fatal, but slightly confusing nonetheless (which is why we chose to work the other way around in the submitted paper).

## Loading reading, Raven's, and digit span scores

In [2]:
# per-participant reading, Raven's and digit span scores; keep only the
# participant id and the three score columns used below
reading_columns = ['pp', 'ravens_intercept', 'span_intercept', 'raw_reading_score']
df_reading = pd.read_csv('data/reading_intercepts.tsv', sep='\t')
df_reading = df_reading[reading_columns]
display(df_reading.head().round(2))

Unnamed: 0,pp,ravens_intercept,span_intercept,raw_reading_score
0,low_1,0.34,0.47,12.5
1,low_10,-0.17,0.02,45.5
2,low_11,0.1,0.47,32.5
3,low_12,0.34,0.25,22.5
4,low_13,-0.46,0.69,23.5


## Loading object memory data

In [3]:
# load the CRMT trials, drop incomplete rows, attach each participant's
# reading/Raven's/span scores (inner join on participant id), and drop any
# rows that are still incomplete after the join
df_comt = (
    pd.read_csv('data/cambridge.tsv', sep='\t')
    .dropna()
    .merge(df_reading, on='pp')
    .dropna()
)
display(df_comt.head().round(2))

Unnamed: 0.1,Unnamed: 0,trialNo,RT,ACC,literate,number,visual_noise,learn_trial,category,item,trial,pp,subject,ravens_intercept,span_intercept,raw_reading_score
0,11,1.0,3.22,1.0,low,1,no_noise,learn,bikes,bikes_1.0,0,low_1,1,0.34,0.47,12.5
1,12,2.0,11.06,1.0,low,1,no_noise,learn,bikes,bikes_2.0,1,low_1,1,0.34,0.47,12.5
2,13,3.0,4.83,1.0,low,1,no_noise,learn,bikes,bikes_3.0,2,low_1,1,0.34,0.47,12.5
3,18,4.0,7.58,0.0,low,1,no_noise,learn,bikes,bikes_4.0,3,low_1,1,0.34,0.47,12.5
4,19,5.0,4.34,1.0,low,1,no_noise,learn,bikes,bikes_5.0,4,low_1,1,0.34,0.47,12.5


## Dummy coding, scaling, centering

In [4]:
# standardize the reading, Raven's and digit span scores
for score_col in ('raw_reading_score', 'ravens_intercept', 'span_intercept'):
    df_comt[score_col] = pmu.standardize(df_comt[score_col])

# dummy-code the stimulus category once and copy out one indicator column
# per category; bikes will be the default
category_dummies = pd.get_dummies(df_comt['category'])
for category_name in ('cars', 'faces', 'bikes'):
    df_comt[category_name] = category_dummies[category_name]

# indicator for visual noise, no noise will be the default
noise_dummies = pd.get_dummies(df_comt['visual_noise'])
df_comt['noise'] = noise_dummies['noise']

# indicator for learn trials, not learn will be the default
learn_dummies = pd.get_dummies(df_comt['learn_trial'])
df_comt['learn'] = learn_dummies['learn']

# keep only the non-learning trials
is_test_trial = df_comt['learn'] == 0
df_comt = df_comt[is_test_trial]

display(df_comt.head().round(2))

Unnamed: 0.1,Unnamed: 0,trialNo,RT,ACC,literate,number,visual_noise,learn_trial,category,item,...,pp,subject,ravens_intercept,span_intercept,raw_reading_score,cars,faces,bikes,noise,learn
18,51,19.0,6.92,1.0,low,1,no_noise,no_learn,bikes,bikes_19.0,...,low_1,1,0.5,0.82,-0.61,0,0,1,0,0
19,52,46.0,5.51,1.0,low,1,no_noise,no_learn,bikes,bikes_46.0,...,low_1,1,0.5,0.82,-0.61,0,0,1,0,0
20,53,36.0,3.03,1.0,low,1,no_noise,no_learn,bikes,bikes_36.0,...,low_1,1,0.5,0.82,-0.61,0,0,1,0,0
21,54,31.0,3.28,0.0,low,1,no_noise,no_learn,bikes,bikes_31.0,...,low_1,1,0.5,0.82,-0.61,0,0,1,0,0
22,55,30.0,3.56,1.0,low,1,no_noise,no_learn,bikes,bikes_30.0,...,low_1,1,0.5,0.82,-0.61,0,0,1,0,0


## Regressing Raven's and digit span scores out of CRMT scores

In [5]:
# shared keyword arguments for the trial-level (Bernoulli) model fit
defaults = dict(
    samples=5000,
    tune=2500,
    chains=4,
    init='advi+adapt_diag',
    family='bernoulli',
    priors=dict(fixed='narrow', random='narrow'),
)

In [6]:
# trial accuracy as a function of noise x category, with Raven's and digit
# span as covariates and an intercept per participant
crmt_formula = 'ACC ~ noise*cars + noise*faces + ravens_intercept + span_intercept'
participant_terms = ['1|pp']

model1 = Model(df_comt)
model1.fit(crmt_formula, random=participant_terms, **defaults)

Auto-assigning NUTS sampler...
Initializing NUTS using advi+adapt_diag...
Average Loss = 10,427:  42%|████▏     | 20799/50000 [01:41<02:22, 205.62it/s]
Convergence achieved at 20800
Interrupted at 20,799 [41%]: Average Loss = 11,061
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [1|pp_offset, 1|pp_sd, span_intercept, ravens_intercept, noise:faces, faces, noise:cars, cars, noise, Intercept]
Sampling 4 chains, 0 divergences: 100%|██████████| 30000/30000 [09:13<00:00, 54.24draws/s]


<bambi.results.MCMCResults at 0x110a4da90>

## Extract participant intercepts

In [7]:
# Take the participant levels from the data model1 was actually fit on.
# df_comt only holds participants that survived the inner merge and dropna,
# whereas df_reading may list participants with no usable CRMT trials; using
# df_reading here would then request intercept rows that do not exist or
# attach intercepts to the wrong participant labels.
pps = df_comt['pp'].unique()
pp_nums = [f'1|pp[{i}]' for i in range(len(pps))]

# one summary row per participant intercept; copy so the new column is added
# to an independent frame rather than to a slice of the summary table
df_intercepts = pm.summary(model1.backend.trace).loc[pp_nums].copy()

# NOTE(review): this assumes the model indexes the 1|pp levels in sorted
# label order, so that 1|pp[i] belongs to the i-th sorted participant id — confirm
df_intercepts['pp'] = np.sort(pps)

display(df_intercepts.head().round(2))

Unnamed: 0,mean,sd,hpd_3%,hpd_97%,mcse_mean,mcse_sd,ess_mean,ess_sd,ess_bulk,ess_tail,r_hat,pp
1|pp[0],-0.39,0.15,-0.67,-0.1,0.0,0.0,25628.0,22661.0,25627.0,14022.0,1.0,low_1
1|pp[1],0.36,0.15,0.07,0.64,0.0,0.0,31138.0,24369.0,31149.0,14634.0,1.0,low_10
1|pp[2],-0.42,0.15,-0.7,-0.13,0.0,0.0,24939.0,22353.0,24912.0,14395.0,1.0,low_11
1|pp[3],0.33,0.15,0.04,0.62,0.0,0.0,32889.0,24190.0,32913.0,14101.0,1.0,low_12
1|pp[4],-0.33,0.16,-0.64,-0.02,0.0,0.0,17388.0,15800.0,17390.0,14379.0,1.0,low_13


In [8]:
# keep the participant id and the `mean` column of the summary (renamed to
# adjusted_crmt_score), then attach the reading/Raven's/span scores
df_intercepts = (
    df_intercepts[['pp', 'mean']]
    .rename(columns={'mean': 'adjusted_crmt_score'})
    .merge(df_reading, on='pp')
    .reset_index()
)

# standardize every score column on the participant-level frame
for score_col in ('raw_reading_score', 'ravens_intercept',
                  'span_intercept', 'adjusted_crmt_score'):
    df_intercepts[score_col] = pmu.standardize(df_intercepts[score_col])

display(df_intercepts.head().round(2))

Unnamed: 0,index,pp,adjusted_crmt_score,ravens_intercept,span_intercept,raw_reading_score
0,0,low_1,-1.21,0.5,0.82,-0.6
1,1,low_10,1.11,-0.24,0.04,0.81
2,2,low_11,-1.32,0.15,0.81,0.25
3,3,low_12,1.03,0.49,0.43,-0.17
4,4,low_13,-1.03,-0.66,1.19,-0.13


## Regressing reading score onto adjusted CRMT scores
The adjusted CRMT scores (i.e., participant intercepts) are continuous values on a linear (log-odds) scale, rather than binomial outcomes (this is just a consequence of using a logit-link generalized linear model). This is easy enough to deal with though; we just change our model family for the adjusted CRMT model to linear, rather than Bernoulli.

In [9]:
# keyword arguments for the participant-level model fit: same sampler
# settings as before but without a 'family' entry.
# Note that this rebinds `defaults`, so any earlier cell that unpacks
# `defaults` will pick up these values if it is re-run after this one.
defaults = dict(
    samples=5000,
    tune=2500,
    chains=4,
    init='advi+adapt_diag',
    priors=dict(fixed='narrow', random='narrow'),
)

In [10]:
# regress the adjusted CRMT score on the reading score, one row per participant
reading_formula = 'adjusted_crmt_score ~ raw_reading_score'

model2 = Model(df_intercepts)
model2.fit(reading_formula, **defaults)

Auto-assigning NUTS sampler...
Initializing NUTS using advi+adapt_diag...
Average Loss = 141.17:  26%|██▌       | 12999/50000 [00:04<00:12, 3053.51it/s]
Convergence achieved at 13000
Interrupted at 12,999 [25%]: Average Loss = 201.59
Multiprocess sampling (4 chains in 4 jobs)
NUTS: [adjusted_crmt_score_sd, raw_reading_score, Intercept]
Sampling 4 chains, 47 divergences: 100%|██████████| 30000/30000 [00:10<00:00, 2855.94draws/s]
There were 21 divergences after tuning. Increase `target_accept` or reparameterize.
There were 10 divergences after tuning. Increase `target_accept` or reparameterize.
There were 2 divergences after tuning. Increase `target_accept` or reparameterize.
There were 14 divergences after tuning. Increase `target_accept` or reparameterize.


<bambi.results.MCMCResults at 0x1273f9a20>

In [11]:
# posterior summary for model2 with 95% credible intervals, rounded for display
model2_summary = pm.summary(model2.backend.trace, credible_interval=.95)
display(model2_summary.round(2))

Unnamed: 0,mean,sd,hpd_2.5%,hpd_97.5%,mcse_mean,mcse_sd,ess_mean,ess_sd,ess_bulk,ess_tail,r_hat
Intercept[0],-0.0,0.1,-0.18,0.19,0.0,0.0,11538.0,8324.0,11548.0,11484.0,1.0
raw_reading_score[0],0.1,0.09,-0.07,0.28,0.0,0.0,11456.0,10801.0,11460.0,12017.0,1.0
adjusted_crmt_score_sd,0.95,0.04,0.88,1.0,0.0,0.0,11607.0,11341.0,8364.0,5749.0,1.0


Readers will note that although we've elected to present the analysis as a Bayesian regression for the sake of being consistent, at this point, we're really just taking the bivariate correlation between adjusted CRMT score and reading score, since we've already accounted for other sources of variance.

## Conclusions
We arrive at a coefficient estimate that is higher than the one we get using the procedure presented in the paper (.05 in the paper, .1 here) but crucially the mean estimate is still positive (meaning we find no evidence for destructive competition). The uncertainty intervals however are much larger using this procedure, overlapping with zero.

More importantly though, carrying out the analysis in this order reveals a very big drawback: We can't easily compare the models for adjusted and unadjusted CRMT score, since the adjusted model is linear, and the unadjusted model is binomial. There are two ways to deal with this issue:
1. Ignore it and compare the coefficients directly anyway. Because of the logit-link the coefficients are on a linear scale (log-odds) even in the binomial model. This is conceptually a bit muddled though.
2. Get linear participant intercepts for the unadjusted model too, by using the same two-step adjustment procedure, but not including Raven's and digit span scores as predictors in the first step.

We find neither of these options to be particularly appealing, mostly because they are conceptually so much more confusing than the alternative we present in the paper: Adjust the reading score for general cognitive ability first, then see how much variability in CRMT scores the adjusted reading score explains. Our approach allows the adjusted and unadjusted reading score models to be compared directly very easily.