In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import DataFrame, read_csv, concat
sns.set_theme(style='white', context='notebook', font_scale=1.33)

## Section 1: Self-report data

In [2]:
## Load MaRs-IB data.
mars = concat([
    read_csv(os.path.join('data', 'shortform', 'mars.csv')).assign(form='sf'),
    read_csv(os.path.join('data', 'longform', 'mars.csv')).assign(form='lf')
])

## Load survey data.
surveys = concat([
    read_csv(os.path.join('data', 'shortform', 'surveys.csv')),
    read_csv(os.path.join('data', 'longform', 'surveys.csv'))
])

## Apply rejections.
reject = concat([
    read_csv(os.path.join('data', 'shortform', 'reject.csv')),
    read_csv(os.path.join('data', 'longform', 'reject.csv'))
])
surveys = surveys[surveys.subject.isin(reject.query('reject == 0').subject)].reset_index(drop=True)
mars = mars[mars.subject.isin(reject.query('reject == 0').subject)].reset_index(drop=True)

## Handle missing responses.
mars = mars.fillna(0)

## Compute sum scores.
surveys['nfc10'] = surveys.filter(regex='nfc10_q').values[:,:-1].sum(axis=1)
surveys['pcf'] = surveys.filter(regex='pcf_q').values[:,:-1].sum(axis=1)
surveys['sns'] = surveys.filter(regex='sns_q').values[:,:-1].sum(axis=1)

## Merge scores.
scores = mars.pivot_table('accuracy', 'subject', 'form', aggfunc=np.sum).reset_index()
scores = scores.merge(surveys[['subject','nfc10','pcf','sns']])

### 1.1 Score summary

In [3]:
## Compute summary tables.
summary = scores.describe().T
corr = scores.corr(method='spearman')

## Merge tables.
summary = summary.merge(corr, left_index=True, right_index=True)

## Restrict to variables of interest.
cols = ['mean','std','25%','75%','nfc10','pcf','sns','sf','lf']
rows = ['nfc10','pcf','sns','sf','lf']

summary = summary.loc[rows,cols].round(2)
summary

Unnamed: 0,mean,std,25%,75%,nfc10,pcf,sns,sf,lf
nfc10,24.44,9.01,19.0,31.0,1.0,0.28,0.42,-0.04,0.12
pcf,22.58,6.45,18.0,28.0,0.28,1.0,0.32,0.04,-0.06
sns,29.02,7.36,24.75,35.0,0.42,0.32,1.0,0.14,0.18
sf,8.0,2.53,6.0,10.0,-0.04,0.04,0.14,1.0,
lf,15.73,5.13,12.0,20.0,0.12,-0.06,0.18,,1.0


In [4]:
print(summary.to_latex())

\begin{tabular}{lrrrrrrrrr}
\toprule
{} &   mean &   std &    25\% &   75\% &  nfc10 &   pcf &   sns &    sf &    lf \\
\midrule
nfc10 &  24.44 &  9.01 &  19.00 &  31.0 &   1.00 &  0.28 &  0.42 & -0.04 &  0.12 \\
pcf   &  22.58 &  6.45 &  18.00 &  28.0 &   0.28 &  1.00 &  0.32 &  0.04 & -0.06 \\
sns   &  29.02 &  7.36 &  24.75 &  35.0 &   0.42 &  0.32 &  1.00 &  0.14 &  0.18 \\
sf    &   8.00 &  2.53 &   6.00 &  10.0 &  -0.04 &  0.04 &  0.14 &  1.00 &   NaN \\
lf    &  15.73 &  5.13 &  12.00 &  20.0 &   0.12 & -0.06 &  0.18 &   NaN &  1.00 \\
\bottomrule
\end{tabular}



  print(summary.to_latex())


In [5]:
from itertools import combinations
from scipy.stats import spearmanr

for x, y in combinations(['sf','lf','nfc10','pcf','sns'], 2):
    rho, pval = spearmanr(*scores[[x,y]].dropna().values.T)
    print('[%s, %s]:\trho = %0.3f pval = %0.3f' %(x, y, rho, pval))

[sf, lf]:	rho = nan pval = nan
[sf, nfc10]:	rho = -0.037 pval = 0.522
[sf, pcf]:	rho = 0.038 pval = 0.512
[sf, sns]:	rho = 0.139 pval = 0.016
[lf, nfc10]:	rho = 0.116 pval = 0.045
[lf, pcf]:	rho = -0.059 pval = 0.311
[lf, sns]:	rho = 0.175 pval = 0.002
[nfc10, pcf]:	rho = 0.281 pval = 0.000
[nfc10, sns]:	rho = 0.423 pval = 0.000
[pcf, sns]:	rho = 0.318 pval = 0.000
