In [1]:
import os
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import DataFrame, read_csv
## Notebook-wide plotting defaults: 'white' style, 'notebook' context,
## and fonts scaled by a factor of 1.33.
sns.set_theme(
    style='white',
    context='notebook',
    font_scale=1.33,
)

## Section 1: Self-Report Data

In [2]:
## Read the survey, MARS, and rejection tables from the data directory.
surveys = read_csv(os.path.join('data', 'surveys.csv'))
mars = read_csv(os.path.join('data', 'mars.csv'))
reject = read_csv(os.path.join('data', 'reject.csv'))

## Keep only subjects whose `reject` flag is 0, in both tables.
retained = reject.query('reject == 0').subject
surveys = surveys.loc[surveys.subject.isin(retained)].reset_index(drop=True)
mars = mars.loc[mars.subject.isin(retained)].reset_index(drop=True)

## Replace every NaN in the MARS table with 0, so that a missing accuracy
## value adds nothing to the subject's total computed below.
mars = mars.fillna(0)

## Sum the items of each questionnaire. The last matching column is left
## out of every total (NOTE(review): presumably not a scored item -- confirm).
## Summing the raw array means a NaN item propagates to the total.
item_patterns = {'nfc10': 'nfc10_q', 'pcf': 'pcf_q', 'sns': 'sns_q'}
for scale, pattern in item_patterns.items():
    responses = surveys.filter(regex=pattern).values
    surveys[scale] = responses[:, :-1].sum(axis=1)

## Combine the per-subject MARS accuracy totals with the survey totals.
## `subject` is the only column the two tables share, so it is the join key.
mars_total = mars.groupby('subject')['accuracy'].sum()
scores = mars_total.reset_index(name='mars')
scores = scores.merge(surveys[['subject','nfc10','pcf','sns']], on='subject')

### 1.1 Summarize data

In [3]:
## Variables to summarize. `scores` also carries the `subject` identifier
## column, which is not a score and must stay out of the statistics below.
## Selecting the columns explicitly matters for `corr`: pandas >= 2.0 no
## longer drops non-numeric columns silently and raises on them instead.
variables = ['mars','nfc10','pcf','sns']

## Compute summary tables (descriptives and Spearman rank correlations),
## each indexed by variable name.
summary = scores[variables].describe().T
corr = scores[variables].corr(method='spearman')

## Merge tables side by side on the shared variable-name index.
summary = summary.merge(corr, left_index=True, right_index=True)

## Restrict to variables of interest.
summary = summary[['mean','std','25%','75%','nfc10','pcf','sns','mars']].round(2)
summary

Unnamed: 0,mean,std,25%,75%,nfc10,pcf,sns,mars
mars,8.0,2.53,6.0,10.0,-0.04,0.04,0.14,1.0
nfc10,25.03,8.27,20.0,31.0,1.0,0.27,0.46,-0.04
pcf,22.17,6.29,18.75,26.25,0.27,1.0,0.29,0.04
sns,29.07,7.48,25.0,35.0,0.46,0.29,1.0,0.14


In [4]:
## Export the summary table as LaTeX. The number format and escaping are set
## explicitly so the result does not depend on pandas' version-specific
## defaults: two decimals to match the rounding applied to `summary`, and
## escaping so the `%` in the percentile headers is emitted as `\%` instead
## of starting a LaTeX comment.
print(summary.to_latex(float_format='%.2f', escape=True))

\begin{tabular}{lrrrrrrrr}
\toprule
{} &   mean &   std &    25\% &    75\% &  nfc10 &   pcf &   sns &  mars \\
\midrule
mars  &   8.00 &  2.53 &   6.00 &  10.00 &  -0.04 &  0.04 &  0.14 &  1.00 \\
nfc10 &  25.03 &  8.27 &  20.00 &  31.00 &   1.00 &  0.27 &  0.46 & -0.04 \\
pcf   &  22.17 &  6.29 &  18.75 &  26.25 &   0.27 &  1.00 &  0.29 &  0.04 \\
sns   &  29.07 &  7.48 &  25.00 &  35.00 &   0.46 &  0.29 &  1.00 &  0.14 \\
\bottomrule
\end{tabular}



In [5]:
from itertools import combinations
from scipy.stats import spearmanr

## Report the Spearman correlation and its p-value for every unordered
## pair of score variables, one line per pair.
measures = ['mars','nfc10','pcf','sns']
report_line = '[%s, %s]:\trho = %0.3f pval = %0.3f'

for x, y in combinations(measures, 2):
    rho, pval = spearmanr(scores[x], scores[y])
    print(report_line % (x, y, rho, pval))

[mars, nfc10]:	rho = -0.037 pval = 0.522
[mars, pcf]:	rho = 0.038 pval = 0.512
[mars, sns]:	rho = 0.139 pval = 0.016
[nfc10, pcf]:	rho = 0.274 pval = 0.000
[nfc10, sns]:	rho = 0.465 pval = 0.000
[pcf, sns]:	rho = 0.291 pval = 0.000
