### ANALYSIS NOTEBOOK  - DONNELLY 2019 PLOS ONE

#### Patrick M. Donnelly  

#### University of Washington

#### September 25, 2020

In [36]:
# import the libraries used throughout this notebook
import pandas as pd
import numpy as np
from scipy import stats

In [37]:
# Load the study data set from the repository's data folder into a DataFrame
data = pd.read_csv(filepath_or_buffer='data/data.csv')

### Demographics Table
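T-tests and Wilcoxon signed rank tests for Demographics Table 1. Note: the cells in this section use `corr_data` and `corr_data_cntrl`, which are created in the Correlation Analysis section below (cell In [39]), so that cell must be run before these.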

#### Age

In [73]:
# Wilcoxon signed-rank test on age (visit_age): intervention group vs. control group.
# NOTE(review): stats.wilcoxon is a paired-sample test, so the two columns are paired
# row by row. Confirm the groups are matched; if they are independent (as the
# ttest_ind calls below assume), a rank test such as stats.mannwhitneyu may be intended.
stats.wilcoxon(corr_data['visit_age'], corr_data_cntrl['visit_age'])

WilcoxonResult(statistic=67.0, pvalue=0.15600357596610903)

#### Gender

In [74]:
# Wilcoxon signed-rank test on the gender column: intervention group vs. control group.
# NOTE(review): stats.wilcoxon is a paired-sample test, so the two columns are paired
# row by row. Confirm the groups are matched and that a rank test is appropriate for
# this column's coding.
stats.wilcoxon(corr_data['gender'], corr_data_cntrl['gender'])

WilcoxonResult(statistic=24.0, pvalue=0.36571229628151325)

#### Norm-referenced Measures

In [75]:
# Independent-samples t-test on the WJ Basic Reading Skills composite
# (intervention vs. control); equal_var=True is SciPy's default, spelled out here.
stats.ttest_ind(corr_data['wj_brs'], corr_data_cntrl['wj_brs'], equal_var=True)

Ttest_indResult(statistic=-0.7682348480557991, pvalue=0.4470957932899269)

In [76]:
# Independent-samples t-test on the TOWRE-2 Index
# (intervention vs. control); equal_var=True is SciPy's default, spelled out here.
stats.ttest_ind(corr_data['twre_index'], corr_data_cntrl['twre_index'], equal_var=True)

Ttest_indResult(statistic=-0.17356371189538922, pvalue=0.8631296423986305)

In [77]:
# Independent-samples t-test on the WASI-II FS-2 Composite
# (intervention vs. control); equal_var=True is SciPy's default, spelled out here.
stats.ttest_ind(corr_data['wasi_fs2'], corr_data_cntrl['wasi_fs2'], equal_var=True)

Ttest_indResult(statistic=-0.591043444366228, pvalue=0.557990829021477)

In [78]:
# Independent-samples t-test on the CTOPP-2 Phonological Awareness composite
# (intervention vs. control); equal_var=True is SciPy's default, spelled out here.
stats.ttest_ind(corr_data['ctopp_pa'], corr_data_cntrl['ctopp_pa'], equal_var=True)

Ttest_indResult(statistic=-1.1042543990303033, pvalue=0.2764262478932494)

In [79]:
# Independent-samples t-test on the CTOPP-2 Rapid Naming composite
# (intervention vs. control); equal_var=True is SciPy's default, spelled out here.
stats.ttest_ind(corr_data['ctopp_rapid'], corr_data_cntrl['ctopp_rapid'], equal_var=True)

Ttest_indResult(statistic=-0.5255955660152828, pvalue=0.6022230750972477)

### Correlation Analysis

In [38]:
# Narrow the full data set to the columns used below: identifiers and group/session
# labels, the difference scores, the practice metric, and the age/assessment columns.
data_sifted = data.loc[:, [
    'record_id', 'int_session', 'gender', 'pigs_casecontrol',
    'word_acc_diff', 'pseudo_acc_diff', 'first_acc_diff', 'second_rate_diff',
    'pigs_practice_numstories',
    'visit_age', 'wj_brs', 'twre_index', 'ctopp_rapid', 'wasi_fs2', 'ctopp_pa', 'ctopp_pm',
]]

In [39]:
# Split the session-2 rows by group: pigs_casecontrol == 1 gives the intervention
# group (corr_data) and pigs_casecontrol == 0 gives the control group (corr_data_cntrl).
# Only int_session == 2 is kept, for data clarity.
corr_data = data_sifted[
    (data_sifted['pigs_casecontrol'] == 1) & (data_sifted['int_session'] == 2)
]
corr_data_cntrl = data_sifted[
    (data_sifted['pigs_casecontrol'] == 0) & (data_sifted['int_session'] == 2)
]

#### Growth and Practice - Intervention Group

In [40]:
# Pearson correlation (r, p-value): word_acc_diff vs. pigs_practice_numstories, intervention group
stats.pearsonr(corr_data.word_acc_diff, corr_data.pigs_practice_numstories)

(-0.1434484895843433, 0.5462773240391416)

In [41]:
# Pearson correlation (r, p-value): pseudo_acc_diff vs. pigs_practice_numstories, intervention group
stats.pearsonr(corr_data.pseudo_acc_diff, corr_data.pigs_practice_numstories)

(0.21916216396969712, 0.3532158391938578)

#### Growth and Practice - Control Group

In [42]:
# Pearson correlation (r, p-value): word_acc_diff vs. pigs_practice_numstories, control group
stats.pearsonr(corr_data_cntrl.word_acc_diff, corr_data_cntrl.pigs_practice_numstories)

(0.00914727683396166, 0.9694688991495032)

In [43]:
# Pearson correlation (r, p-value): pseudo_acc_diff vs. pigs_practice_numstories, control group
stats.pearsonr(corr_data_cntrl.pseudo_acc_diff, corr_data_cntrl.pigs_practice_numstories)

(-0.37493954998435575, 0.10333943437958235)

#### Real Word Decoding & Predictors

In [44]:
# Pearson correlation (r, p-value): real-word difference score vs. age, intervention group
stats.pearsonr(corr_data.word_acc_diff, corr_data.visit_age)

(-0.35460379946585185, 0.12501074358035694)

In [45]:
# Pearson correlation (r, p-value): real-word difference score vs. WASI-II FS-2, intervention group
stats.pearsonr(corr_data.word_acc_diff, corr_data.wasi_fs2)

(-0.14126636219201166, 0.552461664144954)

In [46]:
# Pearson correlation (r, p-value): real-word difference score vs. CTOPP-2 Phonological Awareness, intervention group
stats.pearsonr(corr_data.word_acc_diff, corr_data.ctopp_pa)

(-0.30060915969261875, 0.19780291884054285)

In [47]:
# Pearson correlation (r, p-value): real-word difference score vs. ctopp_pm, intervention group
stats.pearsonr(corr_data.word_acc_diff, corr_data.ctopp_pm)

(-0.2614814449698232, 0.2654511378942844)

#### Pseudo Word Decoding & Predictors

In [48]:
# Pearson correlation (r, p-value): pseudo-word difference score vs. age, intervention group
stats.pearsonr(corr_data.pseudo_acc_diff, corr_data.visit_age)

(-0.1696877785130529, 0.4744752524150999)

In [49]:
# Pearson correlation (r, p-value): pseudo-word difference score vs. WASI-II FS-2, intervention group
stats.pearsonr(corr_data.pseudo_acc_diff, corr_data.wasi_fs2)

(-0.4934371784542496, 0.02704102262642834)

In [50]:
# Pearson correlation (r, p-value): pseudo-word difference score vs. CTOPP-2 Phonological Awareness, intervention group
stats.pearsonr(corr_data.pseudo_acc_diff, corr_data.ctopp_pa)

(-0.5202673299480975, 0.018692151119533245)

In [51]:
# Pearson correlation (r, p-value): pseudo-word difference score vs. ctopp_pm, intervention group
stats.pearsonr(corr_data.pseudo_acc_diff, corr_data.ctopp_pm)

(-0.47537405162983504, 0.03414714542588039)

#### Passage Reading Accuracy & Predictors

In [52]:
# Re-select the columns without the passage-rate score, then drop only the rows that
# are missing the passage-accuracy score (first_acc_diff). Restricting dropna to that
# column keeps NaN removal tied to accuracy alone, as intended; a bare dropna() would
# also discard a participant for a NaN in any other selected column.
data_accuracy = data[['record_id','int_session', 'pigs_casecontrol', 'word_acc_diff', 
                  'pseudo_acc_diff', 'first_acc_diff', 'pigs_practice_numstories', 
                    'visit_age', 'wj_brs', 'twre_index', 'ctopp_rapid', 'wasi_fs2', 'ctopp_pa', 'ctopp_pm']]
# split by group: pigs_casecontrol == 1 is intervention, == 0 is control
corr_accuracy = data_accuracy[data_accuracy['pigs_casecontrol'] == 1]
corr_accuracy_cntrl = data_accuracy[data_accuracy['pigs_casecontrol'] == 0]
# keep session 2 only, then remove rows without a passage-accuracy difference score
corr_accuracy = corr_accuracy[corr_accuracy['int_session'] == 2].dropna(subset=['first_acc_diff'])
corr_accuracy_cntrl = corr_accuracy_cntrl[corr_accuracy_cntrl['int_session'] == 2].dropna(subset=['first_acc_diff'])

In [53]:
# Pearson correlation (r, p-value): passage-accuracy difference score vs. age, intervention group
stats.pearsonr(corr_accuracy.first_acc_diff, corr_accuracy.visit_age)

(-0.5487026978511689, 0.027734757085666365)

In [54]:
# Pearson correlation (r, p-value): passage-accuracy difference score vs. WASI-II FS-2, intervention group
stats.pearsonr(corr_accuracy.first_acc_diff, corr_accuracy.wasi_fs2)

(-0.3207478258173936, 0.22580135086925088)

In [55]:
# Pearson correlation (r, p-value): passage-accuracy difference score vs. CTOPP-2 Phonological Awareness, intervention group
stats.pearsonr(corr_accuracy.first_acc_diff, corr_accuracy.ctopp_pa)

(-0.34968459546887287, 0.18429142619946642)

In [56]:
# Pearson correlation (r, p-value): passage-accuracy difference score vs. ctopp_pm, intervention group
stats.pearsonr(corr_accuracy.first_acc_diff, corr_accuracy.ctopp_pm)

(-0.42777920507510525, 0.09835307127353096)

#### Passage Reading Rate & Predictors

In [57]:
# Re-select the columns without the passage-accuracy score, then drop only the rows
# that are missing the passage-rate score (second_rate_diff). Restricting dropna to
# that column keeps NaN removal tied to rate alone, as intended; a bare dropna() would
# also discard a participant for a NaN in any other selected column.
data_rate = data[['record_id','int_session', 'pigs_casecontrol', 'word_acc_diff', 
                  'pseudo_acc_diff', 'second_rate_diff', 'pigs_practice_numstories', 
                    'visit_age', 'wj_brs', 'twre_index', 'ctopp_rapid', 'wasi_fs2', 'ctopp_pa', 'ctopp_pm']]
# split by group: pigs_casecontrol == 1 is intervention, == 0 is control
corr_rate = data_rate[data_rate['pigs_casecontrol'] == 1]
corr_rate_cntrl = data_rate[data_rate['pigs_casecontrol'] == 0]
# keep session 2 only, then remove rows without a passage-rate difference score
corr_rate = corr_rate[corr_rate['int_session'] == 2].dropna(subset=['second_rate_diff'])
corr_rate_cntrl = corr_rate_cntrl[corr_rate_cntrl['int_session'] == 2].dropna(subset=['second_rate_diff'])

In [58]:
# Pearson correlation (r, p-value): passage-rate difference score vs. age, intervention group
stats.pearsonr(corr_rate.second_rate_diff, corr_rate.visit_age)

(0.10551986550099002, 0.7082035274306163)

In [59]:
# Pearson correlation (r, p-value): passage-rate difference score vs. WASI-II FS-2, intervention group
stats.pearsonr(corr_rate.second_rate_diff, corr_rate.wasi_fs2)

(0.02321689070870936, 0.9345449615179485)

In [60]:
# Pearson correlation (r, p-value): passage-rate difference score vs. CTOPP-2 Phonological Awareness, intervention group
stats.pearsonr(corr_rate.second_rate_diff, corr_rate.ctopp_pa)

(0.052418300923755295, 0.8528141946197414)

In [61]:
# Pearson correlation (r, p-value): passage-rate difference score vs. ctopp_pm, intervention group
stats.pearsonr(corr_rate.second_rate_diff, corr_rate.ctopp_pm)

(0.1980046221242171, 0.4793229079994812)

### Effect Size Analyses

#### Real Word

##### The most relevant data structures are `corr_data` (intervention group) and `corr_data_cntrl` (control group)

In [62]:
# x: real-word difference scores for the intervention group; y: the same for the control group
x = corr_data['word_acc_diff']
y = corr_data_cntrl['word_acc_diff']

In [63]:
# Cohen's d for x vs. y: difference of the group means divided by the square root of
# the plain average of the two sample variances (ddof=1, not weighted by group size).
mean_gap = np.mean(x) - np.mean(y)
pooled_sd = np.sqrt((np.std(x, ddof=1) ** 2 + np.std(y, ddof=1) ** 2) / 2.0)
d = mean_gap / pooled_sd
print("Cohen's d: ", d)

Cohen's d:  0.5686864296949145


#### Pseudo Word

In [64]:
# x: pseudo-word difference scores for the intervention group; y: the same for the control group
x = corr_data['pseudo_acc_diff']
y = corr_data_cntrl['pseudo_acc_diff']

In [65]:
# Cohen's d for x vs. y: difference of the group means divided by the square root of
# the plain average of the two sample variances (ddof=1, not weighted by group size).
mean_gap = np.mean(x) - np.mean(y)
pooled_sd = np.sqrt((np.std(x, ddof=1) ** 2 + np.std(y, ddof=1) ** 2) / 2.0)
d = mean_gap / pooled_sd
print("Cohen's d: ", d)

Cohen's d:  0.7429769651763583


#### Passage Accuracy

In [66]:
# x: passage-accuracy difference scores for the intervention group; y: the same for the control group
x = corr_accuracy['first_acc_diff']
y = corr_accuracy_cntrl['first_acc_diff']

In [67]:
# Cohen's d for x vs. y: difference of the group means divided by the square root of
# the plain average of the two sample variances (ddof=1, not weighted by group size).
mean_gap = np.mean(x) - np.mean(y)
pooled_sd = np.sqrt((np.std(x, ddof=1) ** 2 + np.std(y, ddof=1) ** 2) / 2.0)
d = mean_gap / pooled_sd
print("Cohen's d: ", d)

Cohen's d:  0.3635296154143578


#### Passage Rate

In [68]:
# x: passage-rate difference scores for the intervention group; y: the same for the control group
x = corr_rate['second_rate_diff']
y = corr_rate_cntrl['second_rate_diff']

In [69]:
# Cohen's d for x vs. y: difference of the group means divided by the square root of
# the plain average of the two sample variances (ddof=1, not weighted by group size).
mean_gap = np.mean(x) - np.mean(y)
pooled_sd = np.sqrt((np.std(x, ddof=1) ** 2 + np.std(y, ddof=1) ** 2) / 2.0)
d = mean_gap / pooled_sd
print("Cohen's d: ", d)

Cohen's d:  0.1270477151527451
