# Load Libraries and Resources

In [1]:
library('readr')
library('blabr')
library('plyr')
library('dplyr')
library('ggplot2')
library('rjson')
library('parallel')
library("readxl")
library('dotwhisker')
library('broom')
library('lme4')
library('irr')
library('tuneR')
library('ggExtra')
library('brms')
library('ggstance')
source('PLEARN_analysis_helper.R')
source('texvars.R')



Attaching package: ‘dplyr’


The following objects are masked from ‘package:plyr’:

    arrange, count, desc, failwith, id, mutate, rename, summarise,
    summarize


The following objects are masked from ‘package:stats’:

    filter, lag


The following objects are masked from ‘package:base’:

    intersect, setdiff, setequal, union


Loading required package: Matrix

Loading required package: lpSolve

Loading required package: Rcpp

Loading 'brms' package (version 2.17.0). Useful instructions
can be found by typing help('brms'). A more detailed introduction
to the package is available through vignette('brms_overview').


Attaching package: ‘brms’


The following object is masked from ‘package:lme4’:

    ngrps


The following object is masked from ‘package:stats’:

    ar



Attaching package: ‘ggstance’


The following objects are masked from ‘package:ggplot2’:

    geom_errorbarh, GeomErrorbarh




# Load Objects from Data Preparation

In [2]:
# Subject table produced by the data-preparation step (serialized with saveRDS)
subject_info <- readRDS(file = 'intermediate_data/subject_info.RData')

# Participant Properties

In [3]:
# Start an empty cache; values are added with update_texvar_cache() below
tvc <- initialize_texvar_cache()

In [4]:
# Count of child rows flagged ran_participant (rows where the test is NA are not counted)
ranN <- sum(with(subject_info, type == 'child' & ran_participant), na.rm = TRUE)
tvc <- update_texvar_cache(tvc, 'ranN', ranN)
ranN

In [5]:
# Per-version counts of child rows flagged ran_participant.
# Each count is displayed and then stored in the texvar cache.
ranNScene <- sum(with(subject_info,
    type == 'child' & ran_participant & expt_version == 'scene'), na.rm = TRUE)
ranNScene
tvc <- update_texvar_cache(tvc, 'ranNScene', ranNScene)

ranNRedBlue <- sum(with(subject_info,
    type == 'child' & ran_participant & expt_version == 'redblue'), na.rm = TRUE)
ranNRedBlue
tvc <- update_texvar_cache(tvc, 'ranNRedBlue', ranNRedBlue)

ranNAgreement <- sum(with(subject_info,
    type == 'child' & ran_participant & expt_version == 'agreement'), na.rm = TRUE)
ranNAgreement
tvc <- update_texvar_cache(tvc, 'ranNAgreement', ranNAgreement)

ranNAgreementLookit <- sum(with(subject_info,
    type == 'child' & ran_participant & expt_version == 'agreement-lookit'), na.rm = TRUE)
ranNAgreementLookit
tvc <- update_texvar_cache(tvc, 'ranNAgreementLookit', ranNAgreementLookit)

In [6]:
# Mark which participants are children with usable data in at least one of
# the tasks (storybook or eyetracking).
# LookIt participants are excluded at this point, as LookIt isn't one of the
# three main studies.
subject_info$child_in_study <- with(
    subject_info,
    type == 'child' &
        expt_version %in% c('redblue', 'scene', 'agreement') &
        (include_storybook | include_eyetracking)
)

In [7]:
# Sanity check: every non-empty filename may appear only once in subject_info
if (anyDuplicated(subset(subject_info, filename != '')$filename) > 0) {
    stop('Repeated filenames in the subject info file; look for repeats')
}

In [8]:
# Total number of children tested, before any exclusions
totalN <- sum(with(subject_info, type == 'child' & ran_participant), na.rm = TRUE)
tvc <- update_texvar_cache(tvc, 'totalN', totalN)
totalN

In [9]:
# Number of children with usable data in at least one task (child_in_study)
studyN <- sum(with(subject_info, type == 'child' & child_in_study), na.rm = TRUE)
tvc <- update_texvar_cache(tvc, 'studyN', studyN)
studyN

In [10]:
# Number of in-study children in study 1 (expt_version 'scene')
sceneN <- sum(with(subject_info,
    expt_version == 'scene' & type == 'child' & child_in_study), na.rm = TRUE)
sceneN
tvc <- update_texvar_cache(tvc, 'sceneN', sceneN)

In [11]:
# Number of in-study children in study 2 (expt_version 'redblue')
redblueN <- sum(with(subject_info,
    expt_version == 'redblue' & type == 'child' & child_in_study), na.rm = TRUE)
redblueN
tvc <- update_texvar_cache(tvc, 'redblueN', redblueN)

In [12]:
# Number of in-study children in study 3, in-lab version (expt_version 'agreement')
agreementN <- sum(with(subject_info,
    expt_version == 'agreement' & type == 'child' & child_in_study), na.rm = TRUE)
agreementN
tvc <- update_texvar_cache(tvc, 'agreementN', agreementN)

In [13]:
# Number of adult participants in the 'redblue' and 'scene' versions
redBlueAdultN <- sum(with(subject_info,
    type == 'adult' & expt_version == 'redblue'), na.rm = TRUE)
tvc <- update_texvar_cache(tvc, 'redBlueAdultN', redBlueAdultN)
sceneAdultN <- sum(with(subject_info,
    type == 'adult' & expt_version == 'scene'), na.rm = TRUE)
tvc <- update_texvar_cache(tvc, 'sceneAdultN', sceneAdultN)

# Eyetracking Exclusions

These are **manual exclusions**, noted by the experimenter. Automated exclusion filters are enforced in notebook 03.

In [14]:
# In-study 'redblue' children whose eyetracking data is not flagged as usable
# (include_eyetracking is FALSE). Rows where the test is NA are dropped.
redblue_eyetracking_exclusions <- subject_info[which(with(
    subject_info,
    type == 'child' & expt_version == 'redblue' &
        !include_eyetracking & child_in_study
)), , drop = FALSE]
redblueNExclusions <- nrow(redblue_eyetracking_exclusions)
tvc <- update_texvar_cache(tvc, 'redblueNExclusions', redblueNExclusions)
# Display the experimenter comments recorded for the excluded rows
redblue_eyetracking_exclusions$comment

In [15]:
# In-study 'scene' children whose eyetracking data is not flagged as usable
# (include_eyetracking is FALSE). Rows where the test is NA are dropped.
scene_eyetracking_exclusions <- subject_info[which(with(
    subject_info,
    type == 'child' & expt_version == 'scene' &
        !include_eyetracking & child_in_study
)), , drop = FALSE]
sceneNExclusions <- nrow(scene_eyetracking_exclusions)
tvc <- update_texvar_cache(tvc, 'sceneNExclusions', sceneNExclusions)
# Display the experimenter comments recorded for the excluded rows
scene_eyetracking_exclusions$comment

In [16]:
# In-study 'agreement' children whose eyetracking data is not flagged as usable
# (include_eyetracking is FALSE). Rows where the test is NA are dropped.
agreement_eyetracking_exclusions <- subject_info[which(with(
    subject_info,
    type == 'child' & expt_version == 'agreement' &
        !include_eyetracking & child_in_study
)), , drop = FALSE]
agreementNExclusions <- nrow(agreement_eyetracking_exclusions)
tvc <- update_texvar_cache(tvc, 'agreementNExclusions', agreementNExclusions)
# Display the experimenter comments recorded for the excluded rows
agreement_eyetracking_exclusions$comment

In [17]:
# Manual eyetracking exclusions for the 'agreement-lookit' version.
#
# child_in_study cannot be used here: it is only TRUE for the 'redblue',
# 'scene' and 'agreement' versions, so combining it with
# expt_version == 'agreement-lookit' always selected zero rows and the
# exported count was always 0. Instead, apply the same "usable data in at
# least one task" criterion directly to the Lookit rows.
# NOTE(review): confirm this is the intended inclusion criterion for Lookit
# participants before relying on the exported count.
agreementLookit_eyetracking_exclusions <- subset(
    subject_info,
    type == 'child' & expt_version == 'agreement-lookit' &
        !include_eyetracking & (include_storybook | include_eyetracking)
)
agreementLookitNExclusions <- nrow(agreementLookit_eyetracking_exclusions)
tvc <- update_texvar_cache(tvc, 'agreementLookitNExclusions', agreementLookitNExclusions)
# Display the experimenter comments recorded for the excluded rows
agreementLookit_eyetracking_exclusions$comment

# Storybook Exclusions

In [18]:
# In-study 'redblue' children whose storybook data is not flagged as usable
# (include_storybook is FALSE). Rows where the test is NA are dropped.
redblue_storybook_exclusions <- subject_info[which(with(
    subject_info,
    type == 'child' & expt_version == 'redblue' &
        !include_storybook & child_in_study
)), , drop = FALSE]
redblue_storybook_exclusionN <- nrow(redblue_storybook_exclusions)
redblue_storybook_exclusionN
tvc <- update_texvar_cache(tvc, 'nRedBlueStorybookExclusions', redblue_storybook_exclusionN)
# Display the experimenter comments recorded for the excluded rows
redblue_storybook_exclusions$comment

In [19]:
# In-study 'scene' children whose storybook data is not flagged as usable
# (include_storybook is FALSE). Rows where the test is NA are dropped.
scene_storybook_exclusions <- subject_info[which(with(
    subject_info,
    type == 'child' & expt_version == 'scene' &
        !include_storybook & child_in_study
)), , drop = FALSE]
scene_storybook_exclusionN <- nrow(scene_storybook_exclusions)
scene_storybook_exclusionN
tvc <- update_texvar_cache(tvc, 'nSceneStorybookExclusions', scene_storybook_exclusionN)
# Display the experimenter comments recorded for the excluded rows
scene_storybook_exclusions$comment

In [20]:
# In-study 'agreement' children whose storybook data is not flagged as usable
# (include_storybook is FALSE). Rows where the test is NA are dropped.
agreement_storybook_exclusions <- subject_info[which(with(
    subject_info,
    type == 'child' & expt_version == 'agreement' &
        !include_storybook & child_in_study
)), , drop = FALSE]
agreement_storybook_exclusionN <- nrow(agreement_storybook_exclusions)
agreement_storybook_exclusionN
tvc <- update_texvar_cache(tvc, 'nAgreementStorybookExclusions', agreement_storybook_exclusionN)
# Display the experimenter comments recorded for the excluded rows
agreement_storybook_exclusions$comment

In [21]:
# Manual storybook exclusions for the 'agreement-lookit' version.
#
# child_in_study cannot be used here: it is only TRUE for the 'redblue',
# 'scene' and 'agreement' versions, so combining it with the
# 'agreement-lookit' version always selected zero rows and the exported
# count was always 0. Instead, apply the same "usable data in at least one
# task" criterion directly to the Lookit rows.
# NOTE(review): confirm this is the intended inclusion criterion for Lookit
# participants before relying on the exported count.
agreementLookit_storybook_exclusions <- subset(
    subject_info,
    type == 'child' & expt_version %in% c('agreement-lookit') &
        !include_storybook & (include_storybook | include_eyetracking)
)
agreementLookit_storybook_exclusionN <- nrow(agreementLookit_storybook_exclusions)
agreementLookit_storybook_exclusionN
tvc <- update_texvar_cache(tvc, 'nAgreementLookitStorybookExclusions', agreementLookit_storybook_exclusionN)
# Display the experimenter comments recorded for the excluded rows
agreementLookit_storybook_exclusions$comment

# Demographic Properties

In [22]:
# Demographics are computed over in-study participants from the three
# non-Lookit versions only.
non_lookit_subjects <- subset(subject_info,
    expt_version %in% c('redblue', 'scene', 'agreement') & child_in_study)

# Gender values of children with a recorded (non-NA, non-empty) gender;
# computed once and shared by the percentage and the count below.
child_genders <- subset(non_lookit_subjects,
    !is.na(gender) & gender != '' & type == 'child')$gender

# Percentage of children with a recorded gender whose gender is 'F'
percentFem <- 100 * mean(child_genders == 'F', na.rm = TRUE)
tvc <- update_texvar_cache(tvc, 'percentFem', percentFem)

# Number of children whose recorded gender is 'F'
numFem <- sum(child_genders == 'F')
tvc <- update_texvar_cache(tvc, 'numFem', numFem)

# Ages (in months) of the children; missing ages are ignored in the summaries
child_ages <- subset(non_lookit_subjects, type == 'child')$age_in_months

meanAge <- mean(child_ages, na.rm = TRUE)
tvc <- update_texvar_cache(tvc, 'meanAge', meanAge)

sdAge <- sd(child_ages, na.rm = TRUE)
tvc <- update_texvar_cache(tvc, 'sdAge', sdAge)

# Write Out TexVars

In [23]:
# Write the accumulated cache to a .tex file. TRUE is spelled out because
# `T` is an ordinary variable that can be reassigned.
writeout_texvar_cache(tvc, 'texvars/plearn_analysis_01.tex', appendDate = TRUE)

# Select Storybook Participants for Recoding

In [24]:
# storbybook_kids = subset(subject_info, type == 'child' & include_storybook & in_study)

In [25]:
# recode_df = data.frame(sample(storbybook_kids$participant_name, length(storbybook_kids$participant_name),
#       replace = F))
# names(recode_df) = 'participant_name'
# write.csv(recode_df, 'storybook_participants_for_recoding.csv', row.names=F)

In [26]:
# file_len <- function(fil) {
#   if (file.info(fil)$size != 0) {
#     wavHeader <- readWave(fil, header = TRUE)
#     wavHeader$samples / wavHeader$sample.rate
#   } else {
#     0
#   }
# }

# file_len('audio/pl52.wav')

In [27]:
# storybook_df = data.frame(file = list.files('audio'))
# storybook_df$duration = sapply(storybook_df$file,
#                                function(x){
#                                    file_len(paste('audio',x, sep='/'))
#                                    })
# storybook_df$duration_in_minutes = storybook_df$duration / 60

In [28]:
# storybook_df[order(storybook_df$duration_in_minutes),]

In [29]:
# sum(storybook_df$duration_in_minutes) / 60

# Save the R objects for loading in later notebooks

In [30]:
# Overwrites the file that was read at the top of this notebook; the saved
# table now also carries the child_in_study column added above.
saveRDS(object = subject_info, file = 'intermediate_data/subject_info.RData')

In [31]:
# Display the number of rows in the subject table that was just saved
dim(subject_info)[1L]