# Natural Statistics Cross-linguistic: 

#### Proportion of single-word utterances analysis

----

In [6]:
import pandas as pd
import numpy as np
import sys
sys.path.insert(0, "data_proc")
import contingent_extraction
import warnings
warnings.filterwarnings('ignore')

In [7]:
rand_dat_inc = pd.read_csv("../data/rand_dat_inc_master.csv",index_col=0,low_memory=False)
rand_dat_inc=rand_dat_inc[rand_dat_inc["language"]!="ara"]
rand_dat_inc=rand_dat_inc[(rand_dat_inc["target_child_age"]>=5) & (rand_dat_inc["target_child_age"]<=30)]
rand_dat_inc_cg = rand_dat_inc[rand_dat_inc["caregiver"]=="caregiver"]

rand_dat_inc_cg["contingent"] = np.where(rand_dat_inc_cg["contingent"]==1, "contingent", "non-contingent")

rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"].notna()]
rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"]!="xxx"]
rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"]!="yyy"]
rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"]!="www"]

rand_dat_inc_cg["swu"]=np.where(rand_dat_inc_cg["num_tokens"]==1,1,0)

In [8]:
# add play context and year of study

play_context = pd.read_csv("../data/context_data.csv")
play_context = play_context.rename(columns={"Corpus": "corpus_name"})

# print(play_context.to_markdown())

rand_dat_inc_cg = rand_dat_inc_cg.merge(play_context,on='corpus_name')

rand_dat_inc_cg["context"] = rand_dat_inc_cg["Location"] + rand_dat_inc_cg["Activity"]

rand_dat_inc_cg["context"] = rand_dat_inc_cg["context"].replace({"HomeBook-reading":"Home: book reading",
                                                                 "HomeInterview/Unstructured":"Home: interview/unstructured",
                                                                 "HomeNaN":"Home: unreported",
                                                                 "HomeOther":"Home: other",
                                                                 "HomeUnstructured":"Home: unstructured",
                                                                 "LabOther":"Lab: other",
                                                                 "LabTabletop play":"Lab: tabletop play",
                                                                 "LabInterview/Unstructured":"Lab: interview/unstructured",
                                                                 "LabUnstructured":"Lab: unstructured",
                                                                 np.nan:"Unreported",
                                                                 "OtherUnstructured":"Other: unstructured"})

# year of study
corpora_year = pd.read_csv("../data/corpora_year.csv")
corpora_year = corpora_year.rename(columns={"Corpora": "corpus_name"})
corpora_year = corpora_year[["corpus_name", "Year collected"]]

rand_dat_inc_cg = rand_dat_inc_cg.merge(corpora_year,on='corpus_name')

In [9]:
%load_ext rpy2.ipython

The rpy2.ipython extension is already loaded. To reload it, use:
  %reload_ext rpy2.ipython


In [10]:
%%R -i rand_dat_inc_cg

library("lme4")
library("repr")
library("knitr")
library("broom")
library("emmeans")
library("tidyverse")
library("kableExtra")

options(repr.plot.width=6, repr.plot.height=12, scipen=999)

Loading required package: Matrix
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.2
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.4     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()

Attaching package: ‘kableExtra’

The following object is masked from ‘package:dplyr’:

    group_rows



In [11]:
%%R -o rand_dat_inc_cg

library("kableExtra")

caregiver_type <- rand_dat_inc_cg %>%
  group_by(transcript_id) %>%
  summarise(
    caregiver_type = case_when(
      all(speaker_role == "Mother") ~ "Mother only",
      all(speaker_role == "Father") ~ "Father only",
      any(speaker_role %in% c("Mother", "Father")) ~ "Mother & Father",
      TRUE ~ "Unknown"
    )
  )

# caregiver_type %>%
#     kbl(format="pipe")
    
# ggplot(caregiver_type, aes(x = 1, y = caregiver_type, fill = factor(caregiver_type))) + 
#   geom_col() +
#   coord_polar(theta = "y") +
#   theme_void()

rand_dat_inc_cg <- rand_dat_inc_cg %>%
  left_join(caregiver_type)

Joining with `by = join_by(transcript_id)`


In [12]:
rand_swu_stats = (rand_dat_inc_cg.groupby(["Language_name","target_child_id","transcript_id","contingent"])
                                  .swu
                                  .agg(["mean"])
                                  .reset_index())
rand_swu_sumstats =  rand_swu_stats.rename({'mean': 'means'}, axis=1)

In [13]:
%%R -i rand_swu_sumstats

# import rand_swu_sumstats into R

NULL


In [14]:
%%R -o rand_swu_sumstats

rand_swu_sumstats <- rand_swu_sumstats %>%
    left_join(caregiver_type)

Joining with `by = join_by(transcript_id)`


In [15]:
rand_swu_sumstats.to_csv("../data/rand_swu_sumstats.csv")

----
#### Proportion single-word utterances plot

In [16]:
%%R

xlabs <- c("C", "NC")

# # ara_label <- data.frame(means=c(.9),contingent = c(1.5),language="ara")
deu_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="German")
# deu_ns_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="German")
eng_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="English")
est_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Estonian")
# est_ns_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Estonian")
fas_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Persian")
fra_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="French")
hrv_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Croatian")
jpn_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Japanese")
kor_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Korean")
nor_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Norwegian")
pol_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Polish")
por_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Portuguese")
spa_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Spanish")
swe_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Swedish")
zho_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Mandarin")


p <- ggplot(rand_swu_sumstats, aes(x = contingent, y = means, color = Language_name)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Language_name,ncol = 7) + 
     geom_text(data = deu_label,label = "***",size=8,color="black") +
     geom_text(data = eng_label,label = "***",size=8,color="black") +  
     geom_text(data = est_label,label = "ns", size=4,color="black",fontface = "italic") +  
     geom_text(data = fas_label,label = "ns", size=4,color="black",fontface = "italic") + 
     geom_text(data = fra_label,label = "***",size=8,color="black") +  
     geom_text(data = hrv_label,label = "***",size=8,color="black") + 
     geom_text(data = jpn_label,label = "***",size=8,color="black") + 
     geom_text(data = kor_label,label = "***",size=8,color="black") +  
     geom_text(data = nor_label,label = "**",size=8,color="black") +  
     geom_text(data = pol_label,label = "ns", size=4,color="black",fontface = "italic") +    
     geom_text(data = por_label,label = "***",size=8,color="black") +  
     geom_text(data = spa_label,label = "***",size=8,color="black") + 
     geom_text(data = swe_label,label = "***",size=8,color="black") + 
     geom_text(data = zho_label,label = "ns", size=4, color="black",fontface = "italic") +
     ylim(0, .5) +
     labs(tag="C",
          y = "Proportion of Single Word Utterances",
          x = "") +
     theme_classic() +
     scale_x_discrete(labels= xlabs) +
     theme(text = element_text(size=16),
           axis.text.x = element_text(vjust = 0.5, hjust = 0.5),
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                                            size=0, linetype="dotted",
                                                            colour = "white"),
           legend.text=element_text(size=16))
      ggsave("../figures/token_rand_swu.pdf", width = 11.7, height = 6.2)


1: The `fun.y` argument of `stat_summary()` is deprecated as of ggplot2 3.3.0.
ℹ Please use the `fun` argument instead.
generated. 
2: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
generated. 
3: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
ℹ Please use the `linewidth` argument instead.
generated. 
4: Removed 228 rows containing non-finite values (`stat_summary()`). 
5: Removed 228 rows containing non-finite values (`stat_summary()`). 


for manuscript

In [17]:
%%R -i rand_swu_sumstats

library('ggplot2')
library('repr')
options(repr.plot.width=6, repr.plot.height=12)

xlabs <- c("C", "NC")

# # ara_label <- data.frame(means=c(.9),contingent = c(1.5),language="ara")
deu_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="German")
# deu_ns_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="German")
eng_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="English")
est_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Estonian")
# est_ns_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Estonian")
fas_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Persian")
fra_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="French")
hrv_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Croatian")
jpn_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Japanese")
kor_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Korean")
nor_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Norwegian")
pol_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Polish")
por_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Portuguese")
spa_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Spanish")
swe_label <- data.frame(means=c(.47),contingent = c(1.5),Language_name="Swedish")
zho_label <- data.frame(means=c(.5),contingent = c(1.5),Language_name="Mandarin")


p <- ggplot(rand_swu_sumstats, aes(x = contingent, y = means, color = Language_name)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Language_name,ncol = 7) + 
     geom_text(data = deu_label,label = "***",size=8,color="black") +
     geom_text(data = eng_label,label = "***",size=8,color="black") +  
     geom_text(data = est_label,label = "ns", size=4,color="black",fontface = "italic") +  
     geom_text(data = fas_label,label = "ns", size=4,color="black",fontface = "italic") + 
     geom_text(data = fra_label,label = "***",size=8,color="black") +  
     geom_text(data = hrv_label,label = "***",size=8,color="black") + 
     geom_text(data = jpn_label,label = "***",size=8,color="black") + 
     geom_text(data = kor_label,label = "***",size=8,color="black") +  
     geom_text(data = nor_label,label = "**",size=8,color="black") +  
     geom_text(data = pol_label,label = "ns", size=4,color="black",fontface = "italic") +    
     geom_text(data = por_label,label = "***",size=8,color="black") +  
     geom_text(data = spa_label,label = "***",size=8,color="black") + 
     geom_text(data = swe_label,label = "***",size=8,color="black") + 
     geom_text(data = zho_label,label = "ns", size=4, color="black",fontface = "italic") +
     ylim(0, .5) +
     labs(tag="C",
          y = "Proportion of Single Word Utterances",
          x = "") +
     theme_classic() +
     scale_x_discrete(labels= xlabs) +
     theme(text = element_text(size=11.5),
           axis.text.x = element_text(vjust = 0.5, hjust=0.5),
           legend.position="none")

      ggsave("../figures/figure_2_C.pdf", width = 11.5, height = 4.2)

1: Removed 228 rows containing non-finite values (`stat_summary()`). 
2: Removed 228 rows containing non-finite values (`stat_summary()`). 


Plot + effect estimates

In [18]:
%%R

deu_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="German")
eng_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="English")
est_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Estonian")
fra_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="French")
hrv_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Croatian")
jpn_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Japanese")
kor_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Korean")
nor_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Norwegian")
por_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Portuguese")
spa_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Spanish")
swe_est_label <- data.frame(means=c(.02),contingent = c(1),Language_name="Swedish")

p <- p + geom_text(data = deu_est_label,label = "est=.08",size=4,color="black") +
         geom_text(data = eng_est_label,label = "est=.09",size=4,color="black") +
         geom_text(data = est_est_label,label = "est=.04",size=4,color="black") +
         geom_text(data = fra_est_label,label = "est=.04",size=4,color="black") +
         geom_text(data = hrv_est_label,label = "est=.08",size=4,color="black") +
         geom_text(data = jpn_est_label,label = "est=.18",size=4,color="black") +
         geom_text(data = kor_est_label,label = "est=.08",size=4,color="black") +
         geom_text(data = nor_est_label,label = "est=.03",size=4,color="black") +
         geom_text(data = por_est_label,label = "est=.06",size=4,color="black") +
         geom_text(data = spa_est_label,label = "est=.06",size=4,color="black") +
         geom_text(data = swe_est_label,label = "est=.15",size=4,color="black")
         

ggsave("../figures/token_swu_rand_eff.pdf", width = 11.7, height = 6.2)

1: Removed 228 rows containing non-finite values (`stat_summary()`). 
2: Removed 228 rows containing non-finite values (`stat_summary()`). 


\+ sample size

In [19]:
%%R

deu_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="German")
eng_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="English")
est_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Estonian")
fas_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Persian")
fra_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="French")
hrv_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Croatian")
jpn_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Japanese")
kor_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Korean")
nor_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Norwegian")
pol_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Polish")
por_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Portuguese")
spa_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Spanish")
swe_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Swedish")
zho_n_label <- data.frame(means=c(.02),contingent = c(1.7),Language_name="Mandarin")

deu_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="German")
eng_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="English")
est_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Estonian")
fas_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Persian")
fra_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="French")
hrv_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Croatian")
jpn_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Japanese")
kor_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Korean")
nor_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Norwegian")
pol_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Polish")
por_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Portuguese")
spa_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Spanish")
swe_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Swedish")
zho_sz_label <- data.frame(means=c(.02),contingent = c(2.1),Language_name="Mandarin")

p <- p + geom_text(data = deu_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = eng_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = est_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fas_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fra_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = hrv_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = jpn_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = kor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = nor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = pol_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = por_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = spa_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = swe_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = zho_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = deu_sz_label,label = " = 39",size=4,color="black") +
         geom_text(data = eng_sz_label,label = " = 1005",size=4,color="black") +
         geom_text(data = est_sz_label,label = " = 22",size=4,color="black") +
         geom_text(data = fas_sz_label,label = " = 12",size=4,color="black") +
         geom_text(data = fra_sz_label,label = " = 303",size=4,color="black") +
         geom_text(data = hrv_sz_label,label = " = 79",size=4,color="black") +
         geom_text(data = jpn_sz_label,label = " = 139",size=4,color="black") +
         geom_text(data = kor_sz_label,label = " = 37",size=4,color="black") +
         geom_text(data = nor_sz_label,label = " = 56",size=4,color="black") +
         geom_text(data = pol_sz_label,label = " = 1",size=4,color="black") +
         geom_text(data = por_sz_label,label = " = 24",size=4,color="black") +
         geom_text(data = spa_sz_label,label = " = 31",size=4,color="black") +
         geom_text(data = swe_sz_label,label = " = 16",size=4,color="black") +
         geom_text(data = zho_sz_label,label = " = 2",size=4,color="black")
         

ggsave("../figures/token_swu_rand_eff_n.pdf", width = 11.7, height = 6.2)

1: Removed 228 rows containing non-finite values (`stat_summary()`). 
2: Removed 228 rows containing non-finite values (`stat_summary()`). 


----
#### Statistical analyses

By language

In [20]:
SWU_dat = rand_dat_inc_cg[['Language_name','swu','contingent','transcript_id','target_child_id','caregiver_type']]

In [22]:
%%R

SWU_dat %>%
    filter(Language_name %in% case_study_cgtype_compare)

       Language_name swu     contingent transcript_id target_child_id
144565        Korean   0 non-contingent         28133           17372
144566        Korean   0 non-contingent         28133           17372
144567        Korean   0 non-contingent         28133           17372
144568        Korean   0 non-contingent         28133           17372
144569        Korean   0 non-contingent         28133           17372
144570        Korean   0 non-contingent         28133           17372
144571        Korean   0 non-contingent         28133           17372
144572        Korean   0     contingent         28133           17372
144573        Korean   0 non-contingent         28133           17372
144574        Korean   0     contingent         28133           17372
144575        Korean   0 non-contingent         28133           17372
144576        Korean   1     contingent         28133           17372
144577        Korean   0 non-contingent         28133           17372
144578        Korean

In [21]:
%%R -i SWU_dat

# vectors for rows to remove from lmer
case_study <- c("Mandarin", "Persian") # only 1 target child analyzed

case_study_cgtype_compare <- c("Korean") # only 1 target child analyzed, varies in CG type

no_cgtype_compare <- c("Portuguese") # only `Mother only`

single_tran <- c("Polish") # only 1 transcript

# nests of models
swu_nest1 <- SWU_dat %>%
    filter(!Language_name %in% case_study) %>%
    filter(!Language_name %in% single_tran) %>%
    filter(!Language_name %in% no_cgtype_compare) %>%
    filter(!Language_name %in% case_study_cgtype_compare) %>%
    group_by(Language_name) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(swu ~ contingent +
                                (1|target_child_id) +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Language_name, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`z.ratio`,`t.ratio`), .before = p.value) %>%
    select (-c(`z.ratio`,`t.ratio`))

swu_nest2 <- SWU_dat %>%
    filter(Language_name %in% case_study_cgtype_compare) %>%
    group_by(Language_name) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(swu ~ contingent + caregiver_type
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Language_name, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`))

swu_nest3 <- SWU_dat %>%
    filter(Language_name %in% no_cgtype_compare) %>%
    group_by(Language_name) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(swu ~ contingent +
                                (1|target_child_id) +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Language_name, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`))

swu_nest4 <- SWU_dat %>%
    filter(Language_name %in% case_study) %>%
    group_by(Language_name) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(swu ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Language_name, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`z.ratio`,`t.ratio`), .before = p.value) %>%
    select (-c(`z.ratio`,`t.ratio`))

swu_nest5 <- SWU_dat %>%
    filter(Language_name %in% single_tran) %>%
    group_by(Language_name) %>%
    nest() %>%
    mutate(fit = map(data, ~ lm(swu ~ contingent,
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Language_name, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    rename(statistic = `t.ratio`)
    
# number of transcripts per language
sample_size <- SWU_dat %>%
    group_by(Language_name) %>%
    summarize(n = n_distinct(transcript_id))
    
# combine lmer summaries and correct p-values for multiple comparisons
emms_all <- list(swu_nest1, swu_nest2, swu_nest3, swu_nest4, swu_nest5) %>% 
    reduce(bind_rows) %>%
    mutate(p.value = p.adjust(p.value, "holm", 14)) %>%
    left_join(sample_size)

Error in `mutate()`:
ℹ In argument: `fit = map(...)`.
ℹ In group 1: `Language_name = "Korean"`.
Caused by error in `map()`:
ℹ In index: 1.
Caused by error in `caregiver_type()`:
! could not find function "caregiver_type"
Run `rlang::last_trace()` to see where the error occurred.
boundary (singular) fit: see help('isSingular')
boundary (singular) fit: see help('isSingular')
boundary (singular) fit: see help('isSingular')
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 5162' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 5162)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 5162' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 5162)' or larger];
but be warned that this may

RInterpreterError: Failed to parse and evaluate line '\n# vectors for rows to remove from lmer\ncase_study <- c("Mandarin", "Persian") # only 1 target child analyzed\n\ncase_study_cgtype_compare <- c("Korean") # only 1 target child analyzed, varies in CG type\n\nno_cgtype_compare <- c("Portuguese") # only `Mother only`\n\nsingle_tran <- c("Polish") # only 1 transcript\n\n# nests of models\nswu_nest1 <- SWU_dat %>%\n    filter(!Language_name %in% case_study) %>%\n    filter(!Language_name %in% single_tran) %>%\n    filter(!Language_name %in% no_cgtype_compare) %>%\n    filter(!Language_name %in% case_study_cgtype_compare) %>%\n    group_by(Language_name) %>%\n    nest() %>%\n    mutate(fit = map(data, ~ lmer(swu ~ contingent +\n                                (1|target_child_id) +\n                                (1|transcript_id),\n                                data = .,\n                                REML= FALSE)),\n           summary = map(fit, ~ emmeans(., "contingent")),\n           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),\n           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%\n    select(Language_name, contrasts, effect_size) %>%\n    unnest(cols = c(contrasts)) %>%\n    mutate(effect_size = map(effect_size, ~ summary(.))) %>%\n    unnest() %>%\n    mutate(statistic = coalesce(`z.ratio`,`t.ratio`), .before = p.value) %>%\n    select (-c(`z.ratio`,`t.ratio`))\n\nswu_nest2 <- SWU_dat %>%\n    filter(Language_name %in% case_study_cgtype_compare) %>%\n    group_by(Language_name) %>%\n    nest() %>%\n    mutate(fit = map(data, ~ lmer(swu ~ contingent + caregiver_type\n                                (1|transcript_id),\n                                data = .,\n                                REML= FALSE)),\n           summary = map(fit, ~ emmeans(., "contingent")),\n           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),\n           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%\n    select(Language_name, contrasts, effect_size) %>%\n    unnest(cols = c(contrasts)) %>%\n    mutate(effect_size = map(effect_size, ~ summary(.))) %>%\n    unnest() %>%\n    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%\n    select (-c(`t.ratio`))\n\nswu_nest3 <- SWU_dat %>%\n    filter(Language_name %in% no_cgtype_compare) %>%\n    group_by(Language_name) %>%\n    nest() %>%\n    mutate(fit = map(data, ~ lmer(swu ~ contingent +\n                                (1|target_child_id) +\n                                (1|transcript_id),\n                                data = .,\n                                REML= FALSE)),\n           summary = map(fit, ~ emmeans(., "contingent")),\n           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),\n           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%\n    select(Language_name, contrasts, effect_size) %>%\n    unnest(cols = c(contrasts)) %>%\n    mutate(effect_size = map(effect_size, ~ summary(.))) %>%\n    unnest() %>%\n    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%\n    select (-c(`t.ratio`))\n\nswu_nest4 <- SWU_dat %>%\n    filter(Language_name %in% case_study) %>%\n    group_by(Language_name) %>%\n    nest() %>%\n    mutate(fit = map(data, ~ lmer(swu ~ contingent +\n                                (1|transcript_id),\n                                data = .,\n                                REML= FALSE)),\n           summary = map(fit, ~ emmeans(., "contingent")),\n           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),\n           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%\n    select(Language_name, contrasts, effect_size) %>%\n    unnest(cols = c(contrasts)) %>%\n    mutate(effect_size = map(effect_size, ~ summary(.))) %>%\n    unnest() %>%\n    mutate(statistic = coalesce(`z.ratio`,`t.ratio`), .before = p.value) %>%\n    select (-c(`z.ratio`,`t.ratio`))\n\nswu_nest5 <- SWU_dat %>%\n    filter(Language_name %in% single_tran) %>%\n    group_by(Language_name) %>%\n    nest() %>%\n    mutate(fit = map(data, ~ lm(swu ~ contingent,\n                                data = .,\n                                REML= FALSE)),\n           summary = map(fit, ~ emmeans(., "contingent")),\n           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),\n           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%\n    select(Language_name, contrasts, effect_size) %>%\n    unnest(cols = c(contrasts)) %>%\n    mutate(effect_size = map(effect_size, ~ summary(.))) %>%\n    unnest() %>%\n    rename(statistic = `t.ratio`)\n    \n# number of transcripts per language\nsample_size <- SWU_dat %>%\n    group_by(Language_name) %>%\n    summarize(n = n_distinct(transcript_id))\n    \n# combine lmer summaries and correct p-values for multiple comparisons\nemms_all <- list(swu_nest1, swu_nest2, swu_nest3, swu_nest4, swu_nest5) %>% \n    reduce(bind_rows) %>%\n    mutate(p.value = p.adjust(p.value, "holm", 14)) %>%\n    left_join(sample_size)\n'.
R error message: 'Error in mutate(., fit = map(data, ~lmer(swu ~ contingent + caregiver_type(1 |  : \n  \nℹ In group 1: `Language_name = "Korean"`.\nCaused by error in `map()`:\nℹ In index: 1.\nCaused by error in `caregiver_type()`:\n! could not find function "caregiver_type"'
R stdout:
boundary (singular) fit: see help('isSingular')
boundary (singular) fit: see help('isSingular')
boundary (singular) fit: see help('isSingular')
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 5162' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 5162)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 5162' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 5162)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 90009' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 90009)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 90009' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 90009)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 19642' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 19642)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 19642' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 19642)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 4212' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 4212)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 4212' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 4212)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 23066' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 23066)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 23066' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 23066)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 3494' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 3494)' or larger];
but be warned that this may result in large computation time and memory use.
Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 3494' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 3494)' or larger];
but be warned that this may result in large computation time and memory use.
Warning message:
`cols` is now required when using `unnest()`.
ℹ Please use `cols = c(effect_size)`.

format statistics table

In [18]:
%%R

table_maker = function(data) { data %>%
    select(Language_name, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Language", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,Language)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Language (n)", c('Language','n'), sep=" (") %>%
    mutate(`Language (n)` = paste0(`Language (n)`,")")) %>%
    arrange(`Language (n)`)
    }

SWU_stats_table <- table_maker(emms_all)

kable(SWU_stats_table)



|Language (n)    |Estimate (SE) | Test statistic| Effect size|Adjusted p-value |
|:---------------|:-------------|--------------:|-----------:|:----------------|
|Croatian (58)   |0.05 (0.01)   |           4.31|        0.13|0.0002           |
|English (872)   |0.09 (0)      |          24.94|        0.24|<.0001           |
|Estonian (22)   |0.03 (0.02)   |           1.89|        0.09|0.8164           |
|French (275)    |0.05 (0.01)   |           7.16|        0.13|<.0001           |
|German (38)     |0.08 (0.01)   |           6.33|        0.21|<.0001           |
|Japanese (160)  |0.17 (0.01)   |          25.46|        0.36|<.0001           |
|Korean (28)     |0.07 (0.01)   |           5.92|        0.22|<.0001           |
|Mandarin (2)    |0.07 (0.05)   |           1.43|        0.22|1.0000           |
|Norwegian (26)  |0.07 (0.02)   |           3.38|        0.19|0.0104           |
|Persian (11)    |0.1 (0.06)    |           1.56|        0.20|1.0000           |
|Polish (1)      |0.04 (0.

In [19]:
%%R 

# add columns sample and measure and save

SWU_stats_table %>%
    mutate(sample = "rand",
           measure = "swu") %>%
    write.csv(file = "../data/rand_swu_stats.csv")

By play context

In [20]:
SWU_dat = rand_dat_inc_cg[['Language_name','swu','contingent','transcript_id','target_child_id','context']]

In [26]:
%%R -i SWU_dat

SWU_dat %>% 
    group_by(transcript_id, context, contingent) %>%
    summarize(prop_swu = mean(swu)) %>%
    ggplot(aes(fill = context)) +
        geom_histogram(aes(prop_swu)) +
        facet_wrap(context ~ contingent) +
    coord_cartesian(ylim = c(0,80)) +
    theme_classic()
    
ggsave("../figures/context_SWU.pdf", width = 8, height = 4)

`summarise()` has grouped output by 'transcript_id', 'context'. You can
override using the `.groups` argument.
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.


In [3]:
%%R -i SWU_dat

# vectors for rows to remove from lmer
single_tran <- c("Home: interview/unstructured") # only 1 transcript

contex_sample_size <- SWU_dat %>%
    group_by(context) %>%
    summarize(n = n_distinct(transcript_id))

swu_contex_nest_1 <- SWU_dat %>%
    filter(!context %in% single_tran) %>%
    group_by(context) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(swu ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(context, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`z.ratio`,`t.ratio`), .before = p.value) %>%
    select (-c(`z.ratio`,`t.ratio`))
    
swu_contex_nest_2 <- SWU_dat %>%
    filter(context %in% single_tran) %>%
    group_by(context) %>%
    nest() %>%
    mutate(fit = map(data, ~ lm(swu ~ contingent,
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise")))) %>%
    select(context, contrasts) %>%
    unnest(cols = c(contrasts))  %>%
    rename(statistic = `t.ratio`)

# combine lmer summaries and correct p-values for multiple comparisons
context_emms_all <- list(swu_contex_nest_1, swu_contex_nest_2) %>% 
    reduce(bind_rows) %>%
    mutate(p.value = p.adjust(p.value, "holm", 7)) %>%
    left_join(contex_sample_size)

UsageError: Cell magic `%%R` not found.


In [4]:
%%R

table_maker = function(data) { data %>%
    select(context, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Play context", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,`Play context`)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Play context (n)", c(`Play context`,'n'), sep=" (") %>%
    mutate(`Play context (n)` = paste0(`Play context (n)`,")")) %>%
    arrange(`Play context (n)`)
    }
    
swu_context_stats_table <- table_maker(context_emms_all)

kable(swu_context_stats_table)

UsageError: Cell magic `%%R` not found.


By context, dropping English

In [5]:
# drop English

SWU_dat_no_eng = SWU_dat[SWU_dat["Language_name"] != "English"]

NameError: name 'SWU_dat' is not defined

In [None]:
%%R -i SWU_dat_no_eng

# # vectors for rows to remove from lmer
single_tran <- c("Home: interview/unstructured") # only 1 transcript

contex_sample_size_no_eng <- SWU_dat_no_eng %>%
    group_by(context) %>%
    summarize(n = n_distinct(transcript_id))

swu_contex_nest_1_no_eng <- SWU_dat_no_eng %>%
    filter(!context %in% single_tran) %>%
    group_by(context) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(swu ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(context, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`z.ratio`), .before = p.value) %>%
    select (-c(`z.ratio`))

swu_contex_nest_2_no_eng <- SWU_dat_no_eng %>%
    filter(context %in% single_tran) %>%
    group_by(context) %>%
    nest() %>%
    mutate(fit = map(data, ~ lm(swu ~ contingent,
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise")))) %>%
    select(context, contrasts) %>%
    unnest(cols = c(contrasts))  %>%
    rename(statistic = `t.ratio`)
    
# combine lmer summaries and correct p-values for multiple comparisons
context_no_eng_emms_all <- list(swu_contex_nest_1_no_eng, swu_contex_nest_2_no_eng) %>% 
    reduce(bind_rows) %>%
    mutate(p.value = p.adjust(p.value, "holm", 3)) %>%
    left_join(contex_sample_size_no_eng)

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 3742' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 3742)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 3742' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 3742)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 57089' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 57089)' or larger];
but be warned that this may result in large computation time and memory

Joining with `by = join_by(context)`


In [None]:
%%R 

table_maker = function(data) { data %>%
    select(context, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Play context", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,`Play context`)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Play context (n)", c(`Play context`,'n'), sep=" (") %>%
    mutate(`Play context (n)` = paste0(`Play context (n)`,")")) %>%
    arrange(`Play context (n)`)
    }
    
mlu_context_stats_table_no_eng <- table_maker(context_no_eng_emms_all)

kable(mlu_context_stats_table_no_eng)



|Play context (n)                 |Estimate (SE) | Test statistic| Effect size|Adjusted p-value |
|:--------------------------------|:-------------|--------------:|-----------:|:----------------|
|Home: book reading (28)          |0.07 (0.01)   |           5.92|        0.22|<.0001           |
|Home: interview/unstructured (1) |0.04 (0.12)   |           0.36|          NA|1.0000           |
|Home: unstructured (560)         |0.11 (0)      |          27.47|        0.26|<.0001           |
|NA (101)                         |0.09 (0.01)   |           7.67|        0.19|<.0001           |


By language family

In [None]:
rand_swu_stats_fam = (rand_dat_inc_cg.groupby(["Language_Family","target_child_id","transcript_id","contingent"])
                                  .swu
                                  .agg(["mean"])
                                  .reset_index())
rand_swu_stats_fam = rand_swu_stats_fam.rename({'mean': 'means'}, axis=1)

In [None]:
%%R -i rand_swu_stats_fam

library('ggplot2')

# figure

p <- ggplot(rand_swu_stats_fam, aes(x = contingent, y = means, color = Language_Family)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Language_Family,ncol=5) + 
     ylim(0, .5) +
     labs(y = "Prop. single word utt.", x = "") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                                            size=0, linetype="dotted",
                                                            colour = "white"),
           legend.text=element_text(size=16))

    ggsave("../figures/token_swu_family.pdf", width = 11.7, height = 6.2)
    
# statistical analysis

fam_sample_size <- rand_swu_stats_fam %>%
    group_by(Language_Family) %>%
    summarize(n = n_distinct(transcript_id))
    
swu_fam_nest <- rand_swu_stats_fam %>%
    group_by(Language_Family) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(means ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Language_Family, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`)) %>%
    mutate(p.value = p.adjust(p.value, "holm", 5)) %>%
    left_join(fam_sample_size)
    
table_maker = function(data) { data %>%
    select(Language_Family, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Language Family", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,`Language Family`)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Language Family (n)", c(`Language Family`,'n'), sep=" (") %>%
    mutate(`Language Family (n)` = paste0(`Language Family (n)`,")")) %>%
    arrange(`Language Family (n)`)
    }
    
swu_fam_stats_table <- table_maker(swu_fam_nest)

kable(swu_fam_stats_table)

Joining, by = "Language_Family"


|Language Family (n)  |Estimate (SE) | Test statistic| Effect size|Adjusted p-value |
|:--------------------|:-------------|--------------:|-----------:|:----------------|
|Indo-European (1483) |0.08 (0.01)   |          16.25|        0.61|<.0001           |
|Japonic (160)        |0.18 (0.01)   |          19.75|        2.21|<.0001           |
|Koreanic (28)        |0.08 (0.02)   |           5.45|        1.48|<.0001           |
|Sino-Tibetan (2)     |0.09 (0.02)   |           4.55|        6.43|0.0522           |
|Uralic (22)          |0.06 (0.02)   |           3.36|        1.04|0.0134           |


By language Genus

In [None]:
rand_swu_stats_gen = (rand_dat_inc_cg.groupby(["Language_Genus","target_child_id","transcript_id","contingent"])
                                  .swu
                                  .agg(["mean"])
                                  .reset_index())
rand_swu_stats_gen = rand_swu_stats_gen.rename({'mean': 'means'}, axis=1)

In [None]:
%%R -i rand_swu_stats_gen

library('ggplot2')

# plot

p <- ggplot(rand_swu_stats_gen, aes(x = contingent, y = means, color = Language_Genus)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Language_Genus,ncol=8) + 
     ylim(0, .5) +
     labs(y = "Prop. single word utt.", x = "") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                                            size=0, linetype="dotted",
                                                            colour = "white"),
           legend.text=element_text(size=16))
    ggsave("../figures/token_swu_genus.pdf", width = 11.7, height = 6.2)
    
# statistical analysis

gen_sample_size <- rand_swu_stats_gen %>%
    group_by(Language_Genus) %>%
    summarize(n = n_distinct(transcript_id))
    
swu_gen_nest <- rand_swu_stats_gen %>%
    group_by(Language_Genus) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(means ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Language_Genus, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`)) %>%
    mutate(p.value = p.adjust(p.value, "holm", 5)) %>%
    left_join(gen_sample_size)
    
table_maker = function(data) { data %>%
    select(Language_Genus, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Language Genus", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,`Language Genus`)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Language Genus (n)", c(`Language Genus`,'n'), sep=" (") %>%
    mutate(`Language Genus (n)` = paste0(`Language Genus (n)`,")")) %>%
    arrange(`Language Genus (n)`)
    }
    
swu_gen_stats_table <- table_maker(swu_gen_nest)

kable(swu_gen_stats_table)

Joining, by = "Language_Genus"


|Language Genus (n) |Estimate (SE) | Test statistic| Effect size|Adjusted p-value |
|:------------------|:-------------|--------------:|-----------:|:----------------|
|Chinese (2)        |0.09 (0.02)   |           4.55|        6.43|0.0522           |
|Finnic (22)        |0.06 (0.02)   |           3.36|        1.04|0.0134           |
|Germanic (1077)    |0.09 (0.01)   |          14.69|        0.65|<.0001           |
|Iranian (11)       |0.12 (0.05)   |           2.58|        1.15|0.1205           |
|Japanese (160)     |0.18 (0.01)   |          19.75|        2.21|<.0001           |
|Korean (28)        |0.08 (0.02)   |           5.45|        1.48|<.0001           |
|Romance (335)      |0.05 (0.01)   |           5.89|        0.46|<.0001           |
|Savlic (60)        |0.07 (0.02)   |           4.28|        0.79|0.0003           |


By agglutinative status

In [None]:
rand_swu_stats_aggl = (rand_dat_inc_cg.groupby(["Agglutinative","target_child_id","transcript_id","contingent"])
                                  .swu
                                  .agg(["mean"])
                                  .reset_index())
rand_swu_stats_aggl =  rand_swu_stats_aggl.rename({'mean': 'means'}, axis=1)

In [None]:
%%R -i rand_swu_stats_aggl

library('ggplot2')

# plot

p <- ggplot(rand_swu_stats_aggl, aes(x = contingent, y = means, color = Agglutinative)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Agglutinative,ncol=2) + 
     ylim(0, .5) +
     labs(y = "Prop. single word utt.", x = "") +
     theme_classic() +
     theme(text = element_text(size=16),
           axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1),
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                                            size=0, linetype="dotted",
                                                            colour = "white"),
           legend.text=element_text(size=16))

    ggsave("../figures/token_swu_aggl.pdf", width = 11.7, height = 6.2)
  
# statistical analysis
    
agg_sample_size <- rand_swu_stats_aggl %>%
    group_by(Agglutinative) %>%
    summarize(n = n_distinct(transcript_id))
    
swu_agg_nest <- rand_swu_stats_aggl %>%
    group_by(Agglutinative) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(means ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Agglutinative, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`)) %>%
    mutate(p.value = p.adjust(p.value, "holm", 5)) %>%
    left_join(agg_sample_size)
    
table_maker = function(data) { data %>%
    select(Agglutinative, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Agglutinative Status", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,`Agglutinative Status`)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Agglutinative Status (n)", c(`Agglutinative Status`,'n'), sep=" (") %>%
    mutate(`Agglutinative Status (n)` = paste0(`Agglutinative Status (n)`,")")) %>%
    arrange(`Agglutinative Status (n)`)
    }
    
swu_agg_stats_table <- table_maker(swu_agg_nest)

kable(swu_agg_stats_table)

Joining, by = "Agglutinative"


|Agglutinative Status (n) |Estimate (SE) | Test statistic| Effect size|Adjusted p-value |
|:------------------------|:-------------|--------------:|-----------:|:----------------|
|0 (1485)                 |0.08 (0.01)   |          16.27|        0.61|<.0001           |
|1 (210)                  |0.15 (0.01)   |          19.10|        1.87|<.0001           |


----

#### SWU mixed models

In [None]:
deu=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="deu"]
eng=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="eng"]
est=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="est"]
fas=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="fas"]
fra=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="fra"]
hrv=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="hrv"]
jpn=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="jpn"]
kor=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="kor"]
nor=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="nor"]
pol=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="pol"]
por=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="por"]
spa=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="spa"]
swe=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="swe"]
zho=rand_dat_inc_cg[['language','swu','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="zho"]

In [None]:
%%R

library("lme4")
library("broom")
library("emmeans")
library("lmerTest")
library("tidyverse")

options(scipen = 999)

effect_sizes <- data.frame(matrix(ncol = 2, nrow = 0))
cols <- c("Language_name", "rand_effect_size")
colnames(effect_sizes) <- cols

R[write to console]: Loading required package: Matrix

R[write to console]: 
Attaching package: ‘lmerTest’


R[write to console]: The following object is masked from ‘package:lme4’:

    lmer


R[write to console]: The following object is masked from ‘package:stats’:

    step


R[write to console]: ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

R[write to console]: ✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.0      ✔ stringr 1.4.1 
✔ readr   2.1.2      ✔ forcats 0.5.2 
✔ purrr   0.3.5      

R[write to console]: ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()



In [None]:
%%R -i deu

lm2_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=deu)
emm2_1<-emmeans(lm2_1,pairwise~contingent)
pval<-summary(emm2_1$contrasts)$p.value
print(c(emm2_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# plot(emm2_1)
# summary(emmeans(lm2_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm2_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

deu_lname <- deu$language[1]

deu_eff <- eff_size(emm2_1,sigma = sigma(lm2_1), edf = df.residual(lm2_1))

deu_eff <- summary(deu_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(deu_lname,deu_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 4452' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 4452)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 4452' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 4452)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)    0.081 0.0125 Inf   6.498  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 0.00000000008122039

[1] 0.000000001137085


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1           deu 0.203791769489804


In [None]:
%%R -i eng

lm3_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=eng, REML= FALSE)
emm3_1<-emmeans(lm3_1,pairwise~contingent)
pval<-summary(emm3_1$contrasts)$p.value
print(c(emm3_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm3_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm3_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

eng_lname <- eng$language[1]

eng_eff <- eff_size(emm3_1,sigma = sigma(lm3_1), edf = df.residual(lm3_1))

eng_eff <- summary(eng_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(eng_lname,eng_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 113356' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 113356)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 113356' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 113356)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate      SE  df z.ratio p.value
 contingent - (non-contingent)    0.107 0.00351 Inf  30.329  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004692424

[1] 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006569394


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1           deu 0.203791769489804
2           eng 0.267969063164672


In [None]:
%%R -i est

lm4_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=est, REML= FALSE)
emm4_1<-emmeans(lm4_1,pairwise~contingent)
pval<-summary(emm4_1$contrasts)$p.value
print(c(emm4_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm4_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm4_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

est_lname <- est$language[1]

est_eff <- eff_size(emm4_1,sigma = sigma(lm4_1), edf = df.residual(lm4_1))

est_eff <- summary(est_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(est_lname,est_eff)
effect_sizes

[[1]]
 contrast                      estimate     SE   df t.ratio p.value
 contingent - (non-contingent)   0.0427 0.0148 2309   2.875  0.0041

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.004079875

[1] 0.05711826


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1           deu 0.203791769489804
2           eng 0.267969063164672
3           est 0.125671598106875


In [None]:
%%R -i fas

lm5_1 <- lmer(swu ~ contingent + (1|transcript_id),data=fas, REML= FALSE)
emm5_1<-emmeans(lm5_1,pairwise~contingent)
pval<-summary(emm5_1$contrasts)$p.value
print(c(emm5_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm5_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm5_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

fas_lname <- fas$language[1]

effect_sizes[nrow(effect_sizes)+1,] <- c(fas_lname,NaN)
effect_sizes

[[1]]
 contrast                      estimate     SE  df t.ratio p.value
 contingent - (non-contingent)    0.125 0.0463 641   2.702  0.0071

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.007072117

[1] 0.09900964
  Language_name  rand_effect_size
1           deu 0.203791769489804
2           eng 0.267969063164672
3           est 0.125671598106875
4           fas               NaN


In [None]:
%%R -i fra
lm6_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=fra, REML= FALSE)
emm6_1<-emmeans(lm6_1,pairwise~contingent)
pval<-summary(emm6_1$contrasts)$p.value
print(c(emm6_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm6_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm6_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

fra_lname <- fra$language[1]

fra_eff <- eff_size(emm6_1,sigma = sigma(lm6_1), edf = df.residual(lm6_1))

fra_eff <- summary(fra_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(fra_lname,fra_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 21241' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 21241)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 21241' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 21241)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate      SE  df z.ratio p.value
 contingent - (non-contingent)   0.0509 0.00654 Inf   7.779  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 0.000000000000007299281

[1] 0.0000000000001021899


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1           deu 0.203791769489804
2           eng 0.267969063164672
3           est 0.125671598106875
4           fas               NaN
5           fra 0.129286127432911


In [None]:
%%R -i hrv

lm7_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=hrv, REML= FALSE)
emm7_1<-emmeans(lm7_1,pairwise~contingent)
pval<-summary(emm7_1$contrasts)$p.value
print(c(emm7_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm7_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm7_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

hrv_lname <- hrv$language[1]

hrv_eff <- eff_size(emm7_1,sigma = sigma(lm7_1), edf = df.residual(lm7_1))

hrv_eff <- summary(hrv_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(hrv_lname,hrv_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 6251' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 6251)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 6251' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 6251)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   0.0693 0.0107 Inf   6.463  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 0.0000000001023428

[1] 0.000000001432799


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1           deu 0.203791769489804
2           eng 0.267969063164672
3           est 0.125671598106875
4           fas               NaN
5           fra 0.129286127432911
6           hrv 0.169079925211206


In [None]:
%%R -i jpn

lm8_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=jpn, REML= FALSE)
emm8_1<-emmeans(lm8_1,pairwise~contingent)
pval<-summary(emm8_1$contrasts)$p.value
print(c(emm8_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm8_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm8_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

jpn_lname <- jpn$language[1]

jpn_eff <- eff_size(emm8_1,sigma = sigma(lm8_1), edf = df.residual(lm8_1))

jpn_eff <- summary(jpn_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(jpn_lname,jpn_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 25124' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 25124)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 25124' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 25124)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate      SE  df z.ratio p.value
 contingent - (non-contingent)    0.175 0.00634 Inf  27.631  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000004779791

[1] 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000006691708


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1           deu 0.203791769489804
2           eng 0.267969063164672
3           est 0.125671598106875
4           fas               NaN
5           fra 0.129286127432911
6           hrv 0.169079925211206
7           jpn 0.370538605687043


In [None]:
%%R -i kor

lm9_1 <- lmer(swu ~ contingent + (1|transcript_id),data=kor, REML= FALSE)
emm9_1<-emmeans(lm9_1,pairwise~contingent)
pval<-summary(emm9_1$contrasts)$p.value
print(c(emm9_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm9_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm9_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

kor_lname <- kor$language[1]

kor_eff <- eff_size(emm9_1,sigma = sigma(lm9_1), edf = df.residual(lm9_1))

kor_eff <- summary(kor_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(kor_lname,kor_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 4615' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 4615)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 4615' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 4615)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   0.0848 0.0106 Inf   8.024  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 0.000000000000001023043

[1] 0.00000000000001432261


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1           deu 0.203791769489804
2           eng 0.267969063164672
3           est 0.125671598106875
4           fas               NaN
5           fra 0.129286127432911
6           hrv 0.169079925211206
7           jpn 0.370538605687043
8           kor  0.25819504353917


In [None]:
%%R -i nor

lm10_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=nor, REML= FALSE)
emm10_1<-emmeans(lm10_1,pairwise~contingent)
pval<-summary(emm10_1$contrasts)$p.value
print(c(emm10_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm10_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm10_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

nor_lname <- nor$language[1]

nor_eff <- eff_size(emm10_1,sigma = sigma(lm10_1), edf = df.residual(lm10_1))

nor_eff <- summary(nor_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(nor_lname,nor_eff)
effect_sizes

R[write to console]: boundary (singular) fit: see help('isSingular')



[[1]]
 contrast                      estimate     SE   df t.ratio p.value
 contingent - (non-contingent)    0.101 0.0211 1923   4.784  <.0001

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.000001850965

[1] 0.00002591352


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1           deu 0.203791769489804
2           eng 0.267969063164672
3           est 0.125671598106875
4           fas               NaN
5           fra 0.129286127432911
6           hrv 0.169079925211206
7           jpn 0.370538605687043
8           kor  0.25819504353917
9           nor 0.250109617249279


In [None]:
%%R -i pol
 
# simple linear model (no random effects, because only 1 transcript from 1 sub)

lm11_1 <- lm(swu ~ contingent,data=pol, REML= FALSE)
emm11_1<-emmeans(lm11_1,pairwise~contingent)
pval<-summary(emm11_1$contrasts)$p.value
print(c(emm11_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm11_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm11_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

pol_lname <- pol$language[1]

effect_sizes[nrow(effect_sizes)+1,] <- c(pol_lname,NaN)
effect_sizes

[[1]]
 contrast                      estimate    SE df t.ratio p.value
 contingent - (non-contingent)    0.045 0.112 50   0.401  0.6898


[[2]]
[1] 0.689809

[1] 1
   Language_name  rand_effect_size
1            deu 0.203791769489804
2            eng 0.267969063164672
3            est 0.125671598106875
4            fas               NaN
5            fra 0.129286127432911
6            hrv 0.169079925211206
7            jpn 0.370538605687043
8            kor  0.25819504353917
9            nor 0.250109617249279
10           pol               NaN


In [None]:
%%R -i por

lm12_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=por, REML= FALSE)
emm12_1<-emmeans(lm12_1,pairwise~contingent)
pval<-summary(emm12_1$contrasts)$p.value
print(c(emm12_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm12_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm12_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

por_lname <- por$language[1]

por_eff <- eff_size(emm12_1,sigma = sigma(lm12_1), edf = df.residual(lm12_1))

por_eff <- summary(por_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(por_lname,por_eff)
effect_sizes

[[1]]
 contrast                      estimate     SE   df t.ratio p.value
 contingent - (non-contingent)   0.0698 0.0142 2661   4.905  <.0001

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.00000099005

[1] 0.0000138607


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name  rand_effect_size
1            deu 0.203791769489804
2            eng 0.267969063164672
3            est 0.125671598106875
4            fas               NaN
5            fra 0.129286127432911
6            hrv 0.169079925211206
7            jpn 0.370538605687043
8            kor  0.25819504353917
9            nor 0.250109617249279
10           pol               NaN
11           por  0.19962474315635


In [None]:
%%R -i spa

lm13_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=spa, REML= FALSE)
emm13_1<-emmeans(lm13_1,pairwise~contingent)
pval<-summary(emm13_1$contrasts)$p.value
print(c(emm13_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm13_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm13_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

spa_lname <- spa$language[1]

spa_eff <- eff_size(emm13_1,sigma = sigma(lm13_1), edf = df.residual(lm13_1))

spa_eff <- summary(spa_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(spa_lname,spa_eff)
effect_sizes

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'pbkrtest.limit = 4375' (or larger)
[or, globally, 'set emm_options(pbkrtest.limit = 4375)' or larger];
but be warned that this may result in large computation time and memory use.

R[write to console]: Note: D.f. calculations have been disabled because the number of observations exceeds 3000.
To enable adjustments, add the argument 'lmerTest.limit = 4375' (or larger)
[or, globally, 'set emm_options(lmerTest.limit = 4375)' or larger];
but be warned that this may result in large computation time and memory use.



[[1]]
 contrast                      estimate     SE  df z.ratio p.value
 contingent - (non-contingent)   0.0907 0.0124 Inf   7.301  <.0001

Degrees-of-freedom method: asymptotic 

[[2]]
[1] 0.000000000000286318

[1] 0.000000000004008452


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name  rand_effect_size
1            deu 0.203791769489804
2            eng 0.267969063164672
3            est 0.125671598106875
4            fas               NaN
5            fra 0.129286127432911
6            hrv 0.169079925211206
7            jpn 0.370538605687043
8            kor  0.25819504353917
9            nor 0.250109617249279
10           pol               NaN
11           por  0.19962474315635
12           spa 0.223848639903587


In [None]:
%%R -i swe


lm14_1 <- lmer(swu ~ contingent + (1|target_child_id) + (1|transcript_id),data=swe, REML= FALSE)
emm14_1<-emmeans(lm14_1,pairwise~contingent)
pval<-summary(emm14_1$contrasts)$p.value
print(c(emm14_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm14_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm14_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

swe_lname <- swe$language[1]

swe_eff <- eff_size(emm14_1,sigma = sigma(lm14_1), edf = df.residual(lm14_1))

swe_eff <- summary(swe_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(swe_lname,swe_eff)
effect_sizes

[[1]]
 contrast                      estimate     SE   df t.ratio p.value
 contingent - (non-contingent)    0.136 0.0172 2715   7.892  <.0001

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.000000000000004279189

[1] 0.00000000000005990865


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name  rand_effect_size
1            deu 0.203791769489804
2            eng 0.267969063164672
3            est 0.125671598106875
4            fas               NaN
5            fra 0.129286127432911
6            hrv 0.169079925211206
7            jpn 0.370538605687043
8            kor  0.25819504353917
9            nor 0.250109617249279
10           pol               NaN
11           por  0.19962474315635
12           spa 0.223848639903587
13           swe 0.316404337565653


In [None]:
%%R -i zho

lm15_1 <- lmer(swu ~ contingent + (1|transcript_id),data=zho, REML= FALSE)
emm15_1<-emmeans(lm15_1,pairwise~contingent)
pval<-summary(emm15_1$contrasts)$p.value
print(c(emm15_1$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm15_1,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm15_1,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

zho_lname <- zho$language[1]

effect_sizes[nrow(effect_sizes)+1,] <- c(zho_lname,NaN)
effect_sizes

[[1]]
 contrast                      estimate     SE  df t.ratio p.value
 contingent - (non-contingent)    0.087 0.0412 379   2.113  0.0353

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.03527606

[1] 0.4938648
   Language_name  rand_effect_size
1            deu 0.203791769489804
2            eng 0.267969063164672
3            est 0.125671598106875
4            fas               NaN
5            fra 0.129286127432911
6            hrv 0.169079925211206
7            jpn 0.370538605687043
8            kor  0.25819504353917
9            nor 0.250109617249279
10           pol               NaN
11           por  0.19962474315635
12           spa 0.223848639903587
13           swe 0.316404337565653
14           zho               NaN


In [None]:
%%R
write.csv(x=effect_sizes,'../data/SWU_effect_sizes.csv', row.names = FALSE)