# Natural Statistics Cross-linguistic: 

#### Lexical diversity analysis - random sample 

----

In [1]:
import pandas as pd
import numpy as np
import sys
sys.path.insert(0, "data_proc")
import analytic_proc
import warnings
warnings.filterwarnings('ignore')

In [2]:
rand_dat_inc = pd.read_csv("../data/rand_dat_inc_master.csv",index_col=0,low_memory=False)
rand_dat_inc=rand_dat_inc[rand_dat_inc["language"]!="ara"]
rand_dat_inc=rand_dat_inc[(rand_dat_inc["target_child_age"]>=5) & (rand_dat_inc["target_child_age"]<=30)]

rand_dat_inc_cg = rand_dat_inc[rand_dat_inc["caregiver"]=="caregiver"]

rand_dat_inc_cg["contingent"] = np.where(rand_dat_inc_cg["contingent"]==1, "contingent", "non-contingent")

rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"].notna()]
rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"]!="xxx"]
rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"]!="yyy"]
rand_dat_inc_cg = rand_dat_inc_cg[rand_dat_inc_cg["gloss"]!="www"]

Would call the following if we could run lexical diversity analysis globally:

```python
analytic_proc.create_result(rand_dat_inc_cg)
```

However, we want to have seperate dictionaries for contingent and non-contingent words so we can compare them to one another.

The function will allow us to have a different dictionary for each transcript.

Finally, to compare, we can run mixed effects to understand whether contingent and non-contingent utterances differ in their lexical diversity, controlling for number of transcripts.

----
#### Seperate contingent and non-contingent utterances into individual dataframes

In [3]:
rand_dat_inc_cg_cc = rand_dat_inc_cg[rand_dat_inc_cg["contingent"]=="contingent"].reset_index(drop=True)
rand_dat_inc_cg_nc = rand_dat_inc_cg[rand_dat_inc_cg["contingent"]=="non-contingent"].reset_index(drop=True)

----
#### Loop through each unique transcript to compute the lexical diversity counts across languages.

In [4]:
analytic_proc.create_c_result(rand_dat_inc_cg_cc,"rand")

In [5]:
analytic_proc.create_nc_result(rand_dat_inc_cg_nc,"rand")

----
#### Lexical Diversity plot

In [6]:
rand_dat_inc_cg_cc = pd.read_csv("../data/rand_dat_inc_master_cc_lexdiv.csv",index_col=0,low_memory=False)
rand_dat_inc_cg_nc = pd.read_csv("../data/rand_dat_inc_master_nc_lexdiv.csv",index_col=0,low_memory=False)

# combine dataframes into one

rand_dat_inc_cg = pd.concat([rand_dat_inc_cg_cc,rand_dat_inc_cg_nc])

In [7]:
rand_dat_inc_cg.shape

(158714, 37)

In [8]:
# add play context and year of study

play_context = pd.read_csv("../data/context_data.csv")
play_context = play_context.rename(columns={"Corpus": "corpus_name"})

# print(play_context.to_markdown())

rand_dat_inc_cg = rand_dat_inc_cg.merge(play_context,on='corpus_name')

rand_dat_inc_cg["context"] = rand_dat_inc_cg["Location"] + rand_dat_inc_cg["Activity"]

rand_dat_inc_cg["context"] = rand_dat_inc_cg["context"].replace({"HomeBook-reading":"Home: book reading",
                                                                 "HomeInterview/Unstructured":"Home: interview/unstructured",
                                                                 "HomeNaN":"Home: unreported",
                                                                 "HomeOther":"Home: other",
                                                                 "HomeUnstructured":"Home: unstructured",
                                                                 "LabOther":"Lab: other",
                                                                 "LabTabletop play":"Lab: tabletop play",
                                                                 "LabInterview/Unstructured":"Lab: interview/unstructured",
                                                                 "LabUnstructured":"Lab: unstructured",
                                                                 np.nan:"Unreported",
                                                                 "OtherUnstructured":"Other: unstructured"})

# year of study
corpora_year = pd.read_csv("../data/corpora_year.csv")
corpora_year = corpora_year.rename(columns={"Corpora": "corpus_name"})
corpora_year = corpora_year[["corpus_name", "Year collected"]]

rand_dat_inc_cg = rand_dat_inc_cg.merge(corpora_year,on='corpus_name')

In [29]:
print(rand_dat_inc_cg[rand_dat_inc_cg["context"] == "Lab: unstructured"].to_markdown())

|       |     id | gloss                                                                                                            | stem                                                                                                      |   actual_phonology |   model_phonology | type                | language   |   num_morphemes |   num_tokens |   utterance_order | corpus_name   | part_of_speech                                                                                                             | speaker_code   |   speaker_name | speaker_role   | target_child_name   |   target_child_age | target_child_sex   |   media_start |   media_end | media_unit   | collection_name   |   collection_id |   corpus_id |   speaker_id |   target_child_id |   transcript_id | selection   |   selection_duration | filename                 | caregiver   | Language_name   | Language_Family   | Language_Genus   |   Agglutinative | contingent     |   uniquenss | Language   | CHILDES Category   | Locat

In [9]:
ar = [['min', rand_dat_inc_cg["target_child_age"].min()],
      ['max', rand_dat_inc_cg["target_child_age"].max()],
      ['mean', rand_dat_inc_cg["target_child_age"].mean()],
      ['stdev', rand_dat_inc_cg["target_child_age"].std()]]

age_range = pd.DataFrame(ar, columns = ['stat', 'age'])

print(age_range.to_markdown())

|    | stat   |      age |
|---:|:-------|---------:|
|  0 | min    |  5.98575 |
|  1 | max    | 29.9862  |
|  2 | mean   | 19.2703  |
|  3 | stdev  |  6.65243 |


In [10]:
rand_lex_stats = (rand_dat_inc_cg.groupby(["Language_name","target_child_id","transcript_id","contingent"]) #,"context","Year collected"])
                                  .uniquenss
                                  .agg(["sum"])
                                  .reset_index())
rand_lex_sumstats =  rand_lex_stats.rename({'sum': 'sums'}, axis=1)

In [11]:
len(rand_lex_sumstats["Language_name"].unique())

14

In [12]:
len(rand_lex_sumstats["transcript_id"].unique())

1562

In [13]:
rand_lex_sumstats.to_csv("../data/rand_lex_sumstats.csv")

In [14]:
%load_ext rpy2.ipython

Simple plot

In [15]:
%%R -i rand_lex_sumstats

library("lme4")
library("repr")
library("knitr")
library("broom")
library("emmeans")
library("tidyverse")

options(repr.plot.width=6, repr.plot.height=12, scipen=999)

xlabs <- c("C", "NC")

# ara_label <- data.frame(means=c(0),contingent = c(1.5),language="ara") # no adult speech transcribed
deu_label <- data.frame(sums=c(240),contingent = c(1.5),Language_name="German")
eng_label <- data.frame(sums=c(240),contingent = c(1.5),Language_name="English")
est_label <- data.frame(sums=c(240),contingent = c(1.5),Language_name="Estonian")
fas_label <- data.frame(sums=c(240),contingent = c(1.5),Language_name="Persian")
fra_label <- data.frame(sums=c(240),contingent = c(1.5),Language_name="French")
hrv_label <- data.frame(sums=c(240),contingent = c(1.5),Language_name="Croatian")
jpn_label <- data.frame(sums=c(240),contingent = c(1.5),Language_name="Japanese")
kor_label <- data.frame(sums=c(240),contingent = c(1.5),Language_name="Korean")
# nor_label <- data.frame(means=c(5.8),contingent = c(1.5),Language_name="Norwegian")
nor_ns_label <- data.frame(sums=c(247),contingent = c(1.5),Language_name="Norwegian")
pol_ns_label <- data.frame(sums=c(247),contingent = c(1.5),Language_name="Polish")
por_label <- data.frame(sums=c(240),contingent = c(1.5),Language_name="Portuguese")
spa_ns_label <- data.frame(sums=c(247),contingent = c(1.5),Language_name="Spanish")
swe_label <- data.frame(sums=c(247),contingent = c(1.5),Language_name="Swedish")
zho_label <- data.frame(sums=c(240),contingent = c(1.5),Language_name="Mandarin")
# zho_ns_label <- data.frame(means=c(6),contingent = c(1.5),Language_name="Mandarin")


p <- ggplot(rand_lex_sumstats, aes(x = contingent, y = sums, color = Language_name)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .5) +
     facet_wrap(. ~ Language_name,ncol = 7) + 
     geom_text(data = deu_label,label = "***",size=8,color="black") + 
     geom_text(data = eng_label,label = "***",size=8,color="black") +  
     geom_text(data = est_label,label = "**",size=8,color="black") +  
#      geom_text(data = fas_ns_label,label = "*",size=4,color="black",fontface = "italic") +
     geom_text(data = fas_label,label = "*",size=8, color="black") +
     geom_text(data = fra_label,label = "***",size=8,color="black") +  
     geom_text(data = hrv_label,label = "***",size=8,color="black") + 
     geom_text(data = jpn_label,label = "***",size=8,color="black") + 
     geom_text(data = kor_label,label = "***",size=8,color="black") +  
     geom_text(data = nor_ns_label,label = "ns",size=4,color="black",fontface = "italic") +  
#      geom_text(data = pol_ns_label,label = "ns", size=4,color="black",fontface = "italic") +  
     geom_text(data = por_label,label = "**",size=8,color="black") +  
     geom_text(data = spa_ns_label,label = "ns",size=4,color="black",fontface = "italic") + 
     geom_text(data = swe_label,label = "ns",size=4,color="black",fontface = "italic") + 
#      geom_text(data = zho_ns_label,label = "ns",size=4,color="black",fontface = "italic") +
     geom_text(data = zho_label,label = "^",size=8, color="black") +
     ylim(0, 250) +
     labs(tag = "A",
          y = "Number of Unique Words", x = "") +
     theme_classic() +
     scale_x_discrete(labels= xlabs) +
     theme(text = element_text(size=16),
           axis.text.x = element_text(vjust = 0.5, hjust=.5),
           legend.title = element_blank(),
           legend.background = element_rect(fill=alpha("white",0.90),
                                                            size=0, linetype="dotted",
                                                            colour = "white"),
           legend.text=element_text(size=16))
     ggsave("../figures/lexical_diversity_rand.pdf", width = 11.7, height = 6.2)

R[write to console]: Loading required package: Matrix

R[write to console]: ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

R[write to console]: ✔ ggplot2 3.4.2     ✔ purrr   1.0.1
✔ tibble  3.2.1     ✔ dplyr   1.1.2
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.4     ✔ forcats 1.0.0

R[write to console]: ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()



for manuscript

In [16]:
%%R -i rand_lex_sumstats

library('ggplot2')
library('repr')
options(repr.plot.width=6, repr.plot.height=12)

xlabs <- c("C", "NC")

# ara_label <- data.frame(means=c(0),contingent = c(1.5),language="ara") # no adult speech transcribed
deu_label <- data.frame(sums=c(235),contingent = c(1.5),Language_name="German")
eng_label <- data.frame(sums=c(235),contingent = c(1.5),Language_name="English")
est_label <- data.frame(sums=c(235),contingent = c(1.5),Language_name="Estonian")
fas_label <- data.frame(sums=c(235),contingent = c(1.5),Language_name="Persian")
fra_label <- data.frame(sums=c(235),contingent = c(1.5),Language_name="French")
hrv_label <- data.frame(sums=c(235),contingent = c(1.5),Language_name="Croatian")
jpn_label <- data.frame(sums=c(235),contingent = c(1.5),Language_name="Japanese")
kor_label <- data.frame(sums=c(235),contingent = c(1.5),Language_name="Korean")
# nor_label <- data.frame(means=c(5.8),contingent = c(1.5),Language_name="Norwegian")
nor_ns_label <- data.frame(sums=c(247),contingent = c(1.5),Language_name="Norwegian")
pol_ns_label <- data.frame(sums=c(247),contingent = c(1.5),Language_name="Polish")
por_label <- data.frame(sums=c(235),contingent = c(1.5),Language_name="Portuguese")
spa_ns_label <- data.frame(sums=c(247),contingent = c(1.5),Language_name="Spanish")
swe_label <- data.frame(sums=c(247),contingent = c(1.5),Language_name="Swedish")
zho_label <- data.frame(sums=c(235),contingent = c(1.5),Language_name="Mandarin")
# zho_ns_label <- data.frame(means=c(6),contingent = c(1.5),Language_name="Mandarin")


p <- ggplot(rand_lex_sumstats, aes(x = contingent, y = sums, color = Language_name)) +
     stat_summary(fun.y=mean, geom="point", shape=19, size=1.75) + 
     stat_summary(fun.data = mean_se, geom = "errorbar", size=1.25, width = .4) +
     facet_wrap(. ~ Language_name,ncol = 7) + 
     geom_text(data = deu_label,label = "***",size=6,color="black") + 
     geom_text(data = eng_label,label = "***",size=6,color="black") +  
     geom_text(data = est_label,label = "**",size=6,color="black") +  
     geom_text(data = fas_label,label = "*",size=6, color="black") +
     geom_text(data = fra_label,label = "***",size=6,color="black") +  
     geom_text(data = hrv_label,label = "***",size=6,color="black") + 
     geom_text(data = jpn_label,label = "***",size=6,color="black") + 
     geom_text(data = kor_label,label = "***",size=6,color="black") +  
     geom_text(data = nor_ns_label,label = "ns",size=3,color="black",fontface = "italic") +  
     geom_text(data = por_label,label = "**",size=6,color="black") +  
     geom_text(data = spa_ns_label,label = "ns",size=3,color="black",fontface = "italic") + 
     geom_text(data = swe_label,label = "ns",size=3,color="black",fontface = "italic") + 
     geom_text(data = zho_label,label = "^",size=3, color="black") +
     ylim(0, 250) +
     labs(tag = "A",
          y = "Number of Unique Words", x = "") +
     theme_classic() +
     scale_x_discrete(labels= xlabs) +
     theme(text = element_text(size=11.5),
           axis.text.x = element_text(vjust = 0.5, hjust=0.5),
           legend.position="none")
     ggsave("../figures/figure_2_A.pdf", width = 11.5, height = 4.2)

Plot + effect estimates

In [17]:
%%R

deu_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="German")
eng_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="English")
est_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="Estonian")
fas_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="Persian")
fra_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="French")
hrv_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="Croatian")
jpn_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="Japanese")
kor_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="Korean")
nor_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="Norwegian")
por_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="Portuguese")
spa_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="Spanish")
swe_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="Swedish")
zho_est_label <- data.frame(sums=c(.25),contingent = c(1),Language_name="Mandarin")

p <- p + geom_text(data = deu_est_label,label = "est=-66",size=4,color="black") +
         geom_text(data = eng_est_label,label = "est=-122",size=4,color="black") +
         geom_text(data = est_est_label,label = "est=-54.2",size=4,color="black") +
         geom_text(data = fas_est_label,label = "est=-51.1",size=4,color="black") +
         geom_text(data = fra_est_label,label = "est=-67.3",size=4,color="black") +
         geom_text(data = hrv_est_label,label = "est=-37.5",size=4,color="black") +
         geom_text(data = jpn_est_label,label = "est=-41.9",size=4,color="black") +
         geom_text(data = kor_est_label,label = "est=-101",size=4,color="black") +
#          geom_text(data = nor_est_label,label = "est=-.68",size=4,color="black") +
         geom_text(data = por_est_label,label = "est=-32.5",size=4,color="black") +
#          geom_text(data = spa_est_label,label = "est=-.39",size=4,color="black") +
         geom_text(data = swe_est_label,label = "est=-43.4",size=4,color="black") +
         geom_text(data = zho_est_label,label = "est=-60",size=4,color="black")
         

ggsave("../figures/lexical_diversity_rand_eff.pdf", width = 11.7, height = 6.2)

\+ sample size

In [18]:
%%R

deu_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="German")
eng_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="English")
est_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="Estonian")
fas_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="Persian")
fra_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="French")
hrv_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="Croatian")
jpn_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="Japanese")
kor_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="Korean")
nor_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="Norwegian")
pol_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="Polish")
por_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="Portuguese")
spa_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="Spanish")
swe_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="Swedish")
zho_n_label <- data.frame(sums=c(.25),contingent = c(1.7),Language_name="Mandarin")

deu_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="German")
eng_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="English")
est_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="Estonian")
fas_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="Persian")
fra_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="French")
hrv_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="Croatian")
jpn_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="Japanese")
kor_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="Korean")
nor_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="Norwegian")
pol_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="Polish")
por_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="Portuguese")
spa_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="Spanish")
swe_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="Swedish")
zho_sz_label <- data.frame(sums=c(.25),contingent = c(2.1),Language_name="Mandarin")

p <- p + geom_text(data = deu_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = eng_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = est_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fas_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = fra_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = hrv_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = jpn_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = kor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = nor_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = pol_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = por_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = spa_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = swe_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = zho_n_label,label = "n",size=4,color="black",fontface = "italic") +
         geom_text(data = deu_sz_label,label = " = 39",size=4,color="black") +
         geom_text(data = eng_sz_label,label = " = 1005",size=4,color="black") +
         geom_text(data = est_sz_label,label = " = 22",size=4,color="black") +
         geom_text(data = fas_sz_label,label = " = 12",size=4,color="black") +
         geom_text(data = fra_sz_label,label = " = 303",size=4,color="black") +
         geom_text(data = hrv_sz_label,label = " = 79",size=4,color="black") +
         geom_text(data = jpn_sz_label,label = " = 139",size=4,color="black") +
         geom_text(data = kor_sz_label,label = " = 37",size=4,color="black") +
         geom_text(data = nor_sz_label,label = " = 56",size=4,color="black") +
         geom_text(data = pol_sz_label,label = " = 1",size=4,color="black") +
         geom_text(data = por_sz_label,label = " = 24",size=4,color="black") +
         geom_text(data = spa_sz_label,label = " = 31",size=4,color="black") +
         geom_text(data = swe_sz_label,label = " = 16",size=4,color="black") +
         geom_text(data = zho_sz_label,label = " = 2",size=4,color="black")
         

ggsave("../figures/lexical_diversity_rand_eff_n.pdf", width = 11.7, height = 6.2)

----
#### Statistical analyses

By language

In [19]:
rand_lex_sumstats = rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']] #, 'Year collected']]

rand_dat_inc_cg_count = rand_dat_inc_cg[['Language_name','transcript_id','target_child_id']]

In [20]:
# model incorporating year of collection (visualize changes with year first)

# lmer(sums ~ contingent + year + (1|target_child_id) + (1|transcript_id), data = ., REML= FALSE))

In [21]:
%%R -i rand_lex_sumstats -i rand_dat_inc_cg_count

# vectors for rows to remove from lmer
case_study <- c("Korean", "Mandarin", "Persian") # only 1 target child analyzed

single_tran <- c("Polish") # only 1 transcript

# nests of models
lexdiv_nest1 <- rand_lex_sumstats %>%
    #mutate_at(c("Year collected"), as.factor) %>%
    filter(!Language_name %in% case_study) %>%
    filter(!Language_name %in% single_tran) %>%
    group_by(Language_name) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(sums ~ contingent +
                                (1|target_child_id) +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Language_name, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`))


lexdiv_nest2 <- rand_lex_sumstats %>%
    filter(Language_name %in% case_study) %>%
    group_by(Language_name) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(sums ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Language_name, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`))

lexdiv_nest3 <- rand_lex_sumstats %>%
    filter(Language_name %in% single_tran) %>%
    group_by(Language_name) %>%
    nest() %>%
    mutate(fit = map(data, ~ lm(sums ~ contingent,
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise")))) %>%
    select(Language_name, contrasts) %>%
    unnest(cols = c(contrasts))  %>%
    rename(statistic = `t.ratio`)
    
# number of transcripts per language
sample_size <- rand_dat_inc_cg_count %>%
    group_by(Language_name) %>%
    summarize(n = n_distinct(transcript_id))
    
# combine lmer summaries and correct p-values for multiple comparisons
emms_all <- list(lexdiv_nest1, lexdiv_nest2, lexdiv_nest3) %>% 
    reduce(bind_rows) %>%
    mutate(p.value = p.adjust(p.value, "holm", 14)) %>%
    left_join(sample_size)

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')



Joining with `by = join_by(Language_name)`


format statistics table

In [22]:
%%R

table_maker = function(data) { data %>%
    select(Language_name, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Language", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,Language)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Language (n)", c('Language','n'), sep=" (") %>%
    mutate(`Language (n)` = paste0(`Language (n)`,")")) %>%
    arrange(`Language (n)`)
    }

lexdiv_stats_table <- table_maker(emms_all)

kable(lexdiv_stats_table,"pipe")



|Language (n)    |Estimate (SE)   | Test statistic| Effect size|Adjusted p-value |
|:---------------|:---------------|--------------:|-----------:|:----------------|
|Croatian (58)   |-48.34 (3.66)   |         -13.22|       -2.48|<.0001           |
|English (872)   |-103.76 (1.95)  |         -53.29|       -2.58|<.0001           |
|Estonian (22)   |-75.05 (12.55)  |          -5.98|       -1.85|0.0001           |
|French (275)    |-70.88 (3.17)   |         -22.39|       -1.92|<.0001           |
|German (38)     |-66.82 (7.89)   |          -8.47|       -1.97|<.0001           |
|Japanese (160)  |-53.96 (3.17)   |         -17.01|       -1.90|<.0001           |
|Korean (28)     |-116.71 (10.89) |         -10.72|       -2.92|<.0001           |
|Mandarin (2)    |-91 (2)         |         -45.50|      -64.35|<.0001           |
|Norwegian (26)  |-42.24 (12.61)  |          -3.35|       -1.02|0.0356           |
|Persian (11)    |-63.09 (10.62)  |          -5.94|       -2.66|0.0009           |
|P

In [23]:
%%R 

# add columns sample and measure and save

lexdiv_stats_table %>%
    mutate(sample = "rand",
           measure = "lexdiv") %>%
    write.csv(file = "../data/rand_lexdiv_stats.csv") 

By play context

In [24]:
rand_lex_sumstats_contex = (rand_dat_inc_cg.groupby(["context","target_child_id","transcript_id","contingent"])
                                  .uniquenss
                                  .agg(["sum"])
                                  .reset_index())

rand_lex_sumstats_contex =  rand_lex_sumstats_contex.rename({'sum': 'sums'}, axis=1)

In [25]:
rand_lex_sumstats_contex["context"].unique()

array(['Home: book reading', 'Home: interview/unstructured',
       'Home: other', 'Home: unstructured', 'Lab: interview/unstructured',
       'Lab: unstructured', 'Other: unstructured', 'Unreported'],
      dtype=object)

In [26]:
%%R -i rand_lex_sumstats_contex

# vectors for rows to remove from lmer
single_tran <- c("Home: interview/unstructured") # only 1 transcript

contex_sample_size <- rand_lex_sumstats_contex %>%
    group_by(context) %>%
    summarize(n = n_distinct(transcript_id))

lexdiv_contex_nest_1 <- rand_lex_sumstats_contex %>%
    filter(!context %in% single_tran) %>%
    group_by(context) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(sums ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(context, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`))
    
lexdiv_contex_nest_2 <- rand_lex_sumstats_contex %>%
    filter(context %in% single_tran) %>%
    group_by(context) %>%
    nest() %>%
    mutate(fit = map(data, ~ lm(sums ~ contingent,
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise")))) %>%
    select(context, contrasts) %>%
    unnest(cols = c(contrasts))  %>%
    rename(statistic = `t.ratio`)

# combine lmer summaries and correct p-values for multiple comparisons
context_emms_all <- list(lexdiv_contex_nest_1, lexdiv_contex_nest_2) %>% 
    reduce(bind_rows) %>%
    mutate(p.value = p.adjust(p.value, "holm", 7)) %>%
    left_join(contex_sample_size)

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')



Joining with `by = join_by(context)`


In [27]:
%%R

table_maker = function(data) { data %>%
    select(context, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Play context", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,`Play context`)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Play context (n)", c(`Play context`,'n'), sep=" (") %>%
    mutate(`Play context (n)` = paste0(`Play context (n)`,")")) %>%
    arrange(`Play context (n)`)
    }
    
lexdiv_context_stats_table <- table_maker(context_emms_all)

kable(lexdiv_context_stats_table)



|Play context (n)                  |Estimate (SE)   | Test statistic| Effect size|Adjusted p-value |
|:---------------------------------|:---------------|--------------:|-----------:|:----------------|
|Home: book reading (28)           |-116.71 (10.89) |         -10.72|       -2.92|<.0001           |
|Home: interview/unstructured (1)  |-13 (NaN)       |            NaN|          NA|NaN              |
|Home: other (20)                  |-50.3 (4.96)    |         -10.14|       -3.29|<.0001           |
|Home: unstructured (898)          |-74.44 (1.81)   |         -41.22|       -1.95|<.0001           |
|Lab: interview/unstructured (360) |-137.9 (2.81)   |         -49.06|       -3.74|<.0001           |
|Lab: unstructured (26)            |-69.27 (8.96)   |          -7.73|       -2.19|<.0001           |
|Other: unstructured (128)         |-59.95 (2.83)   |         -21.17|       -2.66|<.0001           |
|Unreported (101)                  |-30.78 (2.89)   |         -10.67|       -1.51|<.0001 

By context, dropping English

In [31]:
# drop English
rand_dat_inc_cg_no_eng = rand_dat_inc_cg[rand_dat_inc_cg["Language_name"] != "English"]

rand_lex_sumstats_contex_no_eng = (rand_dat_inc_cg_no_eng.groupby(["context","target_child_id","transcript_id","contingent"])
                                  .uniquenss
                                  .agg(["sum"])
                                  .reset_index())

rand_lex_sumstats_contex_no_eng =  rand_lex_sumstats_contex_no_eng.rename({'sum': 'sums'}, axis=1)

In [44]:
%%R -i rand_lex_sumstats_contex_no_eng

# # vectors for rows to remove from lmer
single_tran <- c("Home: interview/unstructured") # only 1 transcript

contex_sample_size_no_eng <- rand_lex_sumstats_contex_no_eng %>%
    group_by(context) %>%
    summarize(n = n_distinct(transcript_id))

lexdiv_contex_nest_1_no_eng <- rand_lex_sumstats_contex_no_eng %>%
    filter(!context %in% single_tran) %>%
    group_by(context) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(sums ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(context, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`))

lexdiv_contex_nest_2_no_eng <- rand_lex_sumstats_contex_no_eng %>%
    filter(context %in% single_tran) %>%
    group_by(context) %>%
    nest() %>%
    mutate(fit = map(data, ~ lm(sums ~ contingent,
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise")))) %>%
    select(context, contrasts) %>%
    unnest(cols = c(contrasts))  %>%
    rename(statistic = `t.ratio`)
    
# combine lmer summaries and correct p-values for multiple comparisons
context_no_eng_emms_all <- list(lexdiv_contex_nest_1_no_eng, lexdiv_contex_nest_2_no_eng) %>% 
    reduce(bind_rows) %>%
    mutate(p.value = p.adjust(p.value, "holm", 3)) %>%
    left_join(contex_sample_size_no_eng)

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')



Joining with `by = join_by(context)`


In [45]:
%%R 

table_maker = function(data) { data %>%
    select(context, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Play context", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,`Play context`)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Play context (n)", c(`Play context`,'n'), sep=" (") %>%
    mutate(`Play context (n)` = paste0(`Play context (n)`,")")) %>%
    arrange(`Play context (n)`)
    }
    
lexdiv_context_stats_table_no_eng <- table_maker(context_no_eng_emms_all)

kable(lexdiv_context_stats_table_no_eng)



|Play context (n)                 |Estimate (SE)   | Test statistic| Effect size|Adjusted p-value |
|:--------------------------------|:---------------|--------------:|-----------:|:----------------|
|Home: book reading (28)          |-116.71 (10.89) |         -10.72|       -2.92|<.0001           |
|Home: interview/unstructured (1) |-13 (NaN)       |            NaN|          NA|NaN              |
|Home: unstructured (560)         |-64.37 (2.12)   |         -30.41|       -1.83|<.0001           |


By language family

In [20]:
rand_lex_sumstats_fam = (rand_dat_inc_cg.groupby(["Language_Family","target_child_id","transcript_id","contingent"])
                                  .uniquenss
                                  .agg(["sum"])
                                  .reset_index())

rand_lex_sumstats_fam =  rand_lex_sumstats_fam.rename({'sum': 'sums'}, axis=1)

In [27]:
%%R -i rand_lex_sumstats_fam

fam_sample_size <- rand_lex_sumstats_fam %>%
    group_by(Language_Family) %>%
    summarize(n = n_distinct(transcript_id))
    
lexdiv_fam_nest <- rand_lex_sumstats_fam %>%
    group_by(Language_Family) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(sums ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Language_Family, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`)) %>%
    mutate(p.value = p.adjust(p.value, "holm", 5)) %>%
    left_join(fam_sample_size)
    
table_maker = function(data) { data %>%
    select(Language_Family, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Language Family", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,`Language Family`)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Language Family (n)", c(`Language Family`,'n'), sep=" (") %>%
    mutate(`Language Family (n)` = paste0(`Language Family (n)`,")")) %>%
    arrange(`Language Family (n)`)
    }
    
lexdiv_fam_stats_table <- table_maker(lexdiv_fam_nest)

kable(lexdiv_fam_stats_table)

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')



Joining, by = "Language_Family"


|Language Family (n)  |Estimate (SE)  | Test statistic| Effect size|Adjusted p-value |
|:--------------------|:--------------|--------------:|-----------:|:----------------|
|Indo-European (1483) |-91.31 (1.77)  |         -51.64|       -1.93|<.0001           |
|Japonic (160)        |-51.53 (3.53)  |         -14.58|       -1.64|<.0001           |
|Koreanic (28)        |-107 (12.81)   |          -8.36|       -2.27|<.0001           |
|Sino-Tibetan (2)     |-91.5 (0.5)    |        -183.00|     -258.80|<.0001           |
|Uralic (22)          |-69.36 (15.29) |          -4.54|       -1.40|0.0007           |


By language genus

In [22]:
rand_lex_sumstats_gen = (rand_dat_inc_cg.groupby(["Language_Genus","target_child_id","transcript_id","contingent"])
                                  .uniquenss
                                  .agg(["sum"])
                                  .reset_index())

rand_lex_sumstats_gen =  rand_lex_sumstats_gen.rename({'sum': 'sums'}, axis=1)

In [28]:
%%R -i rand_lex_sumstats_gen
    
gen_sample_size <- rand_lex_sumstats_gen %>%
    group_by(Language_Genus) %>%
    summarize(n = n_distinct(transcript_id))
    
lexdiv_gen_nest <- rand_lex_sumstats_gen %>%
    group_by(Language_Genus) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(sums ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Language_Genus, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`)) %>%
    mutate(p.value = p.adjust(p.value, "holm", 5)) %>%
    left_join(gen_sample_size)
    
table_maker = function(data) { data %>%
    select(Language_Genus, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Language Genus", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,`Language Genus`)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Language Genus (n)", c(`Language Genus`,'n'), sep=" (") %>%
    mutate(`Language Genus (n)` = paste0(`Language Genus (n)`,")")) %>%
    arrange(`Language Genus (n)`)
    }
    
lexdiv_gen_stats_table <- table_maker(lexdiv_gen_nest)

kable(lexdiv_gen_stats_table)

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')



Joining, by = "Language_Genus"


|Language Genus (n) |Estimate (SE)  | Test statistic| Effect size|Adjusted p-value |
|:------------------|:--------------|--------------:|-----------:|:----------------|
|Chinese (2)        |-91.5 (0.5)    |        -183.00|     -258.80|<.0001           |
|Finnic (22)        |-69.36 (15.29) |          -4.54|       -1.40|0.0007           |
|Germanic (1077)    |-104.9 (2.08)  |         -50.36|       -2.22|<.0001           |
|Iranian (11)       |-63.36 (12.9)  |          -4.91|       -2.20|0.0018           |
|Japanese (160)     |-51.53 (3.53)  |         -14.58|       -1.64|<.0001           |
|Korean (28)        |-107 (12.81)   |          -8.36|       -2.27|<.0001           |
|Romance (335)      |-59.85 (3.09)  |         -19.39|       -1.51|<.0001           |
|Savlic (60)        |-36.16 (3.92)  |          -9.22|       -1.71|<.0001           |


By agglutinate status

In [24]:
rand_lex_sumstats_agg = (rand_dat_inc_cg.groupby(["Agglutinative","target_child_id","transcript_id","contingent"])
                                  .uniquenss
                                  .agg(["sum"])
                                  .reset_index())

rand_lex_sumstats_agg =  rand_lex_sumstats_agg.rename({'sum': 'sums'}, axis=1)

In [25]:
%%R -i rand_lex_sumstats_agg

rand_lex_sumstats_agg %>% 
    count(Agglutinative)
    
agg_sample_size <- rand_lex_sumstats_agg %>%
    group_by(Agglutinative) %>%
    summarize(n = n_distinct(transcript_id))
    
lexdiv_agg_nest <- rand_lex_sumstats_agg %>%
    group_by(Agglutinative) %>%
    nest() %>%
    mutate(fit = map(data, ~ lmer(sums ~ contingent +
                                (1|transcript_id),
                                data = .,
                                REML= FALSE)),
           summary = map(fit, ~ emmeans(., "contingent")),
           contrasts = map(summary, ~ summary(contrast(., method = "pairwise"))),
           effect_size = map2(summary, fit, ~ eff_size(.x, sigma = sigma(.y), edf = df.residual(.y)))) %>%
    select(Agglutinative, contrasts, effect_size) %>%
    unnest(cols = c(contrasts)) %>%
    mutate(effect_size = map(effect_size, ~ summary(.))) %>%
    unnest() %>%
    mutate(statistic = coalesce(`t.ratio`), .before = p.value) %>%
    select (-c(`t.ratio`)) %>%
    mutate(p.value = p.adjust(p.value, "holm", 5)) %>%
    left_join(agg_sample_size)
    
table_maker = function(data) { data %>%
    select(Agglutinative, n, estimate, SE, statistic, effect.size, p.value) %>%
    `colnames<-`(c("Agglutinative Status", "n", "Estimate", "SE", "Test statistic", "Effect size", "Adjusted p-value")) %>%
    mutate_at(vars(-c(`Adjusted p-value`,`Agglutinative Status`)), round,2) %>%
    mutate(`Adjusted p-value` = format(round(`Adjusted p-value`,4),nsmall=4)) %>%
    mutate(`Adjusted p-value` = gsub("0.0000","<.0001",`Adjusted p-value`)) %>%
    unite("Estimate (SE)", c('Estimate','SE'), sep=" (") %>%
    mutate(`Estimate (SE)` = paste0(`Estimate (SE)`,")")) %>%
    unite("Agglutinative Status (n)", c(`Agglutinative Status`,'n'), sep=" (") %>%
    mutate(`Agglutinative Status (n)` = paste0(`Agglutinative Status (n)`,")")) %>%
    arrange(`Agglutinative Status (n)`)
    }
    
lexdiv_agg_stats_table <- table_maker(lexdiv_agg_nest)

kable(lexdiv_agg_stats_table)

Joining, by = "Agglutinative"


|Agglutinative Status (n) |Estimate (SE) | Test statistic| Effect size|Adjusted p-value |
|:------------------------|:-------------|--------------:|-----------:|:----------------|
|0 (1485)                 |-91.31 (1.77) |         -51.71|       -1.93|<.0001           |
|1 (210)                  |-60.79 (3.96) |         -15.35|       -1.50|<.0001           |


----
#### Lexical diversity mixed models

In [17]:
# ara=rand_dat_inc_cg[['language','num_tokens','contingent','transcript_id','target_child_id']][rand_dat_inc_cg["language"]=="ara"] # no adult speech transcribed
deu=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="German"]
eng=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="English"]
est=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="Estonian"]
fas=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="Persian"]
fra=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="French"]
hrv=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="Croatian"]
jpn=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="Japanese"]
kor=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="Korean"]
nor=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="Norwegian"]
pol=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="Polish"]
por=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="Portuguese"]
spa=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="Spanish"]
swe=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="Swedish"]
zho=rand_lex_sumstats[['Language_name','sums','contingent','transcript_id','target_child_id']][rand_lex_sumstats["Language_name"]=="Mandarin"]

In [18]:
%%R

library("lme4")
library("emmeans")
library("lmerTest")
library("tidyverse")

options(scipen = 999)

effect_sizes <- data.frame(matrix(ncol = 2, nrow = 0))
cols <- c("Language_name", "rand_effect_size")
colnames(effect_sizes) <- cols

R[write to console]: Loading required package: Matrix

R[write to console]: 
Attaching package: ‘lmerTest’


R[write to console]: The following object is masked from ‘package:lme4’:

    lmer


R[write to console]: The following object is masked from ‘package:stats’:

    step


R[write to console]: ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──

R[write to console]: ✔ tibble  3.1.8      ✔ dplyr   1.0.10
✔ tidyr   1.2.0      ✔ stringr 1.4.1 
✔ readr   2.1.2      ✔ forcats 0.5.2 
✔ purrr   0.3.5      

R[write to console]: ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ tidyr::expand() masks Matrix::expand()
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
✖ tidyr::pack()   masks Matrix::pack()
✖ tidyr::unpack() masks Matrix::unpack()



In [19]:
%%R -i deu

lm2 <- lmer(sums ~ contingent + (1|target_child_id) + (1|transcript_id),data=deu, REML= FALSE)
emm2<-emmeans(lm2,pairwise~contingent)
pval<-summary(emm2$contrasts)$p.value
print(c(emm2$contrasts, pval))
print(p.adjust(pval, "holm", 14)) # create big vector of p-values and ajdust those
# # summary(emmeans(lm2,"contingent",contr="pairwise"),infer=TRUE) #group means
# # test(contrast(emmeans(lm2,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

deu_lname <- deu$Language_name[1]

deu_eff <- eff_size(emm2,sigma = sigma(lm2), edf = df.residual(lm2))

deu_eff <- summary(deu_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(deu_lname,deu_eff)
effect_sizes

[[1]]
 contrast                      estimate   SE df t.ratio p.value
 contingent - (non-contingent)    -66.6 7.93 39  -8.392  <.0001

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.0000000002850791

[1] 0.000000003991107


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1        German -1.95097516423439


In [20]:
%%R -i eng

lm3 <- lmer(sums ~ contingent + (1|target_child_id) + (1|transcript_id),data=eng, REML= FALSE)
emm3<-emmeans(lm3,pairwise~contingent)
pval<-summary(emm3$contrasts)$p.value
print(c(emm3$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm3,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm3,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

eng_lname <- eng$Language_name[1]

eng_eff <- eff_size(emm3,sigma = sigma(lm3), edf = df.residual(lm3))

eng_eff <- summary(eng_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(eng_lname,eng_eff)
effect_sizes

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')



[[1]]
 contrast                      estimate   SE  df t.ratio p.value
 contingent - (non-contingent)     -110 2.03 968 -54.028  <.0001

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000001969741

[1] 0.00000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000002757638


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1        German -1.95097516423439
2       English -2.48849254106895


In [21]:
%%R -i est

lm4 <- lmer(sums ~ contingent + (1|target_child_id) + (1|transcript_id),data=est, REML= FALSE)
emm4<-emmeans(lm4,pairwise~contingent)
pval<-summary(emm4$contrasts)$p.value
print(c(emm4$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm4,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm4,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

est_lname <- est$Language_name[1]

est_eff <- eff_size(emm4,sigma = sigma(lm4), edf = df.residual(lm4))

est_eff <- summary(est_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(est_lname,est_eff)
effect_sizes

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')



[[1]]
 contrast                      estimate   SE   df t.ratio p.value
 contingent - (non-contingent)    -69.4 15.3 23.1  -4.535  0.0001

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.000147549

[1] 0.002065686


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1        German -1.95097516423439
2       English -2.48849254106895
3      Estonian -1.39964309587243


In [22]:
%%R -i fas

lm5 <- lmer(sums ~ contingent + (1|transcript_id),data=fas, REML= FALSE)
emm5<-emmeans(lm5,pairwise~contingent)
pval<-summary(emm5$contrasts)$p.value
print(c(emm5$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm5,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm5,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

fas_lname <- fas$Language_name[1]

fas_eff <- eff_size(emm5,sigma = sigma(lm5), edf = df.residual(lm5))

fas_eff <- summary(fas_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(fas_lname,fas_eff)
effect_sizes

[[1]]
 contrast                      estimate   SE   df t.ratio p.value
 contingent - (non-contingent)    -63.4 12.9 12.1  -4.911  0.0004

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.0003505049

[1] 0.004907068


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1        German -1.95097516423439
2       English -2.48849254106895
3      Estonian -1.39964309587243
4       Persian -2.19634516956833


In [23]:
%%R -i fra

lm6 <- lmer(sums ~ contingent + (1|target_child_id) + (1|transcript_id),data=fra, REML= FALSE)
emm6<-emmeans(lm6,pairwise~contingent)
pval<-summary(emm6$contrasts)$p.value
print(c(emm6$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm6,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm6,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

fra_lname <- fra$Language_name[1]

fra_eff <- eff_size(emm6,sigma = sigma(lm6), edf = df.residual(lm6))

fra_eff <- summary(fra_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(fra_lname,fra_eff)
effect_sizes

[[1]]
 contrast                      estimate  SE  df t.ratio p.value
 contingent - (non-contingent)    -67.8 3.3 281 -20.581  <.0001

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.0000000000000000000000000000000000000000000000000000000004892677

[1] 0.000000000000000000000000000000000000000000000000000000006849748


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1        German -1.95097516423439
2       English -2.48849254106895
3      Estonian -1.39964309587243
4       Persian -2.19634516956833
5        French -1.74618705563958


In [24]:
%%R -i hrv

lm7 <- lmer(sums ~ contingent + (1|target_child_id) + (1|transcript_id),data=hrv, REML= FALSE)
emm7<-emmeans(lm7,pairwise~contingent)
pval<-summary(emm7$contrasts)$p.value
print(c(emm7$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm7,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm7,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

hrv_lname <- hrv$Language_name[1]

hrv_eff <- eff_size(emm7,sigma = sigma(lm7), edf = df.residual(lm7))

hrv_eff <- summary(hrv_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(hrv_lname,hrv_eff)
effect_sizes

[[1]]
 contrast                      estimate   SE   df t.ratio p.value
 contingent - (non-contingent)    -36.7 3.96 59.2  -9.267  <.0001

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.000000000000396518

[1] 0.000000000005551251


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1        German -1.95097516423439
2       English -2.48849254106895
3      Estonian -1.39964309587243
4       Persian -2.19634516956833
5        French -1.74618705563958
6      Croatian -1.73388694741294


In [25]:
%%R -i jpn

lm8 <- lmer(sums ~ contingent + (1|target_child_id) + (1|transcript_id),data=jpn, REML= FALSE)
emm8<-emmeans(lm8,pairwise~contingent)
pval<-summary(emm8$contrasts)$p.value
print(c(emm8$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm8,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm8,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

jpn_lname <- jpn$Language_name[1]

jpn_eff <- eff_size(emm8,sigma = sigma(lm8), edf = df.residual(lm8))

jpn_eff <- summary(jpn_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(jpn_lname,jpn_eff)
effect_sizes

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')



[[1]]
 contrast                      estimate   SE  df t.ratio p.value
 contingent - (non-contingent)    -51.5 3.38 160 -15.262  <.0001

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.000000000000000000000000000000005236558

[1] 0.00000000000000000000000000000007331181


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1        German -1.95097516423439
2       English -2.48849254106895
3      Estonian -1.39964309587243
4       Persian -2.19634516956833
5        French -1.74618705563958
6      Croatian -1.73388694741294
7      Japanese -1.70808151489496


In [26]:
%%R -i kor

lm9 <- lmer(sums ~ contingent + (1|transcript_id), data=kor, REML= FALSE)
emm9<-emmeans(lm9,pairwise~contingent)
pval<-summary(emm9$contrasts)$p.value
print(c(emm9$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm9,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm9,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

kor_lname <- kor$Language_name[1]

kor_eff <- eff_size(emm9,sigma = sigma(lm9), edf = df.residual(lm9))

kor_eff <- summary(kor_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(kor_lname,kor_eff)
effect_sizes

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')



[[1]]
 contrast                      estimate   SE df t.ratio p.value
 contingent - (non-contingent)     -107 12.8 29  -8.355  <.0001

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.000000003255291

[1] 0.00000004557408


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1        German -1.95097516423439
2       English -2.48849254106895
3      Estonian -1.39964309587243
4       Persian -2.19634516956833
5        French -1.74618705563958
6      Croatian -1.73388694741294
7      Japanese -1.70808151489496
8        Korean -2.27400592391078


In [27]:
%%R -i nor

lm10 <- lmer(sums ~ contingent + (1|target_child_id) + (1|transcript_id), data=nor, REML= FALSE)
emm10<-emmeans(lm10,pairwise~contingent)
pval<-summary(emm10$contrasts)$p.value
print(c(emm10$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm10,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm10,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

nor_lname <- nor$Language_name[1]

nor_eff <- eff_size(emm10,sigma = sigma(lm10), edf = df.residual(lm10))

nor_eff <- summary(nor_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(nor_lname,NaN)
effect_sizes

R[write to console]: boundary (singular) fit: see help('isSingular')



[[1]]
 contrast                      estimate   SE   df t.ratio p.value
 contingent - (non-contingent)    -32.1 12.3 28.4  -2.608  0.0144

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.01435942

[1] 0.2010319


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



  Language_name  rand_effect_size
1        German -1.95097516423439
2       English -2.48849254106895
3      Estonian -1.39964309587243
4       Persian -2.19634516956833
5        French -1.74618705563958
6      Croatian -1.73388694741294
7      Japanese -1.70808151489496
8        Korean -2.27400592391078
9     Norwegian               NaN


In [28]:
%%R -i pol

# simple linear model (no random effects, because only 1 transcript from 1 sub)

lm11 <- lm(sums ~ contingent ,data=pol, REML= FALSE)
emm11<-emmeans(lm11,pairwise~contingent)
pval<-summary(emm11$contrasts)$p.value
print(c(emm11$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm11,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm11,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

pol_lname <- pol$Language_name[1]

effect_sizes[nrow(effect_sizes)+1,] <- c(pol_lname,NaN)
effect_sizes

[[1]]
 contrast                      estimate  SE df t.ratio p.value
 contingent - (non-contingent)       -7 NaN  0     NaN     NaN


[[2]]
[1] NaN

[1] NaN
   Language_name  rand_effect_size
1         German -1.95097516423439
2        English -2.48849254106895
3       Estonian -1.39964309587243
4        Persian -2.19634516956833
5         French -1.74618705563958
6       Croatian -1.73388694741294
7       Japanese -1.70808151489496
8         Korean -2.27400592391078
9      Norwegian               NaN
10        Polish               NaN


In [29]:
%%R -i por

lm12 <- lmer(sums ~ contingent + (1|target_child_id) + (1|transcript_id),data=por, REML= FALSE)
emm12<-emmeans(lm12,pairwise~contingent)
pval<-summary(emm12$contrasts)$p.value
print(c(emm12$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm12,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm12,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

por_lname <- por$Language_name[1]

por_eff <- eff_size(emm12,sigma = sigma(lm12), edf = df.residual(lm12))

por_eff <- summary(por_eff)$effect.size

effect_sizes[nrow(effect_sizes)+1,] <- c(por_lname,por_eff)
effect_sizes

[[1]]
 contrast                      estimate   SE   df t.ratio p.value
 contingent - (non-contingent)    -39.4 9.32 24.1  -4.228  0.0003

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.00029494

[1] 0.004129159


R[write to console]: Since 'object' is a list, we are using the contrasts already present.



   Language_name  rand_effect_size
1         German -1.95097516423439
2        English -2.48849254106895
3       Estonian -1.39964309587243
4        Persian -2.19634516956833
5         French -1.74618705563958
6       Croatian -1.73388694741294
7       Japanese -1.70808151489496
8         Korean -2.27400592391078
9      Norwegian               NaN
10        Polish               NaN
11    Portuguese -1.27470969183833


In [30]:
%%R -i spa

lm13 <- lmer(sums ~ contingent + (1|target_child_id) + (1|transcript_id),data=spa, REML= FALSE)
emm13<-emmeans(lm13,pairwise~contingent)
pval<-summary(emm13$contrasts)$p.value
print(c(emm13$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm13,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm13,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

spa_lname <- spa$Language_name[1]

effect_sizes[nrow(effect_sizes)+1,] <- c(spa_lname,NaN)
effect_sizes

[[1]]
 contrast                      estimate   SE df t.ratio p.value
 contingent - (non-contingent)     -2.5 5.96 31  -0.420  0.6777

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.6776767

[1] 1
   Language_name  rand_effect_size
1         German -1.95097516423439
2        English -2.48849254106895
3       Estonian -1.39964309587243
4        Persian -2.19634516956833
5         French -1.74618705563958
6       Croatian -1.73388694741294
7       Japanese -1.70808151489496
8         Korean -2.27400592391078
9      Norwegian               NaN
10        Polish               NaN
11    Portuguese -1.27470969183833
12       Spanish               NaN


In [31]:
%%R -i swe

lm14 <- lmer(sums ~ contingent + (1|target_child_id) + (1|transcript_id),data=swe, REML= FALSE)
emm14<-emmeans(lm14,pairwise~contingent)
pval<-summary(emm14$contrasts)$p.value
print(c(emm14$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm14,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm14,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

swe_lname <- swe$Language_name[1]

effect_sizes[nrow(effect_sizes)+1,] <- c(swe_lname,NaN)
effect_sizes

R[write to console]: boundary (singular) fit: see help('isSingular')

R[write to console]: boundary (singular) fit: see help('isSingular')



[[1]]
 contrast                      estimate   SE   df t.ratio p.value
 contingent - (non-contingent)    -38.8 13.1 17.1  -2.947  0.0090

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.008988988

[1] 0.1258458
   Language_name  rand_effect_size
1         German -1.95097516423439
2        English -2.48849254106895
3       Estonian -1.39964309587243
4        Persian -2.19634516956833
5         French -1.74618705563958
6       Croatian -1.73388694741294
7       Japanese -1.70808151489496
8         Korean -2.27400592391078
9      Norwegian               NaN
10        Polish               NaN
11    Portuguese -1.27470969183833
12       Spanish               NaN
13       Swedish               NaN


In [32]:
%%R -i zho

lm15 <- lmer(sums ~ contingent + (1|transcript_id),data=zho, REML= FALSE)
emm15<-emmeans(lm15,pairwise~contingent)
pval<-summary(emm15$contrasts)$p.value
print(c(emm15$contrasts, pval))
print(p.adjust(pval, "holm", 14))
# summary(emmeans(lm15,"contingent",infer=TRUE)) #group means
# test(contrast(emmeans(lm15,"contingent"), "trt.vs.ctrl"), joint = TRUE) #main effect - are any groups different

# zho_lname <- zho$Language_name[1]

# zho_eff <- eff_size(emm15,sigma = sigma(lm15), edf = df.residual(lm15))

# zho_eff <- summary(zho_eff)$effect.size

# effect_sizes[nrow(effect_sizes)+1,] <- c(zho_lname,zho_eff)
# effect_sizes

[[1]]
 contrast                      estimate  SE df  t.ratio p.value
 contingent - (non-contingent)    -91.5 0.5  4 -183.000  <.0001

Degrees-of-freedom method: kenward-roger 

[[2]]
[1] 0.00000000534885

[1] 0.0000000748839


In [33]:
%%R
write.csv(x=effect_sizes,'../data/lexdiv_effect_sizes.csv', row.names = FALSE)