# Script C: Experiment 2

In [None]:
library(osfr)
library(tidyverse)


── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors


Attaching package: 'rstatix'

The following object is masked from 'package:stats':

    filter

here() starts at /Users/rp3650/Library/CloudStorage/GoogleDrive-robpetrosino@gmail.com/My Drive/Academics/projects/morphology/morphological-decomposition/sub-projects/frequency-effects/frequency-effect_masked-priming

## Data analysis

In [None]:

# Load the preprocessed Experiment 2 data, fetching it from OSF when no local
# copy is found.
exp2_data_folder <- "data/experiment2"
exp2_rawdata_filename <- "experiment_2_preprocessed_data.csv"
exp2_rawdata_path <- here(exp2_data_folder, exp2_rawdata_filename)

## 02. check if the rawdata file exists. if not, download it from OSF.
if (!file.exists(exp2_rawdata_path)) {
  # Make sure the target folder exists before downloading into it
  # (a no-op when the folder is already there).
  dir.create(here(exp2_data_folder), recursive = TRUE, showWarnings = FALSE)
  osf_retrieve_file("k3gpc") |>
    osf_download(path = here(exp2_data_folder),
                 conflicts = "overwrite")
}

## 03. read the data into R.
exp2_rawdata <- exp2_rawdata_path |>
  # `na.strings` is spelled out in full; the abbreviated `na` only worked
  # through partial argument matching. Empty strings and "NA" are read as NA.
  read.csv(na.strings = c("", "NA")) |>
  mutate(primeTime = primeDuration - maskDuration) |> # calculating the actual SOA
  rename(list = "Group_Nr")

# Analysis parameters and bookkeeping values for Experiment 2, collected in a
# single named list.
exp2_info <- list(
  intended_prime_duration = 33,
  prime_dur_lb = 25,   # lower bound applied to primeTime in the Step 2 filter
  prime_dur = 50,
  prime_dur_ub = 60,   # upper bound applied to primeTime in the Step 2 filter
  rt_lb = 200,         # lower bound applied to RT in the Step 3 filter
  rt_ub = 1800,        # upper bound applied to RT in the Step 3 filter
  freq_conditions = c("high", "low", "non-word"),
  # number of distinct recording sessions in the raw data
  n_recruited = length(unique(exp2_rawdata$Rec_Session_Id))
)

# Keep only rows in which TimeMeasure_Mean, primeDuration and responseError
# are all non-missing.
exp2_rawdata.sub <- exp2_rawdata |>
  filter(
    !is.na(TimeMeasure_Mean),
    !is.na(primeDuration),
    !is.na(responseError)
  )

# Mean of responseError for each Crowdsourcing_SubjId.
exp2_subj.error <- exp2_rawdata.sub |>
  group_by(Crowdsourcing_SubjId) |>
  summarise(mean.error = mean(responseError))
            
# Build a summary table with one row per Rec_Session_Id x Crowdsourcing_SubjId
# pair by column-binding four aggregate() results: session start time, session
# end time, list/gender/age, and the per-session means of responseError and of
# the two in-range flags defined below.
exp2_info$summary <- with(
  # transform() adds two 0/1 trial-level flags:
  #   RT_inrange    - RT lies within [rt_lb, rt_ub]
  #   Prime_inrange - primeDuration - maskDuration lies within
  #                   [prime_dur_lb, prime_dur_ub]
  transform(exp2_rawdata.sub,
    RT_inrange = ifelse(RT >= exp2_info$rt_lb & RT <= exp2_info$rt_ub, 1, 0),
    Prime_inrange = ifelse((primeDuration - maskDuration) >= exp2_info$prime_dur_lb &
                             (primeDuration - maskDuration) <= exp2_info$prime_dur_ub, 1, 0)),
  {
    # NOTE(review): every aggregate() call passes data=exp2_rawdata.sub, which
    # does not contain RT_inrange / Prime_inrange. Presumably the last call
    # resolves them from the with() environment via the formula's environment
    # - confirm before refactoring.
    data.frame(aggregate(Start_Time ~ Rec_Session_Id + Crowdsourcing_SubjId, data=exp2_rawdata.sub, unique),
               aggregate(End_Time_Local ~ Rec_Session_Id + Crowdsourcing_SubjId, data=exp2_rawdata.sub, unique),
               aggregate(cbind(list, SelectedGender, SelectedAge) ~ Rec_Session_Id + Crowdsourcing_SubjId, data=exp2_rawdata.sub, unique),
      # mean of the 0/1 columns = proportion of trials flagged, per session
      aggregate(cbind(responseError, RT_inrange, Prime_inrange) ~ Rec_Session_Id + Crowdsourcing_SubjId, mean, data=exp2_rawdata.sub)
  )
}
)

# Each aggregate() result carries its own copy of the two grouping columns.
# The pattern below matches column names in which "Rec_Session_Id" or
# "Crowdsourcing_SubjId" is followed by at least one more character, so the
# plain-named grouping columns are kept and the others are dropped.
# NOTE(review): presumably the repeated columns are the ones data.frame()
# renamed with a suffix - confirm.
exp2_info$summary <- exp2_info$summary[, -grep("Rec_Session_Id.|Crowdsourcing_SubjId.", colnames(exp2_info$summary))] # remove all extra aggregating columns (subj ID)

# Length of each session in minutes, from Start_Time to End_Time_Local.
# Dividing an interval by dminutes(1) is vectorised, so no element-wise
# lapply()/unlist() round trip is needed.
exp2_info$summary$Duration <- interval(
  ymd_hms(exp2_info$summary$Start_Time),
  ymd_hms(exp2_info$summary$End_Time_Local)
) / dminutes(1)


### Step 1: subject and item performance

In [None]:

# Sessions whose mean responseError is at most 0.3.
exp2_step1_goodsubj <- exp2_info$summary |>
  subset(responseError <= 0.3)

exp2_step1_subj_remain <- nrow(exp2_step1_goodsubj)

# Items (condition_rec x target_rec) whose error percentage exceeds 30.
# `.groups = "drop_last"` spells out the grouping summarise() would apply by
# default (result stays grouped by condition_rec) and silences the
# informational message about it.
exp2_step1_item.err <- exp2_rawdata.sub |>
  group_by(condition_rec, target_rec) |>
  summarise(word.percent = mean(responseError) * 100,
            .groups = "drop_last") |>
  filter(word.percent > 30)


`summarise()` has grouped output by 'condition_rec'. You can override using the
`.groups` argument.

### Step 2: prime durations

In [None]:

# Mean and standard deviation of primeTime over all retained trials,
# rounded to two decimals.
exp2_summary.primeTime <- exp2_rawdata.sub |>
  summarise(meanPrimeTime = round(mean(primeTime), 2),
            sdPrimeTime = round(sd(primeTime), 2))

# Count and percentage of trials whose primeTime falls below, inside, or above
# the [prime_dur_lb, prime_dur_ub] window.
# The classification is element-wise, so no grouping by primeTime is needed
# before it. Rows with a missing primeTime get an NA label.
exp2_primeTimeRangeSummary <- exp2_rawdata.sub |>
  mutate(range = case_when(
    primeTime < exp2_info$prime_dur_lb ~ "below",
    primeTime > exp2_info$prime_dur_ub ~ "above",
    !is.na(primeTime) ~ "in range"
  )) |>
  group_by(range) |>
  tally() |>
  ungroup() |>
  mutate(range.percent = round((n * 100) / nrow(exp2_rawdata.sub), 2))

# Keep trials whose primeTime lies inside [prime_dur_lb, prime_dur_ub];
# rows where the comparison is NA are dropped.
# NOTE(review): exp2_data_step1 is not created anywhere in the visible code -
# confirm it is defined upstream (presumably from the Step 1 exclusions).
exp2_data_step2 <- exp2_data_step1[
  which(
    exp2_data_step1$primeTime >= exp2_info$prime_dur_lb &
      exp2_data_step1$primeTime <= exp2_info$prime_dur_ub
  ),
  ,
  drop = FALSE
]

# Number of distinct sessions and number of trials left after this step.
exp2_step2_subj_remain <- length(unique(exp2_data_step2$Rec_Session_Id))

exp2_step2_trials_remain <- nrow(exp2_data_step2)


### Step 3: RT distribution

In [None]:

# RT outliers: keep trials whose RT lies inside [rt_lb, rt_ub]
# (rows where the comparison is NA are dropped).
exp2_data_step3 <- exp2_data_step2[
  which(
    exp2_data_step2$RT >= exp2_info$rt_lb &
      exp2_data_step2$RT <= exp2_info$rt_ub
  ),
  ,
  drop = FALSE
]

exp2_step3_subj_remain <- length(unique(exp2_data_step3$Rec_Session_Id))

exp2_step3_trials_remain <- nrow(exp2_data_step3)

# Error trial removal: keep only trials with responseError equal to 0.
exp2_data_step3b <- exp2_data_step3[
  which(exp2_data_step3$responseError == 0),
  ,
  drop = FALSE
]

exp2_step3b_subj_remain <- length(unique(exp2_data_step3b$Rec_Session_Id))

exp2_step3b_trials_remain <- nrow(exp2_data_step3b)

# Remove subjects with fewer than 7 trials in at least one
# condition*primetype combination (half of the total number of items per
# combination).
rt_data_labels <- c("Rec_Session_Id", "condition_rec", "primetype_rec", "RT")

# Count trials per session x condition x prime type, then collect the session
# ids of every cell holding fewer than 7 trials.
exp2_subj_filter_2 <- unlist(unique(subset(
  aggregate(exp2_data_step3b[, rt_data_labels],
            RT ~ ., FUN = length, drop = FALSE),
  RT < 7,
  select = Rec_Session_Id
)))

### We also want to make sure that all subjects have all conditions; a subject
### who lost every trial of a given condition along the way is removed. A
### complete subject has 6 distinct condition x prime-type combinations.
exp2_subj_filter_conditions <- exp2_data_step3b |>
  distinct(Rec_Session_Id, condition_rec, primetype_rec) |>
  count(Rec_Session_Id) |>
  filter(n != 6) |>
  pull(Rec_Session_Id)

# Final dataset: drop every session flagged by either filter and turn the two
# design columns into factors.
exp2_data_final <- exp2_data_step3b |>
  subset(!(Rec_Session_Id %in% exp2_subj_filter_2 |
             Rec_Session_Id %in% exp2_subj_filter_conditions)) |>
  mutate(across(c(condition_rec, primetype_rec), as.factor))

exp2_final_subj_remain <- length(unique(exp2_data_final$Rec_Session_Id))

exp2_final_trials_remain <- nrow(exp2_data_final)


## Results

In [None]:

# error rates averages
### Flag subjects with fewer than 7 trials in at least one
### condition*primetype combination. Crucially, the trial counts are computed
### on the dataset *before* the error-trial removal step.
exp2_subj_filter_2_with.errors <- exp2_data_step3[, rt_data_labels] |>
  aggregate(RT ~ ., FUN = length, drop = FALSE) |>
  subset(RT < 7, select = Rec_Session_Id) |>
  unique() |>
  unlist()
# this step just makes sure that the same subjects will be removed from both datasets
exp2_subj_filter_2_with.errors <- union(exp2_subj_filter_2_with.errors, exp2_subj_filter_2)

### Make sure that all subjects have all conditions; a subject who lost every
### trial of a given condition along the way is removed (a complete subject
### has 6 distinct condition x prime-type combinations).
exp2_subj_filter_conditions_with.errors <-
  exp2_data_step3 %>%
  group_by(Rec_Session_Id) %>%
  distinct(condition_rec, primetype_rec) %>%
  tally() %>%
  filter(n != 6) %>%
  pull(Rec_Session_Id)

# Dataset used for the error analysis: error trials are kept, flagged
# subjects are removed.
exp2_data_final_with.errors <- exp2_data_step3 |>
  subset(!(Rec_Session_Id %in% exp2_subj_filter_2_with.errors) &
           !(Rec_Session_Id %in% exp2_subj_filter_conditions_with.errors))

# Percentage of error trials per condition x prime type x session.
# Condition levels come from exp2_info$freq_conditions, so the ordering is
# defined in one place only.
# `.groups = "drop_last"` spells out the default grouping of the result
# (condition_rec, primetype_rec) and silences the informational message.
exp2_error.rates <- exp2_data_final_with.errors %>%
  mutate(primetype_rec = factor(primetype_rec, levels = c("unrelated", "related")),
         condition_rec = factor(condition_rec, levels = exp2_info$freq_conditions)) %>%
  group_by(condition_rec, primetype_rec, Rec_Session_Id) %>%
  summarise(error.percent = mean(responseError) * 100, .groups = "drop_last")


`summarise()` has grouped output by 'condition_rec', 'primetype_rec'. You can
override using the `.groups` argument.

`summarise()` has grouped output by 'Rec_Session_Id', 'condition_rec'. You can
override using the `.groups` argument.

`summarise()` has grouped output by 'Rec_Session_Id', 'condition_rec'. You can
override using the `.groups` argument.

`summarise()` has grouped output by 'condition_rec'. You can override using the
`.groups` argument.

Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

Loading required package: carData

Attaching package: 'car'

The following object is masked from 'package:dplyr':

    recode

The following object is masked from 'package:purrr':

    some

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: Gamma  ( identity )
Formula: RT ~ condition_rec * primetype_rec + (1 | Crowdsourcing_SubjId) +  
    (1 | target_rec)
   Data: exp2_data_final
Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e+06))

     AIC      BIC   logLik deviance df.resid 
 2051808  2051899 -1025895  2051790   168186 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.4411 -0.5829 -0.1749  0.3419 12.8540 

Random effects:
 Groups               Name        Variance Std.Dev.
 Crowdsourcing_SubjId (Intercept) 1842.747 42.9272 
 target_rec           (Intercept)  136.400 11.6790 
 Residual                            0.041  0.2025 
Number of obs: 168195, groups:  Crowdsourcing_SubjId, 1924; target_rec, 104

Fixed effects:
                              Estimate Std. Error  t value Pr(>|z|)    
(Intercept)                   619.6174     0.4438 1396.289  < 2e-16 ***
condition_rec1        

Analysis of Deviance Table (Type III Wald chisquare tests)

Response: RT
                                Chisq Df Pr(>Chisq)    
(Intercept)                 1949622.3  1  < 2.2e-16 ***
condition_rec                  2613.3  2  < 2.2e-16 ***
primetype_rec                  1700.5  1  < 2.2e-16 ***
condition_rec:primetype_rec    1158.9  2  < 2.2e-16 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ℹ In argument: `across(c(13), round, 2)`.
! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
Supply arguments directly to `.fns` through an anonymous function instead.

  # Previously
  across(a:b, mean, na.rm = TRUE)

  # Now
  across(a:b, \(x) mean(x, na.rm = TRUE))

#### Stats summary

In [None]:

# Join the descriptive summaries with the matching test statistics on the
# shared `factor` column; mean_high and mean_low are dropped from the second
# table before the two are stacked.
exp2_summary.results_mop <- merge(
  exp2_gdavg_mop_summary, exp2_rt_stats_main,
  by = "factor"
)
exp2_summary.results_fae <- select(
  merge(exp2_gdavg_fae_summary, exp2_rt_stats_interaction, by = "factor"),
  -mean_high, -mean_low
)

exp2_summary.results <- bind_rows(
  exp2_summary.results_mop,
  exp2_summary.results_fae
)

# Render the combined table with gt. Spanners are assigned by column position,
# so they depend on the column order produced by relocate().
exp2_summary.results |>
  mutate(t = round(t, 2)) |>
  relocate(c("sd_unrelated", "mean.error_unrelated"),
           .before = gd.mean_related) |>
  gt() |>
  cols_label(
    CI = "95% CI",
    contains("mean") ~ "mean",
    contains("sd") ~ "SD",
    contains("error") ~ "Error (%)"
  ) |>
  tab_spanner(label = "unrelated RT", columns = 2:4) |>
  tab_spanner(label = "repetition RT", columns = 5:7) |>
  tab_spanner(label = "priming effects", columns = 9:12) |>
  tab_spanner(label = md("_t_-test"), columns = 13:15) |>
  # relabel the plain `sd` column after the generic "SD" label set above
  cols_label(sd = md("SD~p~")) |>
  cols_label(
    t = md("_t_"),
    p = md("_p_")
  ) |>
  # show missing cells as a single space
  sub_missing(missing_text = " ")
