In [141]:
library(readxl)
library(data.table)
library(ggplot2)
library(ggprism)
library(ggforce)
library(ggetho)
library(ggsignif)
library(zoo)
# Notebook plot size and the default ggplot theme (prism, base font size 20).
options(repr.plot.width = 15, repr.plot.height = 15)
theme_set(theme_prism(base_size = 20))

# Project helper scripts, sourced in this order.
invisible(lapply(
  c("R/plot.R", "R/Cbind.R", "R/link_idoc_metadata.R", "R/library.R"),
  source
))

pipeline_parameters <- yaml::read_yaml("parameters.yaml")

# SCORING_SYSTEM: whether to use the user annotated result in the metadata files
# (which could be biased) or exclusively the computer generated result
# (which will have very little or no bias).
# `[[` is used instead of `$` so that the name is matched exactly: `$` on a list
# silently accepts any key that merely starts with the requested name.
SCORING_SYSTEM <- pipeline_parameters[["SCORING_SYSTEM"]]

# USE_INCOMPLETE_TESTS: whether to use flies where at least one of the trials has
# an undetermined preference. In that case, the final value is the mean of the
# trials with a determined preference.
# Example: if the result of two trials is -0.3 and NA, the final result is -0.3
# (not -0.15).
USE_INCOMPLETE_TESTS <- pipeline_parameters[["USE_INCOMPLETE_TESTS"]]

# Fail here, with a readable message, rather than later with
# "argument is of length zero" when a key is missing from parameters.yaml.
if (!is.character(SCORING_SYSTEM) || length(SCORING_SYSTEM) != 1L || is.na(SCORING_SYSTEM)) {
  stop("parameters.yaml must define SCORING_SYSTEM as a single string", call. = FALSE)
}
if (length(USE_INCOMPLETE_TESTS) != 1L || is.na(USE_INCOMPLETE_TESTS)) {
  stop("parameters.yaml must define USE_INCOMPLETE_TESTS as a single value", call. = FALSE)
}

In [142]:
# Read the metadata table from the working directory.
metadata <- data.table::fread(file = "metadata.csv")

In [143]:
# Link the metadata against the results stored under result_dir
# (presumably one IDOC folder per animal -- see R/link_idoc_metadata.R).
result_dir <- "/idoc_data/IDOC"
metadata_linked <- link_idoc_metadata(
  metadata,
  result_dir = result_dir,
  verbose = FALSE
)

# Rows with an NA idoc_folder are reported as unlinked, without stopping.
n_missing_animals <- metadata_linked[, sum(is.na(idoc_folder))]
if (n_missing_animals > 0) {
  warning(paste(n_missing_animals, "animals could not be linked"))
}

In [144]:
# Keep only the rows that have an idoc_folder, then store the PRE / POST values
# that came with the metadata under *_manual names.
metadata_linked <- metadata_linked[!is.na(idoc_folder)]
metadata_linked[, c("PRE_manual", "POST_manual") := list(PRE, POST)]

In [145]:
# Save the linked metadata next to the notebook.
data.table::fwrite(metadata_linked, file = "IDOC_paper_metadata.csv")

In [146]:
# Load the scores from the raw csv files of every linked animal.
data_raw <- load_idoc_data(
  metadata_linked,
  # number of exits that must happen for a trial to be considered
  min_exits = 3,
  # trial 1 and trial 2 are loaded; the min_exits requirement applies to each separately
  trials = 1:2,
  use_incomplete_tests = USE_INCOMPLETE_TESTS,
  verbose = FALSE,
  # PRE and POST are both summarised with average_trial
  # summary_FUN=average_trial
  summary_FUN = list(PRE = average_trial, POST = average_trial)
)

In [147]:
# Store the loaded scores under *_machine names, so they remain available
# after PRE / POST are overwritten by the scoring-system selection.
data_raw[
  ,
  c(
    "PRE_machine", "PRE_1_machine", "PRE_2_machine",
    "POST_machine", "POST_1_machine", "POST_2_machine"
  ) := list(PRE, PRE_1, PRE_2, POST, POST_1, POST_2)
]

In [148]:
# Decide which score ends up in PRE / POST and record its origin in
# PRE_criteria / POST_criteria ("manual" or "machine").
#   "manual"   -> always the manual score
#   "flexible" -> the machine score, unless a manual score exists and its Reason
#                 is something other than "", "?" or "Machine-override"
#   otherwise  -> always the machine score
# The criteria columns are written in every branch, so later cells (and the
# exported csv files) see the same columns whatever the scoring system is.
if (SCORING_SYSTEM == "manual") {
  message("Using human made scores")
  data_raw[, `:=`(
    PRE = PRE_manual,
    POST = POST_manual,
    PRE_criteria = "manual",
    POST_criteria = "manual"
  )]
} else if (SCORING_SYSTEM == "flexible") {
  message("Using flexible scores")
  # start from the machine scores ...
  data_raw[, `:=`(
    PRE = PRE_machine,
    POST = POST_machine,
    PRE_criteria = "machine",
    POST_criteria = "machine"
  )]
  # ... and switch to the manual score wherever one exists and its Reason
  # does not leave the decision to the machine
  machine_reasons <- c("", "?", "Machine-override")
  data_raw[
    !is.na(PRE_manual) & !(PRE_Reason %in% machine_reasons),
    `:=`(PRE = PRE_manual, PRE_criteria = "manual")
  ]
  data_raw[
    !is.na(POST_manual) & !(POST_Reason %in% machine_reasons),
    `:=`(POST = POST_manual, POST_criteria = "manual")
  ]
} else {
  message("Using machine made scores")
  data_raw[, `:=`(
    PRE = PRE_machine,
    POST = POST_machine,
    PRE_criteria = "machine",
    POST_criteria = "machine"
  )]
}

Using flexible scores



In [149]:
# Frequency of every POST_Reason value in the raw table.
table(data_raw[["POST_Reason"]])


                                               
                                          1044 
                                             ? 
                                             3 
                                  AOJ-override 
                                             3 
                discard due to LTM not working 
                                            20 
                    discard due to LTM working 
                                            16 
                                Human-override 
                                            51 
                       Human-override; discard 
                                             3 
Human-override; discard due to LTM not working 
                                             1 
                                     Left-bias 
                                             2 
                              Machine-override 
                                            84 
                                    Rig

In [150]:
# Save the raw table, one row per animal, before any Reason is rewritten.
data.table::fwrite(data_raw, file = "raw_data_wide.csv")

In [151]:
# Work on a copy, so the Reason columns of data_raw stay as loaded.
data <- data.table::copy(data_raw)

# PRE scores beyond +/-0.2, or missing, overwrite PRE_Reason
# (the three conditions are mutually exclusive).
data[PRE < -0.2, PRE_Reason := "Pre-aversion"]
data[PRE > 0.2, PRE_Reason := "Pre-attraction"]
data[is.na(PRE), PRE_Reason := "PRE_NA"]

# A missing POST score overwrites POST_Reason.
data[is.na(POST), POST_Reason := "POST_NA"]


In [152]:
# Frequency of every POST_Reason value after the tagging step.
table(data[["POST_Reason"]])


                                                            ? 
                           706                              3 
                  AOJ-override discard due to LTM not working 
                             3                             16 
    discard due to LTM working                 Human-override 
                            12                             46 
       Human-override; discard                      Left-bias 
                             2                              2 
              Machine-override                        POST_NA 
                            82                            355 
                    Right-bias 
                             1 

In [153]:
# Frequency of every PRE_Reason value after the tagging step.
table(data[["PRE_Reason"]])


                                ?   Human-override Machine-override 
             931                3                2               18 
          PRE_NA   Pre-attraction     Pre-aversion 
              92              124               58 

In [154]:
# Number of rows whose PRE_Reason is "" or "?" and whose POST_Reason is
# "", "?" or one of the two override labels.
data[
  PRE_Reason %in% c("", "?") &
    POST_Reason %in% c("", "?", "Human-override", "Machine-override"),
  .N
]

In [155]:
# verify there are no repeats (the same fly represented more than once):
# every fly_name_reference must appear in exactly one row
counts <- data[, list(N = .N), by = "fly_name_reference"]
stopifnot(all(counts$N == 1))

In [156]:
# Running row number used as id. seq_len() also works on an empty table,
# whereas 1:.N would produce c(1, 0) there and make the assignment fail.
data[, id := seq_len(.N)]

In [157]:
# One row per idoc_folder, with the number of rows whose PRE_ROI is not "NONE".
data.table::fwrite(
  x = data[PRE_ROI != "NONE", list(N = .N), by = "idoc_folder"],
  file = "index.csv"
)

In [158]:
# Long format: PRE and POST are stacked into a single PI column, and the new
# `test` column says which of the two a row holds.
data_long <- melt(
  data = data,
  measure.vars = c("PRE", "POST"),
  variable.name = "test",
  value.name = "PI"
)

# Export both layouts.
data.table::fwrite(data_long, file = "tidy_data.csv")
data.table::fwrite(data, file = "tidy_data_wide.csv")

In [159]:
# For the rows whose POST score came from the manual annotation,
# count how many rows carry each POST_Reason.
data_raw[POST_criteria == "manual", list(N = .N), by = "POST_Reason"]

POST_Reason,N
<chr>,<int>
Human-override,38
AOJ-override,3
discard due to LTM not working,7
discard due to LTM working,11
