In [45]:
library(readxl)
library(data.table)
library(ggplot2)
library(ggprism)
library(ggforce)
library(ggetho)
library(ggsignif)
library(zoo)
# Figure size used when plots are rendered through the notebook (`repr` options).
options(repr.plot.width = 15, repr.plot.height = 15)
# Apply the Prism theme with a base font size of 20 to every ggplot in this session.
theme_set(theme_prism(base_size = 20))
source("R/plot.R")
source("R/Cbind.R")
source("R/link_idoc_metadata.R")
source("R/library.R")

# Pipeline-wide settings are read from parameters.yaml.
pipeline_parameters <- yaml::read_yaml("parameters.yaml")

# NOTE: entries are extracted with `[[` rather than `$`, because `$` partially
# matches list names and could silently return a different, longer-named entry
# when the exact key is missing. With `[[` a missing key yields NULL.

# Whether to use the user-annotated result in the metadata files (which could
# be biased) or exclusively the computer-generated result (which will have
# very little or no bias).
SCORING_SYSTEM <- pipeline_parameters[["SCORING_SYSTEM"]]

# Whether to use flies where at least one of the trials has an undetermined
# preference. In that case, the final value is the mean of the trials with a
# determined preference.
# Example: if the result of two trials is -0.3 and NA, the final result is
# -0.3 (not -0.15).
USE_INCOMPLETE_TESTS <- pipeline_parameters[["USE_INCOMPLETE_TESTS"]]

In [46]:
# Read the per-animal metadata table from disk.
metadata <- data.table::fread(file = "metadata.csv")

In [47]:
# Sorted list of the distinct values in the Files column.
sort(unique(metadata[["Files"]]))

In [48]:
# Directory passed to link_idoc_metadata() as the location of the IDOC results.
# (A relative "IDOC_dataset/" path was previously assigned here and immediately
# overwritten; only the value below was ever used.)
result_dir <- "/idoc_data/IDOC"

# Attach an idoc_folder to every animal in the metadata.
metadata_linked <- link_idoc_metadata(metadata, result_dir = result_dir, verbose = FALSE)

# Animals without an idoc_folder could not be linked: report them, then drop them.
n_missing_animals <- metadata_linked[, sum(is.na(idoc_folder))]
if (n_missing_animals > 0) {
  warning(n_missing_animals, " animals could not be linked")
}
metadata_linked <- metadata_linked[!is.na(idoc_folder)]

# Keep the human-annotated scores under explicit *_manual names.
metadata_linked[, PRE_manual := PRE]
metadata_linked[, POST_manual := POST]

In [49]:
# Save the linked metadata (only animals with an idoc_folder remain at this point).
data.table::fwrite(metadata_linked, file = "IDOC_paper_metadata.csv")

In [50]:
# Build the raw dataset from the raw csv files of the linked animals.
data_raw <- load_idoc_data(
  metadata_linked,
  # load from trial 1 and trial 2; the min_exits requirement is applied to
  # each of them separately
  trials = 1:2,
  # number of exits that need to happen for a trial to be considered
  min_exits = 3,
  mm_decision_zone = 7,
  use_incomplete_tests = USE_INCOMPLETE_TESTS,
  # one summary function per test
  # (alternative: a single function, summary_FUN = average_trial)
  summary_FUN = list(
    PRE = average_trial,
    POST = average_trial
  ),
  verbose = FALSE
)

In [51]:
# Build an index with one row per idoc_folder: the first row of each folder
# among the animals whose PRE_ROI is not "NONE". N is the number of such
# animals in the folder.
index <- data_raw[
  PRE_ROI != "NONE",
  .(N = .N, PRE_files, POST_files),
  by = idoc_folder
][, .SD[1L], by = idoc_folder]

# TRUE when none of the file entries of one row is NA.
files_complete <- function(files) all(!is.na(unlist(files)))

# Keep only folders where every PRE and POST file is known.
# vapply() always returns a logical vector (also when `index` is empty),
# whereas sapply() would return an empty list and break the subsetting.
index <- index[vapply(index$PRE_files, files_complete, logical(1))]
index <- index[vapply(index$POST_files, files_complete, logical(1))]

data.table::fwrite(x = index, file = "index.csv")

In [52]:
# Preserve the scores currently stored in data_raw under *_machine names, so
# that PRE / POST can be reassigned afterwards without losing them.
data_raw[, `:=`(
  PRE_machine = PRE,
  PRE_1_machine = PRE_1,
  PRE_2_machine = PRE_2,
  POST_machine = POST,
  POST_1_machine = POST_1,
  POST_2_machine = POST_2
)]

In [53]:
# Decide which score ends up in PRE / POST and record where it came from in
# PRE_criteria / POST_criteria ("manual" or "machine"):
#   "manual"   -> always the human annotation (*_manual)
#   "flexible" -> the machine score, replaced by the human annotation when one
#                 exists and its *_Reason is not "", "?" or "Machine-override"
#   otherwise  -> always the machine score (*_machine)
if (length(SCORING_SYSTEM) != 1L || is.na(SCORING_SYSTEM)) {
  # Fail with a clear message instead of the cryptic error `if` would raise.
  stop("SCORING_SYSTEM must be a single non-missing value", call. = FALSE)
}

if (SCORING_SYSTEM == "manual") {
  message("Using human made scores")
  data_raw[, PRE := PRE_manual]
  data_raw[, POST := POST_manual]
  data_raw[, PRE_criteria := "manual"]
  data_raw[, POST_criteria := "manual"]

} else if (SCORING_SYSTEM == "flexible") {
  message("Using flexible scores")
  data_raw[, PRE := PRE_machine]
  data_raw[, POST := POST_machine]
  data_raw[, PRE_criteria := "machine"]
  data_raw[, POST_criteria := "machine"]

  # Reasons for which the machine score is kept even if a manual one exists.
  keep_machine_reasons <- c("", "?", "Machine-override")
  data_raw[
    !is.na(PRE_manual) & !(PRE_Reason %in% keep_machine_reasons),
    `:=`(PRE_criteria = "manual", PRE = PRE_manual)
  ]
  data_raw[
    !is.na(POST_manual) & !(POST_Reason %in% keep_machine_reasons),
    `:=`(POST_criteria = "manual", POST = POST_manual)
  ]

} else {
  message("Using machine made scores")
  data_raw[, PRE := PRE_machine]
  data_raw[, POST := POST_machine]
  # Also record the criteria here, so that code filtering on PRE_criteria /
  # POST_criteria works whichever scoring system is selected.
  data_raw[, PRE_criteria := "machine"]
  data_raw[, POST_criteria := "machine"]
}

Using flexible scores



In [54]:
# Frequency of every POST_Reason value in the raw data.
table(data_raw[["POST_Reason"]])


                                               
                                          1108 
                                             ? 
                                             3 
                                  AOJ-override 
                                             3 
                discard due to LTM not working 
                                            20 
                    discard due to LTM working 
                                            16 
                                Human-override 
                                            44 
                       Human-override; discard 
                                             3 
Human-override; discard due to LTM not working 
                                             1 
                 IDOC2 not working as expected 
                                            31 
                                     Left-bias 
                                             4 
                              Machine-o

In [55]:
# Save the raw data (one row per animal, wide layout) to disk.
data.table::fwrite(data_raw, file = "raw_data_wide.csv")

In [56]:
# Work on a copy, so the in-place assignments below leave data_raw untouched.
data <- data.table::copy(data_raw)

# Overwrite PRE_Reason according to the PRE score: below -0.2, above 0.2, or
# missing. The three conditions are mutually exclusive.
data[PRE < -0.2, PRE_Reason := "Pre-aversion"]
data[PRE > 0.2, PRE_Reason := "Pre-attraction"]
data[is.na(PRE), PRE_Reason := "PRE_NA"]

# Overwrite POST_Reason where the POST score is missing.
data[is.na(POST), POST_Reason := "POST_NA"]


In [57]:
# data[Training=="6X_Massed", .(Files, PRE, POST)]

In [58]:
# Frequency of every POST_Reason value after the annotation step.
table(data[["POST_Reason"]])


                                                            ? 
                           771                              3 
                  AOJ-override discard due to LTM not working 
                             3                             16 
    discard due to LTM working                 Human-override 
                            12                             40 
       Human-override; discard  IDOC2 not working as expected 
                             2                             17 
                     Left-bias               Machine-override 
                             3                             82 
                       POST_NA 
                           368 

In [59]:
# Frequency of every PRE_Reason value after the annotation step.
table(data[["PRE_Reason"]])


                                ?   Human-override Machine-override 
            1002                3                2               18 
          PRE_NA   Pre-attraction     Pre-aversion       Right-bias 
             104              132               55                1 

In [60]:
# Number of animals whose PRE_Reason is "" or "?" and whose POST_Reason is
# "", "?", "Human-override" or "Machine-override".
data[
  PRE_Reason %in% c("", "?") &
    POST_Reason %in% c("", "?", "Human-override", "Machine-override"),
  .N
]

In [61]:
# Verify there are no repeats (the same fly represented more than once).
counts <- data[, .N, by = fly_name_reference]

# Name the offending flies in the error, so the failure can be diagnosed
# without re-running the check by hand.
repeated_flies <- counts[N != 1L, fly_name_reference]
if (length(repeated_flies) > 0) {
  stop(
    "fly_name_reference is not unique; repeated entries: ",
    paste(repeated_flies, collapse = ", "),
    call. = FALSE
  )
}

In [62]:
# Show any fly_name_reference that does not appear exactly once.
counts[N != 1L]

fly_name_reference,N
<chr>,<int>


In [63]:
# Give every row a sequential integer id.
# seq_len() is used instead of 1:.N, which would yield c(1, 0) on an empty table.
data[, id := seq_len(.N)]

In [64]:
# Save the wide table (PRE and POST as separate columns).
data.table::fwrite(x = data, file = "tidy_data_wide.csv")

# Long layout: the PRE and POST columns are stacked into a single PI column,
# with `test` telling which of the two each row comes from.
data_long <- melt(
  data,
  measure.vars = c("PRE", "POST"),
  variable.name = "test",
  value.name = "PI"
)
data.table::fwrite(x = data_long, file = "tidy_data.csv")

In [65]:
# Number of animals for every Genotype / Files combination.
data[, .N, by = c("Genotype", "Files")]

Genotype,Files,N
<chr>,<chr>,<int>
MB010B.(II)SPARC-Chrimson ISO,2021-09-30 (CH),12
MB010B.(II)SPARC-Chrimson ISO,2021-09-30 (CH1),9
MB010B.(II)SPARC-Chrimson ISO,2021-10-01 (CH),13
MB010B.(II)SPARC-Chrimson ISO,2021-10-01 (CH1),13
MB010B.(II)SPARC-Chrimson ISO,2021-10-01 (CH2),11
MB010B.(II)SPARC-GFP ISO,2021-10-02 (CH),13
MB010B.(II)SPARC-GFP ISO,2021-10-02 (CH1),13
MB010B.(II)SPARC-Chrimson ISO,2021-10-03 (CH),13
MB010B.(II)SPARC-Chrimson ISO,2021-10-03 (CH1),11
MB010B.(II)SPARC-Chrimson ISO,2021-10-03 (CH2),14


In [66]:
# For the animals whose POST score was taken from the manual annotation,
# count how many fall under each POST_Reason.
data_raw[POST_criteria == "manual", .N, by = "POST_Reason"]

POST_Reason,N
<chr>,<int>
Human-override,33
AOJ-override,3
discard due to LTM not working,7
discard due to LTM working,11
