In [323]:
library(readxl)
library(data.table)
library(ggplot2)
library(ggprism)
library(ggforce)
library(ggetho)
library(ggsignif)
library(zoo)
# Make the ggprism theme (base_size = 20) the default theme for every ggplot2 figure.
theme_set(theme_prism(base_size=20))
# Default width/height of plots rendered through repr (presumably inches -- TODO confirm).
options(repr.plot.width=15, repr.plot.height=15)
source("R/plot.R")
source("R/Cbind.R")
source("R/link_idoc_metadata.R")
source("R/library.R")

This notebook reads the IDOC results stored in "IDOC_METHODS_PAPER_METADATA.xlsx" and produces a CSV file with the data in tidy format. The output contains the following columns:

* Fly_group_experiment_treatment: experimental information of the fly 
* fly_name_reference: unique animal identifier, built as follows: date_time(scientist_suffix)_ROI_X
* experiment: type of memory evaluated (20min STM, 1h STM, LTM, LTM CHX, ...)
* id
* test: whether the PI was read PRE or POST conditioning
* PI: value of the PI
* Training_session: how many times CS-US pairing is presented
* Genotype: genotype of the fly
* interval: NONE, or ZTX-ZTY if the fly was sleep deprived (SD) between ZT X and ZT Y
* ZT4,ZT4.5,ZT5,ZT5.5,ZT6,ZT6.5,ZT7,ZT7.5,ZT8,ZT8.5,ZT9,ZT9.5,ZT10,ZT10.5,ZT11,ZT11.5,ZT12,ZT12.5,ZT13,ZT13.5,ZT14,ZT14.5,ZT15,ZT15.5,ZT16,ZT16.5,ZT17,ZT17.5,ZT18,ZT18.5,ZT19,ZT19.5,ZT20,ZT20.5,ZT21,ZT21.5,ZT22,ZT22.5,ZT23,ZT23.5,ZT24,ZT24.5,ZT25,ZT25.5,ZT26,ZT26.5,ZT27,ZT27.5,ZT28,ZT28.5,ZT29,ZT29.5,ZT30
* ZT4_interactions,ZT4.5_interactions,ZT5_interactions,ZT5.5_interactions,ZT6_interactions,ZT6.5_interactions,ZT7_interactions,ZT7.5_interactions,ZT8_interactions,ZT8.5_interactions,ZT9_interactions,ZT9.5_interactions,ZT10_interactions,ZT10.5_interactions,ZT11_interactions,ZT11.5_interactions,ZT12_interactions,ZT12.5_interactions,ZT13_interactions,ZT13.5_interactions,ZT14_interactions,ZT14.5_interactions,ZT15_interactions,ZT15.5_interactions,ZT16_interactions,ZT16.5_interactions,ZT17_interactions,ZT17.5_interactions,ZT18_interactions,ZT18.5_interactions,ZT19_interactions,ZT19.5_interactions,ZT20_interactions,ZT20.5_interactions,ZT21_interactions,ZT21.5_interactions,ZT22_interactions,ZT22.5_interactions,ZT23_interactions,ZT23.5_interactions,ZT24_interactions,ZT24.5_interactions,ZT25_interactions,ZT25.5_interactions,ZT26_interactions,ZT26.5_interactions,ZT27_interactions,ZT27.5_interactions,ZT28_interactions,ZT28.5_interactions,ZT29_interactions,ZT29.5_interactions,ZT30_interactions
* User: identifier of the scientist who collected the data (e.g. CH)


In [324]:
# AOJ dataset: load the per-fly metadata and keep only rows that
#   * have a fly_name_reference and are commented "OK",
#   * are flagged keep == TRUE,
#   * used one of the three accepted training protocols,
#   * are females ("FEM").
database <- "IDOC_metadata_AOJ - metadata.csv"
metadata_aoj <- data.table::fread(database)[
    fly_name_reference != "" &
        comment == "OK" &
        keep == TRUE &
        Training %in% c("1X", "6X_Spaced", "6X_Massed") &
        Gender == "FEM"
]

# Attach the overnight ethoscope information, matched on ROI pair and file.
metadata_etho_aoj <- data.table::fread("IDOC_metadata_AOJ - ethoscope_overnight.csv")
aoj_join_keys <- c("PRE_ROI", "POST_ROI", "Files")
aoj_etho_columns <- c(
    aoj_join_keys,
    "region_id", "date", "machine_name", "schedule",
    "SD_quality", "SD_quality_second_eye",
    "interactor", "interactor_time_window", "food"
)
# NOTE(review): all = TRUE is a full outer join, so rows found in only one of
# the two tables are kept with NA in the other table's columns -- confirm that
# ethoscope-only rows are meant to survive here.
metadata_aoj <- merge(
    metadata_aoj,
    metadata_etho_aoj[, aoj_etho_columns, with = FALSE],
    by = aoj_join_keys,
    all = TRUE
)
# Rows without ethoscope information get the placeholder food "NONE".
metadata_aoj[is.na(food), food := "NONE"]


In [325]:
# Display the merged AOJ metadata for a visual check.
metadata_aoj

PRE_ROI,POST_ROI,Files,row_id,fly_name_reference,incubator,reference_hour,US,CS,Dilution,⋯,group_size_in_vial,region_id,date,machine_name,schedule,SD_quality,SD_quality_second_eye,interactor,interactor_time_window,food
<int>,<int>,<chr>,<int>,<chr>,<chr>,<int>,<chr>,<chr>,<int>,⋯,<chr>,<int>,<IDate>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,1,2024-08-26_14-49-57_AOJ,228,2024-08-26_14-49-57_AOJ_ROI_1,B1,11,75V 0Ω,OCT,500,⋯,,,,,,,,,,NONE
1,1,2024-08-27_14-13-07_AOJ_ET,248,2024-08-27_14-13-07_AOJ_ET_ROI_1,B1,11,75V 0Ω,OCT,500,⋯,,,,,,,,,,NONE
1,1,2024-10-09_14-14-49_AOJ_ET_6X_spaced_24hrLTM,,,,,,,,⋯,,1,2024-10-09,ETHOSCOPE_031,NONE,NONE,NONE,DefaultStimulator,No_stimulator,sucrose
1,1,2024-10-12_14-35-24_AOJ_6X_spaced_24hrLTM,,,,,,,,⋯,,1,2024-10-12,ETHOSCOPE_031,NONE,NONE,NONE,DefaultStimulator,No_stimulator,sucrose
1,1,2024-10-15_14-30-17_AOJ_dnc,398,2024-10-15_14-30-17_AOJ_dnc_ROI_1,B1,11,75V 4.6kΩ,OCT,500,⋯,,,,,,,,,,NONE
1,1,2024-10-27_14-09-02_AOJ_Dnc_2U_STM,461,2024-10-27_14-09-02_AOJ_Dnc_2U_STM_ROI_1,B1,11,75V 4.6kΩ,OCT,500,⋯,,,,,,,,,,NONE
1,1,2024-10-28_14-10-07_ET_2U_Dnc_STM,481,2024-10-28_14-10-07_ET_2U_Dnc_STM_ROI_1,B1,11,75V 4.6kΩ,OCT,500,⋯,,,,,,,,,,NONE
1,1,2024-10-28_14-52-11_ET_2U_Dnc_STM,501,2024-10-28_14-52-11_ET_2U_Dnc_STM_ROI_1,B1,11,75V 4.6kΩ,OCT,500,⋯,,,,,,,,,,NONE
1,1,2024-10-29_14-37-00_ET_Iso31_2U_LTM,521,2024-10-29_14-37-00_ET_Iso31_2U_LTM_ROI_1,B1,12,75V 4.6kΩ,OCT,500,⋯,,1,2024-10-29,ETHOSCOPE_043,NONE,NONE,NONE,DefaultStimulator,No_stimulator,cornmeal
1,1,2024-10-29_15-38-36_ET_2U_Dnc_STM,541,2024-10-29_15-38-36_ET_2U_Dnc_STM_ROI_1,B1,12,75V 4.6kΩ,OCT,500,⋯,,,,,,,,,,NONE


In [326]:
# Number of AOJ rows per Genotype and Files combination, listed in Files order.
metadata_aoj[, .(N = .N), by = .(Genotype, Files)][order(Files)]

Genotype,Files,N
<chr>,<chr>,<int>
Cs,2024-08-26_14-49-57_AOJ,6
dnc,2024-08-26_14-49-57_AOJ,2
orco,2024-08-27_14-13-07_AOJ_ET,8
Cs,2024-08-27_14-13-07_AOJ_ET,5
dnc,2024-08-27_14-13-07_AOJ_ET,4
Cs,2024-08-27_15-35-18_AOJ_ET,3
dnc,2024-08-27_15-35-18_AOJ_ET,3
,2024-10-07_14-42-08_AOJ_6X_spaced_24hrLTM,10
,2024-10-09_14-14-49_AOJ_ET_6X_spaced_24hrLTM,17
,2024-10-12_14-35-24_AOJ_6X_spaced_24hrLTM,20


In [327]:
# CH dataset: read every sheet of the workbook, fill in the columns the other
# datasets provide, and drop flies whose PRE/POST reason is not acceptable.
database <- "/idoc_data/paper/CHuiData.ods"
sheets <- readODS::list_ods_sheets(database)
columns <- c(
    "Files", "Genotype", "PRE_ROI", "POST_ROI",
    "Gender", "Age", "ATR", "Training",
    "Odor", "Sugar", "Electric shock",
    "PRE", "PRE_1", "PRE_2",
    "POST", "POST_1", "POST_2",
    "fly_name_reference", "experiment",
    "PRE_Count", "POST_Count", "PRE_Reason", "POST_Reason"
)
metadata_ch <- read_idoc_metadata(
    file = database,
    sheets = sheets,
    columns = columns,
    backend = readODS::read_ods
)

# Constant values for the columns this workbook does not contain.
metadata_ch[, `:=`(
    SD_quality = "NONE",
    SD_quality_second_eye = "NONE",
    interactor_time_window = "No_stimulator",
    interactor = "DefaultStimulator",
    `comment on SD` = NA,
    User = "CH",
    region_id = "NONE",
    food = "cornmeal"
)]
# Only the two 24hr LTM experiments keep food = "cornmeal"; all others get "NONE".
metadata_ch[!(experiment %in% c("24hr LTM", "24hr LTM CHX")), food := "NONE"]
# metadata_ch<-metadata_ch[PRE_Count%in% c("V", "OK") & POST_Count %in% c("V", "OK"),]
# metadata_ch[, PRE_Count:=NULL]
# metadata_ch[, POST_Count:=NULL]

# Keep a fly only when both its PRE and POST reason are either missing or one
# of the accepted override comments; report the row count before and after.
print(nrow(metadata_ch))
valid_comments <- c("?", "Human-override", "Machine-override", "AOJ-override")
metadata_ch <- metadata_ch[
    PRE_Reason %in% c(NA, valid_comments) &
        POST_Reason %in% c(NA, valid_comments)
]
print(nrow(metadata_ch))


“NAs introduced by coercion”


[1] 607
[1] 456


In [328]:
# Display the sheet names found in the CH workbook.
sheets

In [329]:
# Number of CH flies per experiment type.
table(metadata_ch[["experiment"]])


            20min STM    20min STM unpaired              24hr LTM 
                  148                    60                   120 
         24hr LTM CHX 24hr LTM_inconsistent 
                  122                     6 

In [330]:
# Main workbook, the STM sheets to read from it, and the columns requested
# from every sheet of this workbook.
database <- "idoc_metadata.ods"
sheets <- c("20min_STM", "1hr_STM", "3hr_STM")
columns <- c(
    # file, genotype, ROIs and fly identity
    "Files", "Genotype", "PRE_ROI", "POST_ROI", "User", "fly_name_reference",
    # fly and protocol description
    "Gender", "Age", "ATR", "Training", "Odor", "Sugar", "Electric shock",
    # PRE / POST readouts
    "PRE", "PRE_1", "PRE_2", "POST", "POST_1", "POST_2",
    # curation flag
    "Keep",
    # sleep deprivation / interactor annotations
    "SD_quality", "SD_quality_second_eye", "interactor_time_window",
    "interactor", "comment on SD",
    # experiment label and PRE / POST reasons
    "experiment", "PRE_Reason", "POST_Reason"
)

## Read STM experiments

In [331]:
# STM sheets of the main workbook.
metadata_stm <- read_idoc_metadata(
    file = database,
    sheets = sheets,
    columns = columns,
    backend = readODS::read_ods
)
# STM flies never go to the ethoscope, hence no region_id and no food.
metadata_stm[, `:=`(region_id = "NONE", food = "NONE")]
# Only rows flagged Keep == TRUE are retained.
metadata_stm <- metadata_stm[Keep == TRUE]

“Deleting 21 42 63 82 103 124 145 166 rows from metadata”
“Deleting 21 42 63 rows from metadata”
“Deleting 21 42 63 84 105 126 rows from metadata”


In [332]:
# 24hr LTM sheet; it additionally carries the ethoscope columns
# date, machine_name and region_id.
metadata_ltm <- read_idoc_metadata(
    file = database,
    sheets = "24hr_LTM",
    columns = c(columns, "date", "machine_name", "region_id"),
    backend = readODS::read_ods
)
# Retain flies that were either not sleep deprived or whose SD was rated
# "good" in both quality columns; flies with a bad SD are discarded.
metadata_ltm <- metadata_ltm[
    SD_quality %in% c("No_sleep_depriver", "good") &
        SD_quality_second_eye %in% c("No_sleep_depriver", "good")
]
# Recode "No_sleep_depriver" as "NONE" in both quality columns.
metadata_ltm[SD_quality %in% "No_sleep_depriver", SD_quality := "NONE"]
metadata_ltm[SD_quality_second_eye %in% "No_sleep_depriver", SD_quality_second_eye := "NONE"]
metadata_ltm[, food := "sucrose"]

“NAs introduced by coercion”
“NAs introduced by coercion”
“Deleting 20 38 58 79 100 117 128 139 150 161 172 183 194 205 216 227 238 249 259 270 281 292 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 357 368 379 390 401 rows from metadata”


In [333]:
# 24hr LTM GtACR sheet, read with the same extra ethoscope columns as the
# 24hr_LTM sheet. No SD-quality filter is applied to this sheet.
metadata_gtacr <- read_idoc_metadata(
    file = database,
    sheets = "24hr_LTM_GTACR",
    columns = c(columns, "date", "machine_name", "region_id"),
    backend = readODS::read_ods
)
# Recode "No_sleep_depriver" as "NONE" in both quality columns.
metadata_gtacr[SD_quality %in% "No_sleep_depriver", SD_quality := "NONE"]
metadata_gtacr[SD_quality_second_eye %in% "No_sleep_depriver", SD_quality_second_eye := "NONE"]
metadata_gtacr[, food := "sucrose"]

“NAs introduced by coercion”
“NAs introduced by coercion”
“Deleting 11 22 33 44 55 66 rows from metadata”


In [334]:
# Stack the five metadata sources (STM, LTM, GtACR, CH, AOJ) into a single
# table sharing one set of columns. Columns a source does not provide are
# filled with a constant or a typed NA inside its `.()` selection.
#
# Every selection lists the columns in the same order, and the bind is done
# explicitly by name (use.names = TRUE), so a column can never end up under the
# wrong header if one of the selections is edited later.
metadata <- rbind(
    metadata_stm[, .(
        PRE_Reason, POST_Reason, fly_name_reference,
        date = NA_character_, reference_hour = NA_real_, machine_name = NA_character_,
        CS = "OCT", food, region_id = NA,
        Files, PRE_ROI, POST_ROI, Genotype, Gender, Age,
        interactor, interactor_time_window, Training,
        SD_quality, SD_quality_second_eye, `comment on SD`,
        User, experiment, PRE_1, PRE_2, PRE, POST_1, POST_2, POST
    )],
    metadata_ltm[, .(
        PRE_Reason, POST_Reason, fly_name_reference,
        date = as.character(date), reference_hour = 11, machine_name,
        CS = "OCT", food, region_id,
        Files, PRE_ROI, POST_ROI, Genotype, Gender, Age,
        interactor, interactor_time_window, Training,
        SD_quality, SD_quality_second_eye, `comment on SD`,
        User, experiment, PRE_1, PRE_2, PRE, POST_1, POST_2, POST
    )],
    metadata_gtacr[, .(
        PRE_Reason, POST_Reason, fly_name_reference,
        date = as.character(date), reference_hour = NA_real_, machine_name,
        CS = "OCT", food, region_id,
        Files, PRE_ROI, POST_ROI, Genotype, Gender, Age,
        interactor, interactor_time_window, Training,
        SD_quality, SD_quality_second_eye, `comment on SD`,
        User, experiment, PRE_1, PRE_2, PRE, POST_1, POST_2, POST
    )],
    metadata_ch[, .(
        PRE_Reason, POST_Reason, fly_name_reference,
        date = NA_character_, reference_hour = 11, machine_name = NA_character_,
        CS = "OCT", food, region_id = NA,
        Files, PRE_ROI, POST_ROI, Genotype, Gender, Age,
        interactor, interactor_time_window, Training,
        SD_quality, SD_quality_second_eye, `comment on SD`,
        User, experiment, PRE_1, PRE_2, PRE, POST_1, POST_2, POST
    )],
    metadata_aoj[, .(
        PRE_Reason, POST_Reason, fly_name_reference,
        date = as.character(date), reference_hour, machine_name,
        CS, food, region_id,
        Files, PRE_ROI, POST_ROI, Genotype, Gender, Age,
        interactor, interactor_time_window, Training,
        SD_quality, SD_quality_second_eye, `comment on SD` = "NONE",
        User, experiment, PRE_1, PRE_2, PRE, POST_1, POST_2, POST
    )],
    use.names = TRUE
)

In [335]:
# Relabel the experiment "24hr LTM CHX" as "24hr LTM CXM" (updated in place).
data.table::set(
    metadata,
    i = which(metadata$experiment == "24hr LTM CHX"),
    j = "experiment",
    value = "24hr LTM CXM"
)

In [336]:
# Collapse SD_quality and SD_quality_second_eye into a single SD_status column:
#   "NONE"    default
#   "bad"     a stimulator time window is set, but not both qualities are "good"
#   "good"    both qualities are "good"
#   "unknown" both qualities are "unknown" (gtacr flies); assigned last so it
#             overrides "bad"
metadata[, SD_status := "NONE"]
metadata[
    interactor_time_window != "No_stimulator" &
        (SD_quality != "good" | SD_quality_second_eye != "good"),
    SD_status := "bad"
]
metadata[SD_quality == "good" & SD_quality_second_eye == "good", SD_status := "good"]
metadata[SD_quality == "unknown" & SD_quality_second_eye == "unknown", SD_status := "unknown"]

# Expose the interactor time window under the name `interval`, then drop the
# source columns that SD_status and interval replace.
metadata[, interval := interactor_time_window]
metadata[, c("interactor_time_window", "SD_quality", "SD_quality_second_eye") := NULL]

In [337]:
# Save the combined per-fly metadata as metadata.csv in the working directory.
data.table::fwrite(metadata, "metadata.csv")

In [338]:
# Read the 24hr_LTM sheet again and keep only the rows without a PRE_ROI.
metadata_etho <- read_idoc_metadata(
    file = database,
    sheets = "24hr_LTM",
    columns = c(columns, "date", "machine_name", "region_id"),
    backend = readODS::read_ods
)[is.na(PRE_ROI)]

“NAs introduced by coercion”
“NAs introduced by coercion”
“Deleting 20 38 58 79 100 117 128 139 150 161 172 183 194 205 216 227 238 249 259 270 281 292 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 357 368 379 390 401 rows from metadata”


In [339]:
# Every row of metadata_etho gets reference_hour 11 and Genotype "Iso31".
metadata_etho[, `:=`(reference_hour = 11, Genotype = "Iso31")]

In [340]:
# Save metadata_etho as metadata_etho.csv in the working directory.
data.table::fwrite(metadata_etho, "metadata_etho.csv")

In [341]:
# Frequency of each POST_Reason value (table() leaves NA out by default).
table(metadata[["POST_Reason"]])


                                               
                                           329 
                                             ? 
                                             3 
                                  AOJ-override 
                                             3 
                discard due to LTM not working 
                                            20 
                    discard due to LTM working 
                                            16 
                                Human-override 
                                            44 
                       Human-override; discard 
                                             3 
Human-override; discard due to LTM not working 
                                             1 
                 IDOC2 not working as expected 
                                            31 
                                     Left-bias 
                                             4 
                              Machine-o

In [342]:
# Number of rows whose POST_Reason is missing.
metadata[is.na(POST_Reason), .N]

In [343]:
#sort(unique(metadata$Files))

In [344]:
# Number of rows per Genotype and Files combination, listed in Files order.
metadata[, .(N = .N), by = .(Genotype, Files)][order(Files)]

Genotype,Files,N
<chr>,<chr>,<int>
MB010B.(II)SPARC-Chrimson ISO,2021-09-30 (CH),12
MB010B.(II)SPARC-Chrimson ISO,2021-09-30 (CH1),9
MB010B.(II)SPARC-Chrimson ISO,2021-10-01 (CH),13
MB010B.(II)SPARC-Chrimson ISO,2021-10-01 (CH1),13
MB010B.(II)SPARC-Chrimson ISO,2021-10-01 (CH2),11
MB010B.(II)SPARC-GFP ISO,2021-10-02 (CH),13
MB010B.(II)SPARC-GFP ISO,2021-10-02 (CH1),13
MB010B.(II)SPARC-Chrimson ISO,2021-10-03 (CH),13
MB010B.(II)SPARC-Chrimson ISO,2021-10-03 (CH1),11
MB010B.(II)SPARC-Chrimson ISO,2021-10-03 (CH2),14
