# DIRAC Analysis of LC M001-related Transcriptomics — Statistical Tests (GOBP Modules)

***by Kengo Watanabe***  

In the main Python notebook, the differential rank conservation (DIRAC; Eddy, J.A. et al. PLoS Comput. Biol. 2010) analysis is performed on the preprocessed Longevity Consortium (LC) M001-related transcriptomics dataset (Tyshkovskiy, A. et al. Cell Metab. 2019; analytes detected in all samples; sample-based robust Z-score followed by analyte-based robust Z-score) using the retrieved a priori module set (Gene Ontology (Biological Process) derived by R org.Mm.eg.db package; ≥4 analytes and ≥50% coverage).  
**–> Because Dunnett's test does not seem to be available in Python yet, the statistical tests are performed in this sub-notebook with an R kernel.**  

Input:  
* Cleaned module metadata: 220522_LC-M001-related-transcriptomics-DIRAC_DIRAC-GOBP_ver2-4_module-metadata.tsv  
* Preprocessed analyte data (for taking IDs of the samples analyzed in DIRAC analysis): 201221_LC-M001-related-transcriptomics-DIRAC_cleaned-robustZscored.tsv  
* Sample–mouse metadata: 201221_LC-M001-related-transcriptomics-DIRAC_metadata.tsv  
* Tables of DIRAC RMSs: 210429_LC-M001-related-transcriptomics-DIRAC_DIRAC-GOBP_ver2_orgMmegdb-GOBP_min-n4-cov50_RankMatchingScore-BS-[digit].tsv  
* Tables of DIRAC RCIs: 210429_LC-M001-related-transcriptomics-DIRAC_DIRAC-GOBP_ver2_orgMmegdb-GOBP_min-n4-cov50_RankConservationIndex-BS-[digit].tsv  

Output:  
* Supplementary Data 5  

Original notebook (memo for my future tracing):  
* dalek:[JupyterLab HOME]/201221_LC-M001-related-transcriptomics-DIRAC/220522_LC-M001-related-transcriptomics-DIRAC_StatisticalTest-GOBP_ver2-4.ipynb  

In [None]:
library("tidyverse")
options(repr.plot.width=5, repr.plot.height=5)#Default=7x7

#CRAN
#Attach each package and print its version
for (package in c("readxl", "multcomp", "openxlsx")) {
    #install.packages(package)
    #character.only=TRUE makes library() take the package name from the string variable
    library(package, character.only=TRUE)
    print(str_c(package, ": ", as.character(packageVersion(package))))
}

## 1. Prepare dataset and metadata

In [None]:
#Import the cleaned module metadata (tab-delimited file)
fileDir <- "./ExportData/"
ipynbName <- "220522_LC-M001-related-transcriptomics-DIRAC_DIRAC-GOBP_ver2-4_"
fileName <- "module-metadata.tsv"
temp <- str_c(fileDir,ipynbName,fileName) %>%
    read_delim(delim="\t")
##Check the number of rows and the first rows
print(str_c("nrow: ",nrow(temp)))
head(temp)

##Keep the table under its own name
module_meta <- temp

In [None]:
#Take the column names of the preprocessed analyte table
##(they are used below to keep only the samples analyzed in the DIRAC analysis)
fileDir <- "./ExportData/"
ipynbName <- "201221_LC-M001-related-transcriptomics-DIRAC_"
fileName <- "cleaned-robustZscored.tsv"
temp <- read_delim(str_c(fileDir,ipynbName,fileName), delim="\t")
analyzed_names <- names(temp)

#Import sample-mouse metadata (same directory and file prefix as above)
fileName <- "metadata.tsv"
temp <- str_c(fileDir,ipynbName,fileName) %>%
    read_delim(delim="\t") %>%
    dplyr::rename(SampleID=ID) %>%
    dplyr::filter(SampleID %in% analyzed_names) %>%
    dplyr::select(SampleID, Intervention, Sex, Age) %>%
    ##Re-label: "-" -> "Control" and "CR" -> "Calorie restriction"
    ##("Acarbose" and "Rapamycin" are used as they are)
    dplyr::mutate(Intervention=str_replace(Intervention, "-", "Control"),
                  Intervention=str_replace(Intervention, "CR", "Calorie restriction"),
                  Phenotype=Intervention)
print(str_c("nrow: ",nrow(temp)))
head(temp)

sample_meta <- temp

## 2. Perform DIRAC with sex-pooled rank consensus

In [None]:
#Import the DIRAC results (nSub files for RMS and nSub files for RCI, row-bound in the file order)
nSub <- 25
fileDir <- "./ExportData/"
ipynbName <- "210429_LC-M001-related-transcriptomics-DIRAC_DIRAC-GOBP_ver2_"
##Reader shared by both table types: import one file and rename NetworkID to ModuleID
read_subset <- function(prefix, index) {
    str_c(fileDir,ipynbName,str_c(prefix,index,".tsv")) %>%
        read_delim(delim="\t", show_col_types=FALSE) %>%
        dplyr::rename(ModuleID=NetworkID)
}
##Two-digit, zero-padded file indices ("01", "02", ...)
index_vec <- str_pad(as.character(seq_len(nSub)), 2, side="left", pad="0")
rms_tbl <- index_vec %>%
    lapply(function(index) {read_subset("orgMmegdb-GOBP_min-n4-cov50_RankMatchingScore-BS-", index)}) %>%
    dplyr::bind_rows()
rci_tbl <- index_vec %>%
    lapply(function(index) {read_subset("orgMmegdb-GOBP_min-n4-cov50_RankConservationIndex-BS-", index)}) %>%
    dplyr::bind_rows()
print(str_c("nrow: ",nrow(rms_tbl)))
head(rms_tbl)
print(str_c("nrow: ",nrow(rci_tbl)))
head(rci_tbl)

## 3. Rank conservation index: general pattern

### 3-1. Extract RCI (the mean of RMSs under the own phenotype consensus)

In [None]:
#Extract RCI whose template phenotype corresponds to the own phenotype
##Phenotype columns = all the columns of rci_tbl other than ModuleID and Template
phenotype_vec <- setdiff(names(rci_tbl), c("ModuleID", "Template"))
temp <- tibble(ModuleID=unique(rci_tbl$ModuleID))
for (k in phenotype_vec) {
    ##Rows under the template k, keeping only the column of the same phenotype k
    own_rci <- rci_tbl %>%
        dplyr::filter(Template==!!k) %>%
        dplyr::select(ModuleID, all_of(k))
    temp <- dplyr::left_join(temp, own_rci, by="ModuleID")
}
#Order and re-label (the column order below follows group_vec)
group_vec <- c("Cont", "Acar", "Rapa", "Calo")
temp <- temp %>%
    dplyr::select(ModuleID,
                  Cont=Control,
                  Acar=Acarbose,
                  Rapa=Rapamycin,
                  Calo=`Calorie restriction`)
print(str_c("nrow: ",nrow(temp)))
head(temp)
summary(temp)

rci_kk <- temp

### 3-2. Dunnett's test

In [None]:
#Perform Dunnett's test on the RCIs of all modules (Group as the only factor)
##Long format: one row per module x group, with the group order of group_vec
temp <- rci_kk %>%
    tidyr::gather(key=Group, value=RCI, -ModuleID) %>%
    dplyr::mutate(Group=factor(Group, levels=group_vec))
##One-way ANOVA model, followed by two-sided Dunnett contrasts
fit <- aov(RCI~Group, data=temp)
model <- glht(fit, linfct=mcp(Group="Dunnett"), alternative="two.sided")
summary(model)

In [None]:
#Summarize the Dunnett's test on the pooled RCIs into a one-row table
##summary() of the glht object is evaluated only once and reused, so that all the exported
##columns come from the same evaluation of the test
##NOTE(review): the adjusted p-values are presumably obtained by numerical integration, so
##separate summary() calls may not return exactly the same values -- confirm in the multcomp documentation
model_test <- summary(model)$test

#Prepare contrast labels (e.g., "Acar - Cont" -> "Acar-vs-Cont")
contrast_vec <- names(model_test$coefficients) %>%
    str_replace(., " - ", "-vs-")

#Prepare summary table (one set of columns per contrast)
temp <- tibble(ModuleID="All")
for (i in seq_along(contrast_vec)) {
    label <- contrast_vec[i]
    temp <- temp %>%
        dplyr::mutate("{label}_Coef":=unname(model_test$coefficients[i]),
                      "{label}_CoefSE":=unname(model_test$sigma[i]),
                      "{label}_tStat":=unname(model_test$tstat[i]),
                      "{label}_AdjPval":=unname(model_test$pvalues[i]))
}
print(str_c("nrow: ",nrow(temp)))
head(temp)

summary_tbl <- temp

In [None]:
#Add general statistics
##Calculate general statistics (N, mean, and SEM of RCIs per group)
sem <- function(x) {
    sd(x)/sqrt(length(x))
}
temp <- rci_kk %>%
    tidyr::gather(key=Group, value=RCI, -ModuleID) %>%
    dplyr::mutate(Group=factor(Group, levels=group_vec)) %>%
    dplyr::group_by(Group) %>%
    dplyr::summarize(Count=n(), RCImean=mean(RCI), RCIsem=sem(RCI))
##Lay out the group statistics in one row, in the order of group_vec
temp1 <- tibble(ModuleID="All")
for (group in group_vec) {
    group_stat <- temp[which(temp$Group==group), ]
    temp1 <- temp1 %>%
        dplyr::mutate("{group}_N":=!!group_stat$Count,
                      "{group}_RCImean":=!!group_stat$RCImean,
                      "{group}_RCIsem":=!!group_stat$RCIsem)
}
print(str_c("nrow: ",nrow(temp1)))
head(temp1)
##Merge with the Dunnett's test summary (statistics columns come first)
temp <- temp1 %>%
    dplyr::left_join(summary_tbl, by="ModuleID")
print(str_c("nrow: ",nrow(temp)))
head(temp)

summary_tbl <- temp

In [None]:
#Create a workbook object to save as one single .xlsx file
workbook <- createWorkbook()

#Add the module metadata sheet and then the summary table sheet
sheet_list <- list(ModuleMetadata=module_meta, RCImean=summary_tbl)
for (sheetName in names(sheet_list)) {
    addWorksheet(workbook, sheetName=sheetName)
    writeData(workbook, sheetName, sheet_list[[sheetName]])
}

#Save the workbook as one single .xlsx file (an existing file is overwritten)
fileDir <- "./ExportData/"
ipynbName <- "220522_LC-M001-related-transcriptomics-DIRAC_StatisticalTest-GOBP_ver2-4_"
fileName <- "inter-group-comparison.xlsx"
xlsxPath <- str_c(fileDir,ipynbName,fileName)
saveWorkbook(workbook, file=xlsxPath, overwrite=TRUE)

# — †1. Go back to †1 of the main Python notebook —  

## 4. Rank conservation index: inter-group module comparison

> Test specific hypothesis: control RCI == intervention RCI (i.e., inter-group module comparison).  
> 1. Testing the main effect of intervention on rank matching scores (RMSs) for each module using an ANOVA model  
> 2. Then, performing post-hoc comparisons of RMSs between control vs. each intervention using Dunnett's test  
>  
> Since RCI is the mean of RMSs under the own phenotype consensus, not RCI but RMS is practically used to obtain the variance around the group consensus. In line with using the sex-pooled rank consensus, sex is NOT included in the ANOVA model, which is a change from version 2-2. Note that ANOVA can shrink the variance by utilizing all samples (per module), so its statistical power is better than that of the repeated Welch's t-tests (version 2-3) in the case of small sample size. Although tricky, the P-value adjustment in (1) is performed across modules under the assumption that modules are independent, which would be more conservative and less likely to raise referees' eyebrows for a Venn diagram-type summary than using a nominal P-value cutoff. Because the post-hoc comparisons (2) are to address the effect of each intervention within a specific module, the P-values are adjusted across interventions only within the module (not across modules) using Dunnett's test.  

### 4-1. Extract RMS under the own phenotype consensus

In [None]:
#Extract RMS whose template phenotype corresponds to the own phenotype
##Phenotype names = all the columns of rci_tbl other than ModuleID and Template
phenotype_vec <- setdiff(names(rci_tbl), c("ModuleID", "Template"))
temp <- tibble(ModuleID=unique(rms_tbl$ModuleID))
for (k in phenotype_vec) {
    ##Samples whose phenotype is k
    temp1 <- sample_meta %>%
        dplyr::filter(Phenotype==!!k) %>%
        dplyr::pull(SampleID)
    ##Their RMSs under the template of the same phenotype k
    own_rms <- rms_tbl %>%
        dplyr::filter(Template==!!k) %>%
        dplyr::select(ModuleID, all_of(temp1))
    temp <- dplyr::left_join(temp, own_rms, by="ModuleID")
}
print(str_c("nrow: ",nrow(temp)))
head(temp)
summary(temp[, 1:10])

rms_kk <- temp

### 4-2. ANOVA test (RMS ~ Intervention), followed by Dunnett's test (Intervention)

#### 4-2-1. Simultaneously perform all tests

In [None]:
#Prepare DF (long format: one row per module x sample, with the sample metadata attached)
temp <- rms_kk %>%
    tidyr::gather(key=SampleID, value=RMS, -ModuleID) %>%
    dplyr::left_join(., sample_meta, by="SampleID") %>%
    ##Shorten the intervention labels to those of group_vec, then set the factor levels
    dplyr::mutate(Intervention=str_replace(Intervention, "Control", "Cont"),
                  Intervention=str_replace(Intervention, "Acarbose", "Acar"),
                  Intervention=str_replace(Intervention, "Rapamycin", "Rapa"),
                  Intervention=str_replace(Intervention, "Calorie restriction", "Calo"),
                  Intervention=factor(Intervention, levels=group_vec),
                  Sex=factor(Sex, levels=c("F", "M")),
                  Age=factor(str_c(as.character(Age),"m"), levels=c("6m", "12m")))
print(nrow(temp))
head(temp)

#Simultaneously perform all tests using tidyr::nest()
##Per-module model fitters (tbl = the nested data of one module)
fit_anova <- function(tbl) {
    aov(RMS~Intervention, data=tbl)
}
fit_dunnett <- function(tbl) {
    glht(aov(RMS~Intervention, data=tbl),
         linfct=mcp(Intervention="Dunnett"),
         alternative="two.sided")
}
temp <- temp %>%
    dplyr::group_by(ModuleID) %>%
    tidyr::nest() %>%#New column name becomes "data"
    dplyr::mutate(ANOVA=lapply(data, fit_anova),
                  Dunnett=lapply(data, fit_dunnett)) %>%
    dplyr::ungroup()
print(nrow(temp))
print(head(temp))#print() because Jupyter Lab tries to display list contents

model <- temp

In [None]:
#Inspect the result objects stored in the first row of the model table
summary(model[["ANOVA"]][[1]])
summary(model[["Dunnett"]][[1]])

#### 4-2-2. Summarize all result objects into a table

In [None]:
#Summarize all the per-module ANOVA and Dunnett's test objects into one table
##summary() is evaluated only once per model and reused for all the exported columns
##(the original re-evaluated it for every column of every model)
##NOTE(review): the adjusted p-values of glht are presumably obtained by numerical integration,
##so reusing one summary() also keeps the columns of a model mutually consistent -- confirm in the multcomp documentation
anova_tbl_list <- lapply(model$ANOVA, function(fit) {summary(fit)[[1]]})
dunnett_test_list <- lapply(model$Dunnett, function(fit) {summary(fit)$test})
##Take one numeric value per model; NA is returned when the requested element is absent
extract_num <- function(x_list, getter) {
    vapply(x_list, function(x) {
        value <- getter(x)
        if (length(value)==1) as.numeric(value) else NA_real_
    }, numeric(1), USE.NAMES=FALSE)
}

#Prepare variable labels (taken from the ANOVA table of the first model)
variable_vec <- rownames(anova_tbl_list[[1]]) %>%
    str_replace(., " *$", "")#Remove white spaces
variable_vec <- head(variable_vec, -1)#Remove Residuals

#Prepare summary table of ANOVA tests
temp1 <- model %>%
    dplyr::select(ModuleID)
for (i in seq_along(variable_vec)) {
    label <- variable_vec[i]
    temp1 <- temp1 %>%
        dplyr::mutate("{label}_DF":=extract_num(anova_tbl_list, function(tbl) {tbl[["Df"]][i]}),
                      "{label}_SumSq":=extract_num(anova_tbl_list, function(tbl) {tbl[["Sum Sq"]][i]}),
                      "{label}_MeanSq":=extract_num(anova_tbl_list, function(tbl) {tbl[["Mean Sq"]][i]}),
                      "{label}_Fstat":=extract_num(anova_tbl_list, function(tbl) {tbl[["F value"]][i]}),
                      "{label}_Pval":=extract_num(anova_tbl_list, function(tbl) {tbl[["Pr(>F)"]][i]})) %>%
        #P-value adjustment with the Benjamini-Hochberg method (across all the rows, i.e., across modules)
        ##The column is referred to by its constructed name via !!as.name()
        dplyr::mutate("{label}_AdjPval":=p.adjust(!!as.name(str_c(label,"_Pval")), method="BH"))
}
print(str_c("nrow: ",nrow(temp1)))
head(temp1)

#Prepare contrast labels (taken from the first model; e.g., "Acar - Cont" -> "Acar-vs-Cont")
contrast_vec <- names(dunnett_test_list[[1]]$coefficients) %>%
    str_replace(., " - ", "-vs-")

#Prepare summary table of Dunnett's tests (the i-th contrast of each model)
temp2 <- model %>%
    dplyr::select(ModuleID)
for (i in seq_along(contrast_vec)) {
    label <- contrast_vec[i]
    temp2 <- temp2 %>%
        dplyr::mutate("{label}_Coef":=extract_num(dunnett_test_list, function(test) {test$coefficients[i]}),
                      "{label}_CoefSE":=extract_num(dunnett_test_list, function(test) {test$sigma[i]}),
                      "{label}_tStat":=extract_num(dunnett_test_list, function(test) {test$tstat[i]}),
                      "{label}_AdjPval":=extract_num(dunnett_test_list, function(test) {test$pvalues[i]}))
}
print(str_c("nrow: ",nrow(temp2)))
head(temp2)

#Merge
temp <- dplyr::left_join(temp1, temp2, by="ModuleID")
print(str_c("nrow: ",nrow(temp)))
head(temp)

summary_tbl <- temp

> –> These warnings were due to the models having invariable values (x models x 3 contrasts x 4 sapply() calls).  

In [None]:
#Add general statistics
##Calculate general statistics (per module and intervention group: N, mean, and SEM of RMSs)
sem <- function(x) {
    sd(x)/sqrt(length(x))
}
temp <- rms_kk %>%
    tidyr::gather(key=SampleID, value=RMS, -ModuleID) %>%
    dplyr::left_join(., sample_meta, by="SampleID") %>%
    ##Shorten the intervention labels to those of group_vec, then set the factor levels
    dplyr::mutate(Intervention=str_replace(Intervention, "Control", "Cont"),
                  Intervention=str_replace(Intervention, "Acarbose", "Acar"),
                  Intervention=str_replace(Intervention, "Rapamycin", "Rapa"),
                  Intervention=str_replace(Intervention, "Calorie restriction", "Calo"),
                  Intervention=factor(Intervention, levels=group_vec),
                  Sex=factor(Sex, levels=c("F", "M")),
                  Age=factor(str_c(as.character(Age),"m"), levels=c("6m", "12m"))) %>%
    dplyr::group_by(ModuleID, Intervention) %>%
    dplyr::summarize(Count=n(), RMSmean=mean(RMS), RMSsem=sem(RMS)) %>%
    dplyr::ungroup()
##Wide format: one set of columns per group, joined in the order of group_vec
temp1 <- dplyr::select(module_meta, ModuleID, ModuleName)
for (group in group_vec) {
    group_stat <- temp %>%
        dplyr::filter(Intervention==!!group) %>%
        dplyr::select(-Intervention) %>%
        dplyr::rename("{group}_N":=Count,
                      "{group}_RMSmean":=RMSmean,
                      "{group}_RMSsem":=RMSsem)
    temp1 <- dplyr::left_join(temp1, group_stat, by="ModuleID")
}
print(str_c("nrow: ",nrow(temp1)))
head(temp1)
##Merge with the test summary and sort the modules by the ANOVA P-value of Intervention
temp <- temp1 %>%
    dplyr::left_join(summary_tbl, by="ModuleID") %>%
    dplyr::arrange(Intervention_Pval)
print(str_c("nrow: ",nrow(temp)))
head(temp)

summary_tbl <- temp

> (Note that the beta-coefficient estimate is equivalent to the difference in the mean of RMSs; e.g., Acar-vs-Cont_Coef = Acar_RMSmean - Cont_RMSmean.)  

In [None]:
#Append the summary table to the existing .xlsx file as a new sheet
fileDir <- "./ExportData/"
ipynbName <- "220522_LC-M001-related-transcriptomics-DIRAC_StatisticalTest-GOBP_ver2-4_"
fileName <- "inter-group-comparison.xlsx"
xlsxPath <- str_c(fileDir,ipynbName,fileName)
##Load the existing .xlsx file as a new workbook object
workbook <- loadWorkbook(xlsxPath)
##Add the tibble object as a new sheet
sheetName <- "RCI"
addWorksheet(workbook, sheetName=sheetName)
writeData(workbook, sheetName, summary_tbl)
##Overwrite the existing .xlsx file with the updated workbook
saveWorkbook(workbook, file=xlsxPath, overwrite=TRUE)

#Re-order module_meta by the row order of summary_tbl, to use this module order later in this notebook
temp <- summary_tbl %>%
    dplyr::select(ModuleID, ModuleName)
module_meta <- dplyr::left_join(temp,
                                dplyr::select(module_meta, -ModuleName),
                                by="ModuleID")
head(module_meta)

# — †2. Go back to †2 of the main Python notebook —  

## 5. Rank matching score under a fixed consensus: inter-group module comparison

> Test specific hypotheses: control RMS mean == intervention RMS mean per a fixed rank consensus for each module (i.e., inter-group module comparison).  
>
> 3. performing Dunnett's tests per rank consensus for each module.  
>  
> Because the main interest is whether the changed patterns are similar or not in a changed module (i.e., this is a downstream analysis of RCI), ANOVA and the p-value adjustment across modules are unnecessary. In all the (3) tests for each rank consensus, the Dunnett's test p-values are NOT further adjusted across all tests because the hypotheses across rank consensuses are not independent. In each Dunnett's test, the rank consensus group is eliminated because its mean of RMSs is RCI (i.e., the expected mean and variance are different) and because the number of hypotheses can be reduced.  

### 5-1. RMS under Ctrl consensus

In [None]:
#Phenotype whose rank consensus is fixed in this subsection (compared with rms_tbl$Template in the following cell)
template <- "Control"

#### 5-1-1. Extract RMS under the fixed consensus

In [None]:
#Extract RMS whose template phenotype corresponds to the fixed phenotype
##Keep the rows under the fixed template, then drop the Template column
temp <- dplyr::filter(rms_tbl, Template==!!template)
temp <- dplyr::select(temp, -Template)
print(str_c("nrow: ",nrow(temp)))
head(temp)
summary(temp[, 1:10])

rms_kk <- temp

#### 5-1-2. Simultaneously perform all tests (Dunnett's test)

> Skip because template is Control.  

#### 5-1-3. Summarize all result objects into a table

In [None]:
#Add general statistics
##Calculate general statistics (per module and intervention group: N, mean, and SEM of RMSs)
sem <- function(x) {
    sd(x)/sqrt(length(x))
}
temp <- rms_kk %>%
    tidyr::gather(key=SampleID, value=RMS, -ModuleID) %>%
    dplyr::left_join(., sample_meta, by="SampleID") %>%
    ##Shorten the intervention labels to those of group_vec, then set the factor levels
    dplyr::mutate(Intervention=str_replace(Intervention, "Control", "Cont"),
                  Intervention=str_replace(Intervention, "Acarbose", "Acar"),
                  Intervention=str_replace(Intervention, "Rapamycin", "Rapa"),
                  Intervention=str_replace(Intervention, "Calorie restriction", "Calo"),
                  Intervention=factor(Intervention, levels=group_vec),
                  Sex=factor(Sex, levels=c("F", "M")),
                  Age=factor(str_c(as.character(Age),"m"), levels=c("6m", "12m"))) %>%
    dplyr::group_by(ModuleID, Intervention) %>%
    dplyr::summarize(Count=n(), RMSmean=mean(RMS), RMSsem=sem(RMS)) %>%
    dplyr::ungroup()
##Wide format: one set of columns per group, joined in the order of group_vec
temp1 <- dplyr::select(module_meta, ModuleID, ModuleName)
for (group in group_vec) {
    group_stat <- temp %>%
        dplyr::filter(Intervention==!!group) %>%
        dplyr::select(-Intervention) %>%
        dplyr::rename("{group}_N":=Count,
                      "{group}_RMSmean":=RMSmean,
                      "{group}_RMSsem":=RMSsem)
    temp1 <- dplyr::left_join(temp1, group_stat, by="ModuleID")
}
print(str_c("nrow: ",nrow(temp1)))
head(temp1)
##(Module order follows the updated module_meta based on the Intervention effect of ANOVA (4-2-2))

summary_tbl <- temp1

In [None]:
#Append the summary table to the existing .xlsx file as a new sheet
fileDir <- "./ExportData/"
ipynbName <- "220522_LC-M001-related-transcriptomics-DIRAC_StatisticalTest-GOBP_ver2-4_"
fileName <- "inter-group-comparison.xlsx"
xlsxPath <- str_c(fileDir,ipynbName,fileName)
##Load the existing .xlsx file as a new workbook object
workbook <- loadWorkbook(xlsxPath)
##Add the tibble object as a new sheet
sheetName <- "Cont-fixed-RMSmean"
addWorksheet(workbook, sheetName=sheetName)
writeData(workbook, sheetName, summary_tbl)
##Overwrite the existing .xlsx file with the updated workbook
saveWorkbook(workbook, file=xlsxPath, overwrite=TRUE)

### 5-2. RMS under Aca consensus

In [None]:
#Phenotype whose rank consensus is fixed in this subsection (compared with rms_tbl$Template and sample_meta$Phenotype in the following cells)
template <- "Acarbose"

#### 5-2-1. Extract RMS under the fixed consensus

In [None]:
#Extract RMS whose template phenotype corresponds to the fixed phenotype
##Keep the rows under the fixed template, then drop the Template column
temp <- dplyr::filter(rms_tbl, Template==!!template)
temp <- dplyr::select(temp, -Template)
print(str_c("nrow: ",nrow(temp)))
head(temp)
summary(temp[, 1:10])

rms_kk <- temp

#### 5-2-2. Simultaneously perform all tests (Dunnett's test)

In [None]:
#Prepare DF (long format: one row per module x sample, with the sample metadata attached)
temp <- rms_kk %>%
    tidyr::gather(key=SampleID, value=RMS, -ModuleID) %>%
    dplyr::left_join(., sample_meta, by="SampleID") %>%
    dplyr::filter(Phenotype!=!!template) %>%#Eliminate the case of own consensus
    ##Shorten the intervention labels to those of group_vec, then set the factor levels
    dplyr::mutate(Intervention=str_replace(Intervention, "Control", "Cont"),
                  Intervention=str_replace(Intervention, "Acarbose", "Acar"),
                  Intervention=str_replace(Intervention, "Rapamycin", "Rapa"),
                  Intervention=str_replace(Intervention, "Calorie restriction", "Calo"),
                  Intervention=factor(Intervention, levels=group_vec),
                  Sex=factor(Sex, levels=c("F", "M")),
                  Age=factor(str_c(as.character(Age),"m"), levels=c("6m", "12m")))
print(nrow(temp))
head(temp)

#Simultaneously perform all tests using tidyr::nest()
##Per-module model fitter (tbl = the nested data of one module)
fit_dunnett <- function(tbl) {
    glht(aov(RMS~Intervention, data=tbl),
         linfct=mcp(Intervention="Dunnett"),
         alternative="two.sided")
}
temp <- temp %>%
    dplyr::group_by(ModuleID) %>%
    tidyr::nest() %>%#New column name becomes "data"
    dplyr::mutate(Dunnett=lapply(data, fit_dunnett)) %>%
    dplyr::ungroup()
print(nrow(temp))
print(head(temp))#print() because Jupyter Lab tries to display list contents

model <- temp

In [None]:
#Inspect the result objects stored in the first two rows of the model table
summary(model[["Dunnett"]][[1]])
summary(model[["Dunnett"]][[2]])

#### 5-2-3. Summarize all result objects into a table

In [None]:
#Summarize all the per-module Dunnett's test objects into one table
##summary() is evaluated only once per model and reused for all the exported columns
##(the original re-evaluated it for every column of every model)
##NOTE(review): the adjusted p-values of glht are presumably obtained by numerical integration,
##so reusing one summary() also keeps the columns of a model mutually consistent -- confirm in the multcomp documentation
dunnett_test_list <- lapply(model$Dunnett, function(fit) {summary(fit)$test})
##Take one numeric value per model; NA is returned when the requested element is absent
extract_num <- function(test_list, getter) {
    vapply(test_list, function(test) {
        value <- getter(test)
        if (length(value)==1) as.numeric(value) else NA_real_
    }, numeric(1), USE.NAMES=FALSE)
}

#Prepare contrast labels (taken from the first model; " - " is replaced with "-vs-")
contrast_vec <- names(dunnett_test_list[[1]]$coefficients) %>%
    str_replace(., " - ", "-vs-")

#Prepare summary table (the i-th contrast of each model)
temp <- model %>%
    dplyr::select(ModuleID)
for (i in seq_along(contrast_vec)) {
    label <- contrast_vec[i]
    temp <- temp %>%
        dplyr::mutate("{label}_Coef":=extract_num(dunnett_test_list, function(test) {test$coefficients[i]}),
                      "{label}_CoefSE":=extract_num(dunnett_test_list, function(test) {test$sigma[i]}),
                      "{label}_tStat":=extract_num(dunnett_test_list, function(test) {test$tstat[i]}),
                      "{label}_AdjPval":=extract_num(dunnett_test_list, function(test) {test$pvalues[i]}))
}
print(str_c("nrow: ",nrow(temp)))
head(temp)

summary_tbl <- temp

> –> These warnings were due to the models having invariable values (x models x 2 contrasts x 4 sapply() calls).  

In [None]:
#Add general statistics
##Calculate general statistics (per module and intervention group: N, mean, and SEM of RMSs)
sem <- function(x) {
    sd(x)/sqrt(length(x))
}
temp <- rms_kk %>%
    tidyr::gather(key=SampleID, value=RMS, -ModuleID) %>%
    dplyr::left_join(., sample_meta, by="SampleID") %>%
    ##Shorten the intervention labels to those of group_vec, then set the factor levels
    dplyr::mutate(Intervention=str_replace(Intervention, "Control", "Cont"),
                  Intervention=str_replace(Intervention, "Acarbose", "Acar"),
                  Intervention=str_replace(Intervention, "Rapamycin", "Rapa"),
                  Intervention=str_replace(Intervention, "Calorie restriction", "Calo"),
                  Intervention=factor(Intervention, levels=group_vec),
                  Sex=factor(Sex, levels=c("F", "M")),
                  Age=factor(str_c(as.character(Age),"m"), levels=c("6m", "12m"))) %>%
    dplyr::group_by(ModuleID, Intervention) %>%
    dplyr::summarize(Count=n(), RMSmean=mean(RMS), RMSsem=sem(RMS)) %>%
    dplyr::ungroup()
##Wide format: one set of columns per group, joined in the order of group_vec
temp1 <- dplyr::select(module_meta, ModuleID, ModuleName)
for (group in group_vec) {
    group_stat <- temp %>%
        dplyr::filter(Intervention==!!group) %>%
        dplyr::select(-Intervention) %>%
        dplyr::rename("{group}_N":=Count,
                      "{group}_RMSmean":=RMSmean,
                      "{group}_RMSsem":=RMSsem)
    temp1 <- dplyr::left_join(temp1, group_stat, by="ModuleID")
}
print(str_c("nrow: ",nrow(temp1)))
head(temp1)
##Merge (Module order follows the updated module_meta based on the Intervention effect of ANOVA (4-2-2))
temp <- temp1 %>%
    dplyr::left_join(summary_tbl, by="ModuleID")
print(str_c("nrow: ",nrow(temp)))
head(temp)

summary_tbl <- temp

> (Note that the beta-coefficient estimate is equivalent to the difference in the mean of RMSs; e.g., Acar-vs-Cont_Coef = Acar_RMSmean - Cont_RMSmean.)  

In [None]:
#Append the summary table to the existing .xlsx file as a new sheet
fileDir <- "./ExportData/"
ipynbName <- "220522_LC-M001-related-transcriptomics-DIRAC_StatisticalTest-GOBP_ver2-4_"
fileName <- "inter-group-comparison.xlsx"
xlsxPath <- str_c(fileDir,ipynbName,fileName)
##Load the existing .xlsx file as a new workbook object
workbook <- loadWorkbook(xlsxPath)
##Add the tibble object as a new sheet
sheetName <- "Acar-fixed-RMSmean"
addWorksheet(workbook, sheetName=sheetName)
writeData(workbook, sheetName, summary_tbl)
##Overwrite the existing .xlsx file with the updated workbook
saveWorkbook(workbook, file=xlsxPath, overwrite=TRUE)

### 5-3. RMS under Rapa consensus

In [None]:
#Phenotype whose rank consensus is fixed in this subsection (compared with rms_tbl$Template and sample_meta$Phenotype in the following cells)
template <- "Rapamycin"

#### 5-3-1. Extract RMS under the fixed consensus

In [None]:
#Extract RMS whose template phenotype corresponds to the fixed phenotype
##Keep the rows under the fixed template, then drop the Template column
temp <- dplyr::filter(rms_tbl, Template==!!template)
temp <- dplyr::select(temp, -Template)
print(str_c("nrow: ",nrow(temp)))
head(temp)
summary(temp[, 1:10])

rms_kk <- temp

#### 5-3-2. Simultaneously perform all tests (Dunnett's test)

In [None]:
#Prepare DF (long format: one row per module x sample, with the sample metadata attached)
temp <- rms_kk %>%
    tidyr::gather(key=SampleID, value=RMS, -ModuleID) %>%
    dplyr::left_join(., sample_meta, by="SampleID") %>%
    dplyr::filter(Phenotype!=!!template) %>%#Eliminate the case of own consensus
    ##Shorten the intervention labels to those of group_vec, then set the factor levels
    dplyr::mutate(Intervention=str_replace(Intervention, "Control", "Cont"),
                  Intervention=str_replace(Intervention, "Acarbose", "Acar"),
                  Intervention=str_replace(Intervention, "Rapamycin", "Rapa"),
                  Intervention=str_replace(Intervention, "Calorie restriction", "Calo"),
                  Intervention=factor(Intervention, levels=group_vec),
                  Sex=factor(Sex, levels=c("F", "M")),
                  Age=factor(str_c(as.character(Age),"m"), levels=c("6m", "12m")))
print(nrow(temp))
head(temp)

#Simultaneously perform all tests using tidyr::nest()
##Per-module model fitter (tbl = the nested data of one module)
fit_dunnett <- function(tbl) {
    glht(aov(RMS~Intervention, data=tbl),
         linfct=mcp(Intervention="Dunnett"),
         alternative="two.sided")
}
temp <- temp %>%
    dplyr::group_by(ModuleID) %>%
    tidyr::nest() %>%#New column name becomes "data"
    dplyr::mutate(Dunnett=lapply(data, fit_dunnett)) %>%
    dplyr::ungroup()
print(nrow(temp))
print(head(temp))#print() because Jupyter Lab tries to display list contents

model <- temp

In [None]:
#Inspect the result objects stored in the first two rows of the model table
summary(model[["Dunnett"]][[1]])
summary(model[["Dunnett"]][[2]])

#### 5-3-3. Summarize all result objects into a table

In [None]:
#Summarize all the per-module Dunnett's test objects into one table
##summary() is evaluated only once per model and reused for all the exported columns
##(the original re-evaluated it for every column of every model)
##NOTE(review): the adjusted p-values of glht are presumably obtained by numerical integration,
##so reusing one summary() also keeps the columns of a model mutually consistent -- confirm in the multcomp documentation
dunnett_test_list <- lapply(model$Dunnett, function(fit) {summary(fit)$test})
##Take one numeric value per model; NA is returned when the requested element is absent
extract_num <- function(test_list, getter) {
    vapply(test_list, function(test) {
        value <- getter(test)
        if (length(value)==1) as.numeric(value) else NA_real_
    }, numeric(1), USE.NAMES=FALSE)
}

#Prepare contrast labels (taken from the first model; " - " is replaced with "-vs-")
contrast_vec <- names(dunnett_test_list[[1]]$coefficients) %>%
    str_replace(., " - ", "-vs-")

#Prepare summary table (the i-th contrast of each model)
temp <- model %>%
    dplyr::select(ModuleID)
for (i in seq_along(contrast_vec)) {
    label <- contrast_vec[i]
    temp <- temp %>%
        dplyr::mutate("{label}_Coef":=extract_num(dunnett_test_list, function(test) {test$coefficients[i]}),
                      "{label}_CoefSE":=extract_num(dunnett_test_list, function(test) {test$sigma[i]}),
                      "{label}_tStat":=extract_num(dunnett_test_list, function(test) {test$tstat[i]}),
                      "{label}_AdjPval":=extract_num(dunnett_test_list, function(test) {test$pvalues[i]}))
}
print(str_c("nrow: ",nrow(temp)))
head(temp)

summary_tbl <- temp

> –> These warnings were due to the models having invariable values (x models x 2 contrasts x 4 sapply() calls).  

In [None]:
#Add general statistics
##Calculate general statistics (per module and intervention group: N, mean, and SEM of RMSs)
sem <- function(x) {
    sd(x)/sqrt(length(x))
}
temp <- rms_kk %>%
    tidyr::gather(key=SampleID, value=RMS, -ModuleID) %>%
    dplyr::left_join(., sample_meta, by="SampleID") %>%
    ##Shorten the intervention labels to those of group_vec, then set the factor levels
    dplyr::mutate(Intervention=str_replace(Intervention, "Control", "Cont"),
                  Intervention=str_replace(Intervention, "Acarbose", "Acar"),
                  Intervention=str_replace(Intervention, "Rapamycin", "Rapa"),
                  Intervention=str_replace(Intervention, "Calorie restriction", "Calo"),
                  Intervention=factor(Intervention, levels=group_vec),
                  Sex=factor(Sex, levels=c("F", "M")),
                  Age=factor(str_c(as.character(Age),"m"), levels=c("6m", "12m"))) %>%
    dplyr::group_by(ModuleID, Intervention) %>%
    dplyr::summarize(Count=n(), RMSmean=mean(RMS), RMSsem=sem(RMS)) %>%
    dplyr::ungroup()
##Wide format: one set of columns per group, joined in the order of group_vec
temp1 <- dplyr::select(module_meta, ModuleID, ModuleName)
for (group in group_vec) {
    group_stat <- temp %>%
        dplyr::filter(Intervention==!!group) %>%
        dplyr::select(-Intervention) %>%
        dplyr::rename("{group}_N":=Count,
                      "{group}_RMSmean":=RMSmean,
                      "{group}_RMSsem":=RMSsem)
    temp1 <- dplyr::left_join(temp1, group_stat, by="ModuleID")
}
print(str_c("nrow: ",nrow(temp1)))
head(temp1)
##Merge (Module order follows the updated module_meta based on the Intervention effect of ANOVA (4-2-2))
temp <- temp1 %>%
    dplyr::left_join(summary_tbl, by="ModuleID")
print(str_c("nrow: ",nrow(temp)))
head(temp)

summary_tbl <- temp

> (Note that the beta-coefficient estimate is equivalent to the difference in the mean of RMSs; e.g., Acar-vs-Cont_Coef = Acar_RMSmean - Cont_RMSmean.)  

In [None]:
#Append the summary table to the existing .xlsx file as a new sheet
fileDir <- "./ExportData/"
ipynbName <- "220522_LC-M001-related-transcriptomics-DIRAC_StatisticalTest-GOBP_ver2-4_"
fileName <- "inter-group-comparison.xlsx"
xlsxPath <- str_c(fileDir,ipynbName,fileName)
##Load the existing .xlsx file as a new workbook object
workbook <- loadWorkbook(xlsxPath)
##Add the tibble object as a new sheet
sheetName <- "Rapa-fixed-RMSmean"
addWorksheet(workbook, sheetName=sheetName)
writeData(workbook, sheetName, summary_tbl)
##Overwrite the existing .xlsx file with the updated workbook
saveWorkbook(workbook, file=xlsxPath, overwrite=TRUE)

### 5-4. RMS under CR consensus

In [None]:
#Phenotype whose rank consensus is fixed in this subsection (compared with rms_tbl$Template and sample_meta$Phenotype in the following cells)
template <- "Calorie restriction"

#### 5-4-1. Extract RMS under the fixed consensus

In [None]:
#Extract RMS whose template phenotype corresponds to the fixed phenotype
##Keep the rows under the fixed template, then drop the Template column
temp <- dplyr::filter(rms_tbl, Template==!!template)
temp <- dplyr::select(temp, -Template)
print(str_c("nrow: ",nrow(temp)))
head(temp)
summary(temp[, 1:10])

rms_kk <- temp

#### 5-4-2. Simultaneously perform all tests (Dunnett's test)

In [None]:
#Prepare DF (long format: one row per module x sample, with the sample metadata attached)
temp <- rms_kk %>%
    tidyr::gather(key=SampleID, value=RMS, -ModuleID) %>%
    dplyr::left_join(., sample_meta, by="SampleID") %>%
    dplyr::filter(Phenotype!=!!template) %>%#Eliminate the case of own consensus
    ##Shorten the intervention labels to those of group_vec, then set the factor levels
    dplyr::mutate(Intervention=str_replace(Intervention, "Control", "Cont"),
                  Intervention=str_replace(Intervention, "Acarbose", "Acar"),
                  Intervention=str_replace(Intervention, "Rapamycin", "Rapa"),
                  Intervention=str_replace(Intervention, "Calorie restriction", "Calo"),
                  Intervention=factor(Intervention, levels=group_vec),
                  Sex=factor(Sex, levels=c("F", "M")),
                  Age=factor(str_c(as.character(Age),"m"), levels=c("6m", "12m")))
print(nrow(temp))
head(temp)

#Simultaneously perform all tests using tidyr::nest()
##Per-module model fitter (tbl = the nested data of one module)
fit_dunnett <- function(tbl) {
    glht(aov(RMS~Intervention, data=tbl),
         linfct=mcp(Intervention="Dunnett"),
         alternative="two.sided")
}
temp <- temp %>%
    dplyr::group_by(ModuleID) %>%
    tidyr::nest() %>%#New column name becomes "data"
    dplyr::mutate(Dunnett=lapply(data, fit_dunnett)) %>%
    dplyr::ungroup()
print(nrow(temp))
print(head(temp))#print() because Jupyter Lab tries to display list contents

model <- temp

In [None]:
#Inspect the result objects stored in the first two rows of the model table
summary(model[["Dunnett"]][[1]])
summary(model[["Dunnett"]][[2]])

#### 5-4-3. Summarize all result objects into a table

In [None]:
#Summarize all the per-module Dunnett's test objects into one table
##summary() is evaluated only once per model and reused for all the exported columns
##(the original re-evaluated it for every column of every model)
##NOTE(review): the adjusted p-values of glht are presumably obtained by numerical integration,
##so reusing one summary() also keeps the columns of a model mutually consistent -- confirm in the multcomp documentation
dunnett_test_list <- lapply(model$Dunnett, function(fit) {summary(fit)$test})
##Take one numeric value per model; NA is returned when the requested element is absent
extract_num <- function(test_list, getter) {
    vapply(test_list, function(test) {
        value <- getter(test)
        if (length(value)==1) as.numeric(value) else NA_real_
    }, numeric(1), USE.NAMES=FALSE)
}

#Prepare contrast labels (taken from the first model; " - " is replaced with "-vs-")
contrast_vec <- names(dunnett_test_list[[1]]$coefficients) %>%
    str_replace(., " - ", "-vs-")

#Prepare summary table (the i-th contrast of each model)
temp <- model %>%
    dplyr::select(ModuleID)
for (i in seq_along(contrast_vec)) {
    label <- contrast_vec[i]
    temp <- temp %>%
        dplyr::mutate("{label}_Coef":=extract_num(dunnett_test_list, function(test) {test$coefficients[i]}),
                      "{label}_CoefSE":=extract_num(dunnett_test_list, function(test) {test$sigma[i]}),
                      "{label}_tStat":=extract_num(dunnett_test_list, function(test) {test$tstat[i]}),
                      "{label}_AdjPval":=extract_num(dunnett_test_list, function(test) {test$pvalues[i]}))
}
print(str_c("nrow: ",nrow(temp)))
head(temp)

summary_tbl <- temp

> –> These warnings were due to the models having invariable values (x models x 2 contrasts x 4 sapply() calls).  

In [None]:
#Add general statistics
##Calculate general statistics (per module and intervention group: N, mean, and SEM of RMSs)
sem <- function(x) {
    sd(x)/sqrt(length(x))
}
temp <- rms_kk %>%
    tidyr::gather(key=SampleID, value=RMS, -ModuleID) %>%
    dplyr::left_join(., sample_meta, by="SampleID") %>%
    ##Shorten the intervention labels to those of group_vec, then set the factor levels
    dplyr::mutate(Intervention=str_replace(Intervention, "Control", "Cont"),
                  Intervention=str_replace(Intervention, "Acarbose", "Acar"),
                  Intervention=str_replace(Intervention, "Rapamycin", "Rapa"),
                  Intervention=str_replace(Intervention, "Calorie restriction", "Calo"),
                  Intervention=factor(Intervention, levels=group_vec),
                  Sex=factor(Sex, levels=c("F", "M")),
                  Age=factor(str_c(as.character(Age),"m"), levels=c("6m", "12m"))) %>%
    dplyr::group_by(ModuleID, Intervention) %>%
    dplyr::summarize(Count=n(), RMSmean=mean(RMS), RMSsem=sem(RMS)) %>%
    dplyr::ungroup()
##Wide format: one set of columns per group, joined in the order of group_vec
temp1 <- dplyr::select(module_meta, ModuleID, ModuleName)
for (group in group_vec) {
    group_stat <- temp %>%
        dplyr::filter(Intervention==!!group) %>%
        dplyr::select(-Intervention) %>%
        dplyr::rename("{group}_N":=Count,
                      "{group}_RMSmean":=RMSmean,
                      "{group}_RMSsem":=RMSsem)
    temp1 <- dplyr::left_join(temp1, group_stat, by="ModuleID")
}
print(str_c("nrow: ",nrow(temp1)))
head(temp1)
##Merge (Module order follows the updated module_meta based on the Intervention effect of ANOVA (4-2-2))
temp <- temp1 %>%
    dplyr::left_join(summary_tbl, by="ModuleID")
print(str_c("nrow: ",nrow(temp)))
head(temp)

summary_tbl <- temp

> (Note that the beta-coefficient estimate is equivalent to the difference in the mean of RMSs; e.g., Acar-vs-Cont_Coef = Acar_RMSmean - Cont_RMSmean.)  

In [None]:
#Append the summary table to the existing .xlsx file as a new sheet
fileDir <- "./ExportData/"
ipynbName <- "220522_LC-M001-related-transcriptomics-DIRAC_StatisticalTest-GOBP_ver2-4_"
fileName <- "inter-group-comparison.xlsx"
xlsxPath <- str_c(fileDir,ipynbName,fileName)
##Load the existing .xlsx file as a new workbook object
workbook <- loadWorkbook(xlsxPath)
##Add the tibble object as a new sheet
sheetName <- "Calo-fixed-RMSmean"
addWorksheet(workbook, sheetName=sheetName)
writeData(workbook, sheetName, summary_tbl)
##Overwrite the existing .xlsx file with the updated workbook
saveWorkbook(workbook, file=xlsxPath, overwrite=TRUE)

# — †3. Go back to †3 of the main Python notebook —  

# — Session information —

In [None]:
#Display the R session information (R version, platform, and attached/loaded packages) for reproducibility
sessionInfo()