In [13]:
# Positional command-line arguments: <p_thresh> <split_status>.
args <- commandArgs(trailingOnly = TRUE)
# Numeric p-value cutoff handed to run_limma() further down.
p_thresh <- as.numeric(args[1])
# Lower-cased so that "TRUE", "True" and "true" all compare equal later on.
split_status <- tolower(args[2])

# For interactive runs, uncomment these to override the parsed values:
# split_status = 'true'
# p_thresh = 0.05

# Silence every warning from this point onwards.
options(warn = -1)

library(edgeR)
library(limma)

# Assay passed to assay() inside run_limma().
# NOTE(review): presumably index 3 is the "scalelogcounts" assay - confirm.
assay_to_use <- 3
assayname_to_use <- "scalelogcounts"
# Serialized experiment object read with readRDS() below.
file_path <- '/tmp/work/RCproject/GEO_singlecellexperiment.rds'

#' Run a limma "yes vs no" differential-expression test and return the hits.
#'
#' Fits the linear model `~0 + Response + batch` to one assay of `spe`, tests
#' the contrast `yes - no` with robust empirical-Bayes moderation, and returns
#' the row names of the features whose unadjusted p-value is below `p.value`.
#'
#' @param spe Experiment object. `colData(spe)` must contain the columns
#'   `Response` and `batch`, and `Response` must produce design columns named
#'   `yes` and `no` once the "Response" prefix is stripped.
#' @param assay_to_use Assay index or name forwarded to `assay()`.
#' @param verbose If `TRUE`, print the design column names (before and after
#'   renaming) and the `decideTests()` up/down/not-significant summary.
#' @param p.value Cutoff applied to the unadjusted p-values.
#' @param lfc Minimum absolute log fold change. `0` (the default) disables the
#'   filter, which reproduces the previous behaviour of this function.
#' @return Character vector of row names of the significant features.
run_limma <- function(spe, assay_to_use, verbose = FALSE, p.value = 0.05, lfc = 0.0) {
  design <- model.matrix(~0 + Response + batch, data = colData(spe))
  if (verbose) print(colnames(design))

  # makeContrasts() needs syntactically valid level names: drop the factor
  # prefix from the Response columns and replace blanks with underscores.
  colnames(design) <- gsub("^Response", "", colnames(design))
  colnames(design) <- gsub(" ", "_", colnames(design))
  if (verbose) print(colnames(design))

  contr.matrix <- makeContrasts(
    Yes_vs_NO = yes - no,
    levels = colnames(design)
  )

  v <- assay(spe, assay_to_use)
  fit <- lmFit(v, design = design)
  fit_contrast <- contrasts.fit(fit, contrasts = contr.matrix)
  efit <- eBayes(fit_contrast, robust = TRUE)

  # The decideTests() summary is only ever printed, so compute it on demand.
  if (verbose) {
    results_efit <- decideTests(efit, adjust.method = "none", p.value = p.value, lfc = lfc)
    print(summary(results_efit))
  }

  # Unsorted table of every feature for the single contrast.
  my_de_results <- topTable(efit, coef = 1, sort.by = "none", number = Inf)

  is_sig <- my_de_results$P.Value < p.value
  # Honour `lfc` in the returned set as well, so it agrees with the summary
  # printed above; skipped when lfc is 0.
  if (lfc > 0) {
    is_sig <- is_sig & abs(my_de_results$logFC) > lfc
  }

  # which() drops NA comparisons; plain logical indexing would turn each NA
  # p-value into a spurious "NA" entry in the returned gene list.
  rownames(my_de_results)[which(is_sig)]
}

# Load the serialized experiment object.
sce <- readRDS(file_path)

# Keep only the training samples when a split was requested; every other value
# of split_status leaves the object untouched.
if (split_status == "true") {
  train_samples <- readLines("train_samples.txt")
  sce <- sce[, train_samples]
} else {
  print('Running diff expression on entire dataset')
}

# Identifiers that define the two subsets analysed next to the full data.
de4_platforms <- c("GPL13497", "GPL14951", "GPL15207", "GPL6102")
rna_seq_batches <- c("GSE190826", "GSE209746")

all_data_sets <- sce
de4_data_sets <- sce[, colData(sce)$Platform %in% de4_platforms]
rna_seq_data_sets <- sce[, colData(sce)$batch %in% rna_seq_batches]

# Same limma settings for each of the three sample sets.
DE_alldatasets <- run_limma(
  all_data_sets,
  assay_to_use = assay_to_use, verbose = TRUE, p.value = p_thresh, lfc = 0.0
)
DE_4datasets <- run_limma(
  de4_data_sets,
  assay_to_use = assay_to_use, verbose = TRUE, p.value = p_thresh, lfc = 0.0
)
DE_bulkDatasets <- run_limma(
  rna_seq_data_sets,
  assay_to_use = assay_to_use, verbose = TRUE, p.value = p_thresh, lfc = 0.0
)

# Genes significant in the full data AND in at least one of the two subsets.
subset_hits <- union(DE_bulkDatasets, DE_4datasets)
de_intersect_plus_bulk_genes <- intersect(subset_hits, DE_alldatasets)

saveRDS(de_intersect_plus_bulk_genes, 'ann_gene_set.rds')

 [1] "Responseno"         "Responseyes"        "batchGSE145037"    
 [4] "batchGSE150082"     "batchGSE190826"     "batchGSE209746"    
 [7] "batchGSE45404_GPL1" "batchGSE45404_GPL2" "batchGSE93375"     
[10] "batchGSE94104"     
 [1] "no"                 "yes"                "batchGSE145037"    
 [4] "batchGSE150082"     "batchGSE190826"     "batchGSE209746"    
 [7] "batchGSE45404_GPL1" "batchGSE45404_GPL2" "batchGSE93375"     
[10] "batchGSE94104"     
       Yes_vs_NO
Down         563
NotSig     11036
Up           566
[1] "Responseno"     "Responseyes"    "batchGSE150082" "batchGSE93375" 
[5] "batchGSE94104" 
[1] "no"             "yes"            "batchGSE150082" "batchGSE93375" 
[5] "batchGSE94104" 
       Yes_vs_NO
Down         367
NotSig     11372
Up           426
[1] "Responseno"     "Responseyes"    "batchGSE209746"
[1] "no"             "yes"            "batchGSE209746"
       Yes_vs_NO
Down         214
NotSig     11619
Up           332


In [15]:
# Number of genes in the final gene set that was saved to 'ann_gene_set.rds'.
length(de_intersect_plus_bulk_genes)

In [None]:
    def merge_and_align(self):
        """Run the ``merge_and_align.r`` script through ``Rscript``.

        The script receives two positional arguments: the
        ``pre_process_objects`` path below ``self.output_directory`` and
        ``self.output_directory`` itself. The exit status of the subprocess
        is not inspected and nothing is returned.
        """
        pre_process_dir = '/'.join((self.output_directory, 'pre_process_objects'))
        command = ['Rscript', 'merge_and_align.r', pre_process_dir, self.output_directory]
        subprocess.call(command)