In [None]:
%load_ext rpy2.ipython

In [None]:
%%R
library(DESeq2)
library(magrittr)
library(SummarizedExperiment)

# Wall-clock start of the run (not read again in the visible cells).
start_time <- Sys.time()

# Input/output locations, relative to the notebook's working directory.
# Both end in "/" so file names can be appended with paste0().
IN_DIR <- "../../../data/GEO_TCDD_Dose_Response/input/"
OUT_DIR <- "../../../data/GEO_TCDD_Dose_Response/output/"

GEO_TCDD_Dose_Reponse_DATA_FIL <- "rse_tcdd_data.Rdata"

# Tab-separated mapping table read without a header, so columns are V1, V2, ...
# Only V1 is used later (intersected with the Ensembl ids of the count matrix).
# quote = "" and comment.char = "" make the parser treat apostrophes, double
# quotes and '#' as ordinary characters; with read.table's defaults such
# characters inside free-text name fields can silently merge or truncate rows.
# NOTE(review): assumes the file itself contains no quoted fields - confirm.
ensembl2rxns.df <- read.table(paste0(IN_DIR, "Ensembl2ReactomeReactions.txt"),
                              sep = "\t", quote = "", comment.char = "")

# Restores the saved objects into the global environment. Later cells use
# `final_result`, which is not assigned anywhere in the visible code, so it is
# presumably provided by this file - confirm.
load(paste0(IN_DIR, GEO_TCDD_Dose_Reponse_DATA_FIL))

In [None]:
%%R
# Remove rows of the assay whose values are constant across all columns
# (row variance exactly 0) and keep a copy of what was removed.
variances <- apply(assay(final_result), 1, var)
zero_variance_rows <- which(variances == 0)

# Always define deleted_data (with zero rows when nothing qualifies): later
# cells rename its rows and save it unconditionally, and would otherwise fail
# with "object 'deleted_data' not found" when no zero-variance row exists.
deleted_data <- assay(final_result)[zero_variance_rows, , drop = FALSE]

# The length check also protects the negative indexing below: x[-integer(0), ]
# would select no rows at all instead of keeping every row.
if (length(zero_variance_rows) > 0) {
    save(deleted_data, file=paste0(OUT_DIR, "deleted_data.RData"))
    new_assays <- assay(final_result)[-zero_variance_rows, , drop = FALSE]
    new_row_data <- rowData(final_result)[-zero_variance_rows, , drop = FALSE]

    # Rebuild the experiment from the filtered pieces; the single assay is
    # stored under the name "counts" and the column metadata is carried over.
    new_final_result <- SummarizedExperiment(
        assays = SimpleList(counts = new_assays),
        rowData = new_row_data,
        colData = colData(final_result)
    )

    final_result <- new_final_result

    # Show the dimensions after filtering as a quick sanity check.
    print(dim(assay(final_result)))
    print(dim(rowData(final_result)))
} else {
    cat("No rows with zero variance found.\n")
}

print(head(assay(final_result)))

In [None]:
%%R
# Column (sample-level) metadata of the experiment object.
GEO_TCDD_Dose_Response_data <- colData(final_result)

# Tally the number of samples per value of the `dose` column and display it.
dose_counts <- table(GEO_TCDD_Dose_Response_data$dose)
print(dose_counts)

# Same tally as a data frame with columns "Dose" and "Count", written as CSV
# into the output directory.
dose_counts_df <- setNames(as.data.frame(dose_counts), c("Dose", "Count"))
write.csv(dose_counts_df,
          file = paste0(OUT_DIR, "dose_counts.csv"),
          row.names = FALSE)

In [None]:
%%R
# Per-sample sex label: use `Sex` where it is present and non-empty, otherwise
# fall back to `gender`. The is.na() guard matters because NA != "" is NA,
# which would make ifelse() return NA instead of falling back to `gender`.
combined_gender <- ifelse(
  !is.na(GEO_TCDD_Dose_Response_data$Sex) & GEO_TCDD_Dose_Response_data$Sex != "",
  GEO_TCDD_Dose_Response_data$Sex,
  GEO_TCDD_Dose_Response_data$gender
)

# Export one metadata vector per file: one value per line, no header and no
# row names.
write.table(GEO_TCDD_Dose_Response_data$project_id,
            file = paste0(OUT_DIR, "GEO_TCDD_Dose_Response_project_id.txt"),
            row.names = FALSE, col.names = FALSE, sep = "\t")

write.table(combined_gender,
            file = paste0(OUT_DIR, "GEO_TCDD_Dose_Response_gender.txt"),
            row.names = FALSE, col.names = FALSE, sep = "\t")

write.table(GEO_TCDD_Dose_Response_data$SRR,
            file = paste0(OUT_DIR, "GEO_TCDD_Dose_Response_sample_id.txt"),
            row.names = FALSE, col.names = FALSE, sep = "\t")

write.table(GEO_TCDD_Dose_Response_data$dose,
            file = paste0(OUT_DIR, "GEO_TCDD_Dose_Response_dose.txt"),
            row.names = FALSE, col.names = FALSE, sep = "\t")

# The dose and sample-id vectors are additionally kept as R objects.
saveRDS(GEO_TCDD_Dose_Response_data$dose,
        file = paste0(OUT_DIR, "GEO_TCDD_Dose_Response_dose_detail_vec.Rds"))

# NOTE(review): "sampe" in this file name looks like a typo for "sample"; it is
# left unchanged in case other code reads the file by this exact name.
saveRDS(GEO_TCDD_Dose_Response_data$SRR,
        file = paste0(OUT_DIR, "GEO_TCDD_Dose_Response_sampe_detail_vec.Rds"))


In [None]:
%%R
# First assay of the experiment as a plain data frame; its row names are
# treated as (versioned) Ensembl ids below.
GEO_TCDD_Dose_Response.df <- as.data.frame(SummarizedExperiment::assay(final_result))

# Remove every ".<digits>" run from the row names of the retained table and
# use the stripped ids as its new row names.
ensembl_wo_ids <- gsub("\\.[0-9]+", "", rownames(GEO_TCDD_Dose_Response.df))
rownames(GEO_TCDD_Dose_Response.df) <- ensembl_wo_ids

# Apply the same renaming to the rows that were removed earlier.
deleted_ensembl_wo_ids <- gsub("\\.[0-9]+", "", rownames(deleted_data))
rownames(deleted_data) <- deleted_ensembl_wo_ids

# Ids found both in the first column of the Reactome mapping table and in the
# count table; stored for later steps.
reactome_ensembl_ids <- intersect(ensembl2rxns.df$V1, ensembl_wo_ids)
saveRDS(reactome_ensembl_ids, file = paste0(OUT_DIR, "reactome_ensembl_ids.Rds"))

In [None]:
%%R
# Restrict the count table to the ids that also occur in the Reactome mapping.
# drop = FALSE keeps the result a data frame even when there is only a single
# sample column; without it a one-column table would collapse to a bare vector.
GEO_TCDD_Dose_Response.df <- GEO_TCDD_Dose_Response.df[reactome_ensembl_ids, , drop = FALSE]

# Persist the filtered table and the renamed removed rows as R objects.
saveRDS(GEO_TCDD_Dose_Response.df,
        file = paste0(OUT_DIR, "GEO_TCDD_Dose_Response_df.Rds"))
saveRDS(deleted_data,
        file = paste0(OUT_DIR, "GEO_TCDD_Dose_Response_format_name_deleted_data.Rds"))