In [1]:
%load_ext rpy2.ipython

In [2]:
%%R
library(DESeq2)
library(magrittr)
library(SummarizedExperiment)
library(dplyr)

# Timestamp taken when this setup cell is executed.
start_time <- Sys.time()

# Input and output directories. Both end with "/" so file names are appended
# to them directly with paste0().
IN_DIR <- "/mnt/home/yuankeji/RanceLab/reticula_new/reticula/data/tcdd/input/"
OUT_DIR <- "/mnt/home/yuankeji/RanceLab/reticula_new/reticula/data/tcdd/output/"

# Directory and file name of the serialized expression data.
GTEx_DATA_DIR <- IN_DIR
GTEx_DATA_FIL <- "rse_tcdd_data.Rdata"

# Ensembl-to-Reactome-reaction mapping, read as a header-less tab-delimited
# table (columns V1, V2, ...). Quote and comment processing are switched off
# so that apostrophes, double quotes or "#" inside a field cannot merge or
# truncate rows; fields are split on tabs only.
# NOTE(review): assumes the file's free-text columns may contain such
# characters -- confirm against the actual mapping file.
ensembl2rxns.df <- read.table(
  paste0(IN_DIR, "Ensembl2ReactomeReactions.txt"),
  sep = "\t",
  quote = "",
  comment.char = ""
)

# Restore the objects stored in the .Rdata file into the current session.
# NOTE(review): later cells use `final_result`, presumably restored here --
# confirm.
load(paste0(GTEx_DATA_DIR, GTEx_DATA_FIL))

Loading required package: S4Vectors
Loading required package: stats4
Loading required package: BiocGenerics

Attaching package: ‘BiocGenerics’

The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs

The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min


Attaching package: ‘S4Vectors’

The following object is masked from ‘package:utils’:

    findMatches

The following objects are masked from ‘package:base’:

    expand.grid, I, unname

Loading required package: IRanges
Loading required package: GenomicRanges
Loading required package: GenomeInfoDb
Loading require

In [None]:
%%R

# Variance of every row of the first assay, computed across all columns.
variances <- apply(assay(final_result), 1, var)

# Indices of rows whose variance is exactly zero (NA variances are ignored
# by which()).
zero_variance_rows <- which(variances == 0)

# Always define `deleted_data`, even when nothing is removed (it is then a
# 0-row matrix), because later cells reference it unconditionally.
deleted_data <- assay(final_result)[zero_variance_rows, , drop = FALSE]

if (length(zero_variance_rows) > 0) {
    # Keep a copy of the removed rows on disk.
    save(deleted_data, file=paste0(OUT_DIR, "deleted_data.RData"))

    # Remaining assay values and the matching row annotation.
    new_assays <- assay(final_result)[-zero_variance_rows, , drop = FALSE]
    new_row_data <- rowData(final_result)[-zero_variance_rows, , drop = FALSE]

    # Rebuild the container with a single assay named "counts", the filtered
    # row annotation and the unchanged column annotation.
    new_final_result <- SummarizedExperiment(
        assays = SimpleList(counts = new_assays),
        rowData = new_row_data,
        colData = colData(final_result)
    )

    final_result <- new_final_result

    print(dim(assay(final_result)))
    print(dim(rowData(final_result)))
} else {
    cat("No rows with zero variance found.\n")
}

print(head(assay(final_result)))

In [None]:
%%R
library(tibble)

# Logical mask over samples: TRUE where the dose is one of the two compared
# levels (0.03 or 30).
keep_samples <- colData(final_result)[["dose"]] %in% c(0.03, 30.00)
final_result <- final_result[, keep_samples]

# Column annotation as a tibble, dropping every sample whose `study` value
# matches "SRP131784".
temp_df <- filter(
  as_tibble(colData(final_result)),
  !grepl("SRP131784", study)
)
tcdd_data <- temp_df

# Keep only the samples that survived the study filter.
final_result <- final_result[
  ,
  colData(final_result)[["external_id"]] %in% tcdd_data[["external_id"]]
]

# Report the shape and content of both filtered objects.
print(dim(final_result))
print(assay(final_result))
print(dim(tcdd_data))
print(tcdd_data)

In [None]:
%%R
# Number of retained samples per dose level.
dose_counts <- table(tcdd_data[["dose"]])
print(dose_counts)

# Same counts as a two-column data frame with explicit column names.
dose_counts_df <- setNames(as.data.frame(dose_counts), c("Dose", "Count"))

# Persist the counts as CSV without row names.
write.csv(
  dose_counts_df,
  file = paste0(OUT_DIR, "dose_counts0.03vs30_time_course.csv"),
  row.names = FALSE
)


0.03   30 
  23   68 


In [6]:
%%R
# Per-sample sex label: take `Sex` when it is present and non-empty,
# otherwise fall back to `gender`. The explicit is.na() guard matters because
# `NA != ""` evaluates to NA, which would make ifelse() return NA instead of
# falling back to `gender`.
combined_gender <- ifelse(
  !is.na(tcdd_data$Sex) & tcdd_data$Sex != "",
  tcdd_data$Sex,
  tcdd_data$gender
)
print(combined_gender)

 [1] "male"   "male"   "male"   "male"   "male"   "male"   "male"   "male"  
 [9] "male"   "male"   "male"   "male"   "male"   "male"   "male"   "male"  
[17] "male"   "male"   "male"   "male"   "male"   "male"   "male"   "male"  
[25] "male"   "male"   "male"   "male"   "male"   "male"   "male"   "male"  
[33] "male"   "male"   "male"   "male"   "male"   "male"   "male"   "male"  
[41] "male"   "male"   "male"   "male"   "male"   "male"   "male"   "male"  
[49] "female" "female" "female" "female" "female" "female" "female" "female"
[57] "female" "female" "female" "female" "female" "female" "female" "female"
[65] "female" "female" "female" "female" "female" "female" "female" "female"
[73] "female" "female" "female" "female" "female" "female" "female" "male"  
[81] "male"   "male"   "male"   "male"   "male"   "male"   "male"   "male"  
[89] "male"   "male"   "male"  


In [7]:
%%R
# Export the `project_id` column as a single-column text file
# (no header, no row names).
write.table(
  x = tcdd_data$project_id,
  file = paste0(OUT_DIR, "tcdd_project_id0.03vs30_time_course.txt"),
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)

In [8]:
%%R
# Export the combined sex labels as a single-column text file
# (no header, no row names).
write.table(
  x = combined_gender,
  file = paste0(OUT_DIR, "tcdd_gender0.03vs30_time_course.txt"),
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)

In [9]:
%%R
# Export the `external_id` column (sample ids) as a single-column text file
# (no header, no row names).
write.table(
  x = tcdd_data$external_id,
  file = paste0(OUT_DIR, "tcdd_sample_id0.03vs30_time_course.txt"),
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)

In [10]:
%%R
# Export the `dose` column as a single-column text file
# (no header, no row names).
write.table(
  x = tcdd_data$dose,
  file = paste0(OUT_DIR, "tcdd_dose0.03vs30_time_course.txt"),
  sep = "\t",
  row.names = FALSE,
  col.names = FALSE
)

In [11]:
%%R
# Serialize the dose vector so it can be restored later with readRDS().
saveRDS(
  object = tcdd_data$dose,
  file = paste0(OUT_DIR, "tcdd_dose_detail_vec0.03vs30_time_course.Rds")
)

In [12]:
%%R
# Serialize the sample-id vector so it can be restored later with readRDS().
saveRDS(
  object = tcdd_data$external_id,
  file = paste0(OUT_DIR, "tcdd_sample_detail_vec0.03vs30_time_course.Rds")
)

In [None]:
%%R
# Pull the first assay out of the filtered experiment and convert it to a
# data frame.
tcdd.df <- as.data.frame(SummarizedExperiment::assay(final_result))
print(tcdd.df)

In [14]:
%%R
# Sanity check: row names present in the data frame but missing from the
# experiment object (an empty result means none are missing).
setdiff(x = rownames(tcdd.df), y = rownames(final_result))

character(0)


In [15]:
%%R
# Label the data-frame columns with the `external_id` values from the
# experiment's column annotation.
colnames(tcdd.df) <- colData(final_result)[["external_id"]]

In [16]:
%%R
# Row identifiers with every ".<digits>" run removed (e.g. a trailing
# version suffix).
ensembl_wo_ids <- gsub(
  pattern = "\\.[0-9]+",
  replacement = "",
  x = rownames(tcdd.df)
)
# print(ensembl_wo_ids)

In [17]:
%%R
# Same ".<digits>" removal for the row identifiers of the removed rows,
# followed by a count of how many there are.
deleted_ensembl_wo_ids <- gsub(
  pattern = "\\.[0-9]+",
  replacement = "",
  x = rownames(deleted_data)
)
print(length(deleted_ensembl_wo_ids))

[1] 12169


In [18]:
%%R
# Replace the data frame's row names with the stripped identifiers.
row.names(tcdd.df) <- ensembl_wo_ids
# print(rownames(tcdd.df))

In [None]:
%%R
# Replace the row names of the removed-rows matrix with the stripped
# identifiers, then show them.
rownames(deleted_data) <- deleted_ensembl_wo_ids
print(x = rownames(deleted_data))

In [None]:
%%R
# Identifiers that occur both in the first column of the mapping table and
# among the stripped row identifiers.
reactome_ensembl_ids <- intersect(
  x = ensembl2rxns.df[["V1"]],
  y = ensembl_wo_ids
)
print(reactome_ensembl_ids)

In [21]:
%%R
# Serialize the shared identifier set for later use.
saveRDS(
  object = reactome_ensembl_ids,
  file = paste0(OUT_DIR, "reactome_ensembl_ids0.03vs30.Rds")
)

In [22]:
%%R
# Keep only the rows named in `reactome_ensembl_ids`, in that order. Every id
# comes from the intersection with the row names, so an exact lookup selects
# the same rows as character indexing.
tcdd.df <- tcdd.df[match(reactome_ensembl_ids, rownames(tcdd.df)), ]

In [23]:
%%R
# Serialize the filtered expression data frame.
saveRDS(
  object = tcdd.df,
  file = paste0(OUT_DIR, "tcdd_df0.03vs30_time_course.Rds")
)

In [24]:
%%R
# Serialize the removed-rows matrix (now carrying the stripped row names).
saveRDS(
  object = deleted_data,
  file = paste0(OUT_DIR, "format_name_deleted_data0.03vs30_time_course.Rds")
)