In [None]:
%load_ext rpy2.ipython

In [None]:
%%R

# Point R's library search path at the "Rlocal4.3.2" directory located under
# the directory named by the SCRATCH environment variable.
scratch_path <- Sys.getenv("SCRATCH")
local_lib_dir <- file.path(scratch_path, "Rlocal4.3.2")
.libPaths(local_lib_dir)

In [None]:
%%R
library(DESeq2)
library(magrittr)
library(SummarizedExperiment)

# Timestamp taken when this cell starts executing.
start_time <- Sys.time()

# Input/output locations (relative to R's working directory). Both end in "/"
# because file names are appended to them with paste0().
IN_DIR <- "../../inputs/"
OUT_DIR <- "../../outputs/validation/"

# Later cells write .Rds files into OUT_DIR; create it up front so saveRDS()
# does not fail when the directory is missing. No-op if it already exists.
dir.create(OUT_DIR, recursive = TRUE, showWarnings = FALSE)

GEO_model_validation_DATA_FIL <- "rse_gene_20.Rdata"

# Tab-separated mapping table read without a header (columns become V1, V2, ...).
# quote = "" and comment.char = "" make read.table() treat apostrophes, double
# quotes and '#' as ordinary characters. With the defaults, an apostrophe inside
# a free-text field opens a quoted string that can swallow following lines, and
# '#' truncates the rest of a line.
# NOTE(review): assumes fields in this file are not quote-delimited -- confirm.
ensembl2rxns.df <- read.table(
  paste0(IN_DIR, "Ensembl2ReactomeReactions.txt"),
  sep = "\t",
  header = FALSE,
  quote = "",
  comment.char = "",
  stringsAsFactors = FALSE
)

# load() restores the saved objects into the current environment and returns
# their names. Later cells require an object called `rse_gene_20`, so fail here
# with a clear message instead of silently using a stale or missing object.
loaded_objects <- load(paste0(IN_DIR, GEO_model_validation_DATA_FIL))
if (!("rse_gene_20" %in% loaded_objects)) {
  stop(
    "Expected object 'rse_gene_20' in ", GEO_model_validation_DATA_FIL,
    "; found: ", paste(loaded_objects, collapse = ", "),
    call. = FALSE
  )
}

In [None]:
%%R
# Extract the column (per-sample) metadata of the experiment object and show it.
tissue_data <- SummarizedExperiment::colData(rse_gene_20)
print(tissue_data)

In [None]:
%%R
# Filter samples by tissue, then drop genes with no counts.
#
# The tissue labels are read from the current `rse_gene_20` rather than from
# the `tissue_data` snapshot created in an earlier cell: once this cell has
# removed columns, that snapshot has the wrong length, so re-running the cell
# would index the object with a stale vector.
major_tissue <- colData(rse_gene_20)$Major_tissue
tissue_counts <- table(major_tissue)

# Tissues represented by 20 or fewer samples are excluded.
tissues_to_remove <- names(tissue_counts[tissue_counts <= 20])

# Spleen is excluded explicitly in addition to the low-count tissues.
# %in% never yields NA, so samples with a missing label are kept.
keep_sample <- !(major_tissue %in% c(tissues_to_remove, "Spleen"))
rse_gene_20 <- rse_gene_20[, keep_sample]

new_tissue_counts <- table(colData(rse_gene_20)$Major_tissue)
print(new_tissue_counts)

# Remove genes whose "raw_counts" assay sums to zero over the remaining samples.
row_totals <- rowSums(assays(rse_gene_20)$raw_counts)
rse_gene_20 <- rse_gene_20[row_totals > 0, ]

In [None]:
%%R
# Column metadata of the experiment object; display its Major_tissue column.
GEO_model_validation.cols <- colData(rse_gene_20)
print(GEO_model_validation.cols$Major_tissue)

In [None]:
%%R
# Write the Major_tissue column of the column metadata to an RDS file in OUT_DIR.
saveRDS(
  object = GEO_model_validation.cols$Major_tissue,
  file = paste0(OUT_DIR, "GEO_model_validation_tissue_vec.Rds")
)

In [None]:
%%R
# Write the external_id column of the column metadata to an RDS file in OUT_DIR.
saveRDS(
  object = GEO_model_validation.cols$external_id,
  file = paste0(OUT_DIR, "GEO_model_validation_sample_detail_vec.Rds")
)

In [None]:
%%R
# Column metadata of the experiment object.
GEO_model_validation_df_col_data <- colData(rse_gene_20)

# Preview the Row.names column. The call is wrapped in print() because it is
# not the last expression in the cell; as a bare expression its value is
# discarded without being displayed (the %%R cell magic only auto-prints the
# cell's final visible value).
print(head(GEO_model_validation_df_col_data$Row.names))

# Identifiers taken from the external_id column, used by later cells.
sample_id_list <- GEO_model_validation_df_col_data$external_id

In [None]:
%%R
# Display the identifiers taken from the external_id column of the column metadata.
print(sample_id_list)

In [None]:
%%R
# Pull the default assay matrix (assay() called without an assay index) out of
# the experiment object and convert it to a data frame, then display it.
default_assay <- SummarizedExperiment::assay(rse_gene_20)
GEO_model_validation.df <- as.data.frame(default_assay)
print(GEO_model_validation.df)

In [None]:
%%R
# Remove every ".<digits>" run from the row names (presumably the Ensembl
# version suffix -- confirm against the input IDs) and display the result.
versioned_ids <- rownames(GEO_model_validation.df)
ensembl_wo_ids <- gsub(
  pattern = "\\.[0-9]+",
  replacement = "",
  x = versioned_ids
)
print(ensembl_wo_ids)

In [None]:
%%R
# Replace the data frame's row names with the suffix-stripped identifiers and
# display the new row names.
row.names(GEO_model_validation.df) <- ensembl_wo_ids
print(row.names(GEO_model_validation.df))

In [None]:
%%R
# Display the first column (V1) of the Ensembl-to-Reactome mapping table.
print(ensembl2rxns.df[["V1"]])

In [None]:
%%R
# Identifiers present both in column V1 of the mapping table and among the
# suffix-stripped row names; report how many there are.
mapped_ids <- ensembl2rxns.df$V1
reactome_ensembl_ids <- intersect(mapped_ids, ensembl_wo_ids)
print(length(reactome_ensembl_ids))

In [None]:
%%R
# Write the shared identifier vector to an RDS file in OUT_DIR.
saveRDS(
  object = reactome_ensembl_ids,
  file = paste0(OUT_DIR, "reactome_ensembl_ids.Rds")
)

In [None]:
%%R
# Keep only the rows named in reactome_ensembl_ids, in that order.
# drop = FALSE guarantees the result stays a data frame even when there is a
# single column.
GEO_model_validation.df <- GEO_model_validation.df[reactome_ensembl_ids, , drop = FALSE]

# Report rows x columns. length() on a data frame returns only the number of
# columns, which a row subset never changes, so it cannot confirm the filter.
print(dim(GEO_model_validation.df))

In [None]:
%%R
# Write the row-subsetted data frame to an RDS file in OUT_DIR.
saveRDS(
  object = GEO_model_validation.df,
  file = paste0(OUT_DIR, "GEO_model_validation_df.Rds")
)