In [None]:
%load_ext rpy2.ipython

In [None]:
%%R

# Point R's library search path at the user-local package library that lives
# under the directory named by the SCRATCH environment variable.
scratch_path <- Sys.getenv("SCRATCH")
if (!nzchar(scratch_path)) {
  # Sys.getenv() returns "" for an unset variable, which would silently turn
  # the library location into "/Rlocal4.3.2".
  warning(
    "Environment variable SCRATCH is not set; ",
    "the local R library path will not resolve.",
    call. = FALSE
  )
}
.libPaths(file.path(scratch_path, "Rlocal4.3.2"))

In [None]:
%%R
library(magrittr)
library(ggplot2)
library(ggiraph)
library(dplyr)

# Directory the notebook reads its validation inputs from and writes its
# summary tables to. The trailing slash matters: paths are built by plain
# string concatenation.
OUT_DIR <- "../../../outputs/validation/"
# Sub-directory that receives the ARI scatter plots and PCA score tables.
OUT_DIR2 <- "../../../outputs/validation/output_ARI/"
# File writers such as write.csv() do not create missing directories, so make
# sure the sub-directory exists. This is non-recursive on purpose: if OUT_DIR
# itself is missing the working directory is probably wrong, and nothing
# should be created.
if (dir.exists(OUT_DIR) && !dir.exists(OUT_DIR2)) {
  dir.create(OUT_DIR2)
}

In [None]:
%%R
# Load the precomputed objects this notebook works on. Each file name is
# appended directly to OUT_DIR (which already ends in "/").
read_validation_rds <- function(file_name) {
  readRDS(paste0(OUT_DIR, file_name))
}

rxn2ensembls.nls <- read_validation_rds("rxn2ensembls_nls.Rds")
rxn_knn_misclass_rate.nls <- read_validation_rds(
  "toi_rxn_knn_misclass_rate_nls.Rds"
)
rxn_knn_ari.nls <- read_validation_rds("toi_rxn_knn_ari_nls.Rds")
rxn_knn_ecount.nls <- read_validation_rds("toi_rxn_knn_ecount_nls.Rds")
# NOTE(review): "detial" is spelled exactly as in the original file name;
# presumably it matches the file on disk -- confirm before correcting it.
tcga_tissue_detail.vec.train <- read_validation_rds(
  "tcga_tissue_detial_vec_train.Rds"
)
vst.count.mtx.train <- read_validation_rds("vst_count_mtx_train.Rds")
rxn_pca.nls <- read_validation_rds("rxn_pca_nls.Rds")

In [None]:
%%R
# Preview the leading entries (head()'s default of six) of rxn2ensembls.nls.
head(rxn2ensembls.nls, n = 6L)

In [None]:
%%R
# Preview the leading entries (head()'s default of six) of the
# misclassification-rate list.
head(rxn_knn_misclass_rate.nls, n = 6L)

In [None]:
%%R
# Preview the leading entries (head()'s default of six) of the ARI list.
head(rxn_knn_ari.nls, n = 6L)

In [None]:
%%R
# Preview the leading entries (head()'s default of six) of the ECOUNT list.
head(rxn_knn_ecount.nls, n = 6L)

In [None]:
%%R
# Report how many distinct values the training tissue vector holds, then
# print the vector itself.
tcga_tissue_detail.vec.train %>%
  unique() %>%
  length() %>%
  print()
print(tcga_tissue_detail.vec.train)

In [None]:
%%R
# Preview the leading part (head()'s default of six) of the training
# count object.
head(vst.count.mtx.train, n = 6L)

In [None]:
%%R
# Preview the leading entries (head()'s default of six) of rxn_pca.nls.
head(rxn_pca.nls, n = 6L)

In [None]:
%%R
# Construct the summary data frame: one row per element of
# rxn_knn_misclass_rate.nls (row-bound together), every column coerced to
# numeric, plus RXN_ID, ARI and ECOUNT columns.
rxn_tissue_mean_misclass.df <- as.data.frame(
  do.call(rbind, rxn_knn_misclass_rate.nls)
)
# Coerce column by column and assign back into the existing columns. The
# previous sapply() call simplifies to a plain vector when there is only one
# row, which collapsed the table into a single column.
rxn_tissue_mean_misclass.df[] <- lapply(
  rxn_tissue_mean_misclass.df,
  as.numeric
)
rownames(rxn_tissue_mean_misclass.df) <- names(rxn_knn_misclass_rate.nls)
rxn_tissue_mean_misclass.df$RXN_ID <- names(rxn_knn_misclass_rate.nls)

# `$<-` recycles a shorter vector whose length divides the row count, so make
# sure there is exactly one ARI and one ECOUNT value per row before attaching.
stopifnot(
  length(unlist(rxn_knn_ari.nls)) == nrow(rxn_tissue_mean_misclass.df),
  length(unlist(rxn_knn_ecount.nls)) == nrow(rxn_tissue_mean_misclass.df)
)
rxn_tissue_mean_misclass.df$ARI <- unlist(rxn_knn_ari.nls)
rxn_tissue_mean_misclass.df$ECOUNT <- unlist(rxn_knn_ecount.nls)

# For a data frame, length() is the number of columns.
length(rxn_tissue_mean_misclass.df)

In [None]:
%%R
# Export the summary table as a CSV file inside OUT_DIR.
write.csv(
  rxn_tissue_mean_misclass.df,
  file = paste0(OUT_DIR, "rxn_tissue_mean_misclass.csv")
)

In [None]:
%%R
# Keep the first 16 columns of the summary table and preview the result.
misclass_only.df <- rxn_tissue_mean_misclass.df %>% .[seq_len(16)]
# print(dim(rxn_tissue_mean_misclass.df))
head(misclass_only.df, n = 6L)

In [None]:
%%R
# Persist the summary data frame as an RDS file inside OUT_DIR.
saveRDS(
  rxn_tissue_mean_misclass.df,
  file = paste0(OUT_DIR, "toi_summary_df.Rds")
)

In [None]:
%%R
# Hierarchically cluster the first 15 columns of the summary table.
misclass_only.df <- rxn_tissue_mean_misclass.df[seq_len(15)]

# Keep numeric columns only; anything else cannot enter the distance matrix.
numeric_data <- dplyr::select_if(misclass_only.df, is.numeric)

# Transpose so that every original column becomes one row (observation), then
# let scale() standardise the columns of the transposed matrix.
df <- numeric_data %>%
  t() %>%
  scale()

# Pairwise Euclidean distances between the rows of df; stored on disk.
d <- parallelDist::parallelDist(df, method = "euclidean")
saveRDS(d, file = paste0(OUT_DIR, "misclass_dist_obj.Rds"))

# Ward (D2) agglomerative clustering. The fitted object is written out and
# read straight back, so everything below uses the stored copy.
hc1 <- hclust(d, method = "ward.D2")
saveRDS(hc1, file = paste0(OUT_DIR, "misclass_hc_obj.Rds"))
hc1 <- readRDS(paste0(OUT_DIR, "misclass_hc_obj.Rds"))

dend1 <- as.dendrogram(hc1)
plot(hc1, cex = 2)

In [None]:
%%R
# Generate figures from the summary data frame: for each of the first 15
# columns, plot ARI (x) against 1 - that column (y), coloured by ECOUNT, and
# save the plot as a PNG in OUT_DIR2.

# The row order does not depend on the column being plotted, so sort once
# (ascending ECOUNT) instead of on every iteration.
sorted.df <- rxn_tissue_mean_misclass.df %>% dplyr::arrange(ECOUNT)

# Fixed image size in inches (480 px at 72 ppi, the dimensions of a default
# png() device) so the saved files do not depend on whichever graphics device
# happens to be open when ggsave() runs.
plot_size_in <- 480 / 72

for (tis_idx in seq_len(15)) {
  tis_name <- colnames(rxn_tissue_mean_misclass.df)[tis_idx]
  print(tis_name)
  # Only numeric columns can be placed on the y axis.
  if (!is.numeric(rxn_tissue_mean_misclass.df[[tis_name]])) next

  plot.obj <- ggplot2::ggplot(sorted.df) +
    ggiraph::geom_point_interactive(aes(x = ARI,
                                        y = 1 - !!as.name(tis_name),
                                        colour = ECOUNT,
                                        tooltip = RXN_ID,
                                        data_id = RXN_ID)) +
    theme_bw() +
    ggtitle(paste0("ARI vs ", tis_name, " 1 - misclassification rate"))

  #girafe(ggobj = plot.obj)
  # paste0() rather than paste(): the default " " separator put spaces into
  # both the log line and the output file name.
  print(paste0("ARI_v_", tis_name))
  # Pass the plot explicitly and name the device as a string. `device = png()`
  # opened an extra graphics device as a side effect and handed ggsave() NULL,
  # and the plot itself was only found through last_plot().
  ggsave(
    filename = paste0(OUT_DIR2, "ARI_v_", tis_name, "_misclassification.png"),
    plot = plot.obj,
    device = "png",
    width = plot_size_in,
    height = plot_size_in
  )
}

In [None]:
%%R
# For every summary column whose name contains "Liver": rank the rows by that
# column (largest first), keep rows 1-10, look up each kept RXN_ID in
# rxn_pca.nls, row-bind the results, then print and export them as CSV.
liver_columns <- colnames(rxn_tissue_mean_misclass.df) %>%
  .[grepl("Liver", .)]

for (liver_col in liver_columns) {
  sorted_df <- dplyr::slice(
    dplyr::arrange(rxn_tissue_mean_misclass.df, desc(.data[[liver_col]])),
    seq_len(10)
  )

  # One rxn_pca.nls entry per selected reaction, in ranked order.
  pca_scores <- lapply(sorted_df$RXN_ID, function(id) rxn_pca.nls[[id]])
  pca_scores_df <- do.call(rbind, pca_scores)

  print(pca_scores_df)
  write.csv(
    pca_scores_df,
    file = paste0(OUT_DIR2, "Desc_PCA_scores_for_", liver_col, ".csv")
  )
}
