In [91]:
library(tidyverse)
library(ggplot2)
library(patchwork)
source("helper.R")
library(pdftools)

In [92]:
# Simulation info table (tab-separated) and the separate file whose column
# names are applied to it further down.
sinfo_path  <- "../../output/sims_info.tsv"
sinfoh_path <- "../../output/header_sims_info.tsv"

In [93]:
# Run parameters used below to select simulations and to build file names.
win_size <- 1000000   # embedded in folder and file names as "win-size_<n>"
chroms   <- c("chr12")  # only rows whose `chr` is listed here are kept
win_len  <- -1        # rows are kept only when their `win_len` equals this
padding  <- 0         # rows are kept only when their `padding` equals this
rescf    <- 1
prop_acc <- 0.4       # passed to meta_from_fname() and used in data file names

In [94]:
# Figures to assemble into panels. `widths` and `heights` are looked up by
# position, so entry k of each belongs to entry k of `fig_prefixes`
# (passed to ggsave() with units = "cm").
fig_prefixes <- c(
  "cor-pidxy-dT",
  "subset-chr12-landscapes",
  "annot-correls-tmrca"
)
widths  <- c(100, 125, 100)
heights <- c(25, 25, 35)
#fig_prefixes = c("annot-correls-tmrca")

In [95]:
# Root folders that are scanned for per-simulation result directories.
sims_path        <- "../../output/sims_results/"
varmut_sims_path <- "../../output/varmut/sims_results/"

In [96]:
# Load the simulation info table. The table file is read without a header;
# its column names come from the separate header file.
h <- read.table(sinfoh_path, sep = "\t", header = TRUE)
sinfo <- read.table(sinfo_path, sep = "\t")
cnames <- names(h)
# When the table has exactly one more column than the header file, that
# extra trailing column is named "poscoefdecayeps".
if (ncol(sinfo) == ncol(h) + 1) {
  cnames <- append(cnames, "poscoefdecayeps")
}
names(sinfo) <- cnames

In [97]:
# Columns of the info table that are carried over to each simulation's row.
#diff_cols = c("delrate","delcoef","posrate","poscoef","mu", "rescf", "win_len", "padding", "chr")
diff_cols <- c(
  "delrate", "delcoef", "posrate", "poscoef", "mu",
  "poscoefdecayeps", "rescf", "win_len", "padding", "chr"
)

In [98]:
# Tag used to pick out result folders that belong to the chosen window size.
wsize_str <- paste0("win-size_", as.integer(win_size))

In [99]:
# Result folders under `sims_path` whose name matches the window-size tag.
# The directory is listed once, and the no-match case yields an empty vector:
# paste0(dir, character(0)) would otherwise return the bare directory path as
# a bogus "folder".
sims_folders <- list.files(sims_path, pattern = wsize_str)
sims_folders <- if (length(sims_folders) > 0) {
  paste0(sims_path, sims_folders)
} else {
  character(0)
}

In [100]:
# Result folders under `varmut_sims_path` whose name matches the window-size
# tag. The directory is listed once, and the no-match case yields an empty
# vector: paste0(dir, character(0)) would otherwise return the bare directory
# path as a bogus "folder".
varmut_sims_folders <- list.files(varmut_sims_path, pattern = wsize_str)
varmut_sims_folders <- if (length(varmut_sims_folders) > 0) {
  paste0(varmut_sims_path, varmut_sims_folders)
} else {
  character(0)
}

In [101]:
# From here on both sets of simulations are handled as a single folder list.
sims_folders <- append(sims_folders, varmut_sims_folders)

In [102]:
# One row per result folder, built from whatever meta_from_fname() extracts
# for that folder. Rows are flattened with unlist() and stacked with rbind().
# NOTE(review): the columns presumably end up as text here; confirm against
# meta_from_fname() in helper.R.
simresults <- as.data.frame(
  do.call(
    rbind,
    lapply(
      sims_folders,
      function(folder) {
        unlist(meta_from_fname(folder, prop = prop_acc, suffix = "$"))
      }
    )
  ),
  stringsAsFactors = FALSE
)

[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"


In [103]:
#sims_folders

In [104]:
# Remember which folder each row was parsed from; rows follow `sims_folders`.
simresults$folderpath <- sims_folders

In [105]:
# Distinct parameter combinations from the info table, restricted to the
# simulations that actually have results on disk.
tmp <- unique(
  sinfo[
    sinfo$sup_rand_id %in% simresults$sup_rand_id,
    c(diff_cols, "sup_rand_id")
  ]
)

In [106]:
#sinfo[sinfo$sup_rand_id=="O814WK8MN3UOF27JIR",]["rand_id"]

In [107]:
# Attach the simulation parameters to each result row. The key is spelled out
# so the join cannot silently start matching on additional columns should the
# two tables ever share more names than "sup_rand_id".
simresults <- left_join(simresults, tmp, by = "sup_rand_id")

Joining, by = "sup_rand_id"



In [108]:
# Keep only the simulations that match the run parameters set at the top.
# which() drops rows where a comparison is NA (e.g. rows the join could not
# match); plain logical indexing would turn those into all-NA rows instead.
# The rescaling factor is compared against the `rescf` parameter rather than
# a hard-coded 1.
simresults <- simresults[
  which(
    simresults$win_len == win_len &
      simresults$padding == padding &
      simresults$rescf == rescf &
      as.numeric(simresults$win_size) == win_size &
      simresults$chr %in% chroms
  ),
]

In [109]:
# Treat a missing decay value as 0. The assignment is done on the column
# vector: `df[mask, ]$col <- 0` raises an error when `mask` selects no rows,
# i.e. whenever there is nothing to fill.
simresults$poscoefdecayeps[is.na(simresults$poscoefdecayeps)] <- 0

In [110]:
# Classify each simulation by which kinds of selected mutations it has:
#   no beneficial, no deleterious -> "neutral" (sigma == 0) or "varmut"
#   no beneficial, deleterious    -> "del_only"
#   beneficial, no deleterious    -> "pos_only"
#   both                          -> "both"
# Rates below 1e-16 count as zero. `sigma` is only converted to numeric in a
# later cell, so it is converted here for the comparison; comparing the raw
# text against 0 would misclassify spellings such as "0.0" or "0e+00".
simresults$selregime <- ifelse(
  simresults$posrate < 1e-16,
  ifelse(
    simresults$delrate < 1e-16,
    ifelse(as.numeric(simresults$sigma) == 0, "neutral", "varmut"),
    "del_only"
  ),
  ifelse(simresults$delrate < 1e-16, "pos_only", "both")
)

In [111]:
# Build the multi-line label shown above each simulation panel. The mutation
# rates are always listed; selection coefficients and the mutation-rate sd are
# added only for the regimes they apply to; the simulation id closes the label.
simresults$seldesc <- apply(simresults, 1, function(sim) {
  del_part <- ifelse(
    sim["selregime"] %in% c("del_only", "both"),
    paste0("\ncoef-D=", sim["delcoef"]),
    ""
  )
  pos_part <- ifelse(
    sim["selregime"] %in% c("both", "pos_only"),
    paste0("\ncoef-B=", sim["poscoef"], " decay-B=", sim["poscoefdecayeps"]),
    ""
  )
  mut_part <- ifelse(
    sim["selregime"] == "varmut",
    paste0(" mut-sd=", sim["sigma"]),
    ""
  )
  paste0(
    "mu-D=", sim["delrate"],
    " mu-B=", sim["posrate"],
    del_part,
    pos_part,
    mut_part,
    "\n(", sim["sup_rand_id"], ")"
  )
})

In [112]:
# `sigma` is used as a numeric sort key below, so convert it in place.
simresults[["sigma"]] <- as.numeric(simresults[["sigma"]])

In [113]:
# Work on a copy sorted for display: regimes in the fixed order given by
# `ordered` (only regimes that actually occur become factor levels), then by
# the simulation parameters within each regime.
tmp <- simresults
ordered <- c("neutral", "del_only", "pos_only", "both", "varmut")
classes <- unique(tmp$selregime)
tmp$selregime <- factor(
  tmp$selregime,
  levels = classes[order(match(classes, ordered))]
)
tmp <- tmp[
  order(
    tmp$selregime,
    tmp$posrate,
    -tmp$poscoefdecayeps,
    tmp$delrate,
    tmp$poscoef,
    tmp$delcoef,
    tmp$sigma
  ),
]

In [114]:
# Build the panel layout: row 1 holds the single "data" panel, and every
# selection regime gets its own row with one cell per simulation, filled from
# the left. Cells are created in the same order as the rows of `tmp`.
rownames(tmp) <- NULL
tab <- c("data" = 1, table(tmp$selregime))

# Preallocate instead of growing the list, and use seq_len() so a regime with
# a zero count contributes no cells (1:0 would yield two bogus ones).
areas <- vector("list", sum(tab))
next_slot <- 1L
for (row_idx in seq_along(tab)) {
  for (col_idx in seq_len(tab[[row_idx]])) {
    areas[[next_slot]] <- area(row_idx, col_idx)
    next_slot <- next_slot + 1L
  }
}

# Merge the individual cells into one layout object.
areas <- do.call(c, areas)

In [116]:
# Assemble one multi-panel figure per figure prefix: the data panel first,
# followed by one panel per row of `tmp`, arranged according to `areas`.
# Each panel is a stored plot object (.rds). The panel is saved as PDF and
# then converted to PNG.

# widths/heights are looked up by position, so every prefix needs an entry.
stopifnot(
  length(widths) >= length(fig_prefixes),
  length(heights) >= length(fig_prefixes)
)

# The output folders are the same for every figure, so resolve them once.
# `wstr` stays a top-level variable because a later cell reads it.
wstr <- paste0("win-size_", as.integer(win_size))
plots_folder_path <- paste0("../../output/data_and_sims/", wstr, "/pdf")
png_folder_path <- paste0("../../output/data_and_sims/", wstr, "/png")
# The PNGs go into the sibling "png" folder, which nothing else in this cell
# creates, so make sure both folders exist before writing.
for (dname in c(plots_folder_path, png_folder_path)) {
  if (!dir.exists(dname)) {
    dir.create(dname, recursive = TRUE)
  }
}

for (k in seq_along(fig_prefixes)) {
  fig_prefix <- fig_prefixes[k]

  # One stored plot per simulation, labelled with its description.
  plots <- lapply(seq_len(nrow(tmp)), function(i) {
    rds_path <- paste0(
      tmp[["folderpath"]][i], "/", fig_prefix, "_", tmp[["desc"]][i], ".rds"
    )
    readRDS(rds_path) + labs(subtitle = tmp[["seldesc"]][i])
  })

  data_path <- paste0(
    "../../output/greatapes-diversity-data/", fig_prefix,
    "_win-size_", as.integer(win_size),
    "_merged-mask_True_prop-acc_", prop_acc, ".rds"
  )
  plots <- c(list(readRDS(data_path) + labs(subtitle = "data")), plots)

  panel_plot <- wrap_plots(
    plots,
    nrow = length(classes), design = areas, guides = "collect"
  )

  # NOTE(review): the file name ends in "win-size_" with no value after it;
  # kept unchanged in case other tooling expects this exact name.
  panel_stem <- paste0(fig_prefix, "_", "panel_sims_and_data_win-size_")
  panel_fname <- paste0(plots_folder_path, "/", panel_stem, ".pdf")
  # Built explicitly rather than by substituting "pdf" -> "png" over the whole
  # path, which would also rewrite a "pdf" inside the figure prefix.
  panel_pngname <- paste0(png_folder_path, "/", panel_stem, ".png")

  ggsave(
    filename = panel_fname, plot = panel_plot,
    width = widths[k], height = heights[k],
    scale = 1.75, units = "cm", limitsize = FALSE
  )

  pdf_convert(panel_fname, dpi = 160, filenames = panel_pngname)
}

"Removed 283 rows containing missing values (geom_point)."
"Removed 294 rows containing missing values (geom_point)."
"Removed 159 rows containing missing values (geom_point)."
"Removed 88 rows containing missing values (geom_point)."
"Removed 80 rows containing missing values (geom_point)."
"Removed 1 rows containing missing values (geom_point)."
"Removed 148 rows containing missing values (geom_point)."
"Removed 1 rows containing missing values (geom_point)."
"Removed 110 rows containing missing values (geom_point)."
"Removed 127 rows containing missing values (geom_point)."
"Removed 39 rows containing missing values (geom_point)."
"Removed 113 rows containing missing values (geom_point)."
"Removed 118 rows containing missing values (geom_point)."
"Removed 55 rows containing missing values (geom_point)."
"Removed 31 rows containing missing values (geom_point)."
"Removed 3 rows containing missing values (geom_point)."


Converting page 1 to ../../output/data_and_sims/win-size_1000000/png/cor-pidxy-dT_panel_sims_and_data_win-size_.png... done!
Converting page 1 to ../../output/data_and_sims/win-size_1000000/png/subset-chr12-landscapes_panel_sims_and_data_win-size_.png... done!
Converting page 1 to ../../output/data_and_sims/win-size_1000000/png/annot-correls-tmrca_panel_sims_and_data_win-size_.png... done!


In [None]:
# Display the current contents of `tmp` for inspection.
tmp

In [None]:
# Figures that are copied and converted one file per simulation below.
fig_prefixes <- c(
  "subset-chr12-landscapes",
  "pairs-dxydxy",
  "pairs-pidxy",
  "pairs-pipi",
  "pairs-pidxy-colbyrec",
  "pairs-pidxy-colbyex"
)

In [None]:
# For every figure prefix, copy each simulation's PDF (and the data PDF) into
# the shared output folder under a descriptive name, then convert the copied
# simulation PDFs to PNG.

# Computed here rather than relying on `wstr` having been left behind by the
# panel-building loop in an earlier cell.
wstr <- paste0("win-size_", as.integer(win_size))
plots_folder_path <- paste0("../../output/data_and_sims/", wstr)

dnames <- paste0(plots_folder_path, c("/pdf", "/png"))
for (dname in dnames) {
  if (!dir.exists(dname)) {
    dir.create(dname, recursive = TRUE)
  }
}

# File-name-safe version of the panel label: spaces and newlines become "_".
# It does not depend on the figure, so it is computed once.
tmp$newfname <- gsub("[ \n]", "_", tmp[, "seldesc"])

n_sims <- nrow(tmp)

for (fig_prefix in fig_prefixes) {
  data_path2 <- paste0(
    "../../output/greatapes-diversity-data/figs/", fig_prefix,
    "_win-size_", as.integer(win_size),
    "_merged-mask_True_prop-acc_", prop_acc, ".pdf"
  )
  data_copy <- paste0(plots_folder_path, "/pdf/", fig_prefix, "_", "data.pdf")
  # overwrite = TRUE so a re-run refreshes the copy instead of silently
  # keeping a stale file from a previous run.
  if (!file.copy(data_path2, data_copy, overwrite = TRUE)) {
    warning("Could not copy ", data_path2, call. = FALSE)
  }

  # paste0() treats a zero-length input as "", so skip the per-simulation
  # work entirely when there are no simulations.
  if (n_sims == 0) {
    next
  }

  plots_paths <- paste0(
    tmp[["folderpath"]], "/figs/", fig_prefix, "_", tmp[["desc"]], ".pdf"
  )
  out_stems <- paste0(
    fig_prefix, "_", tmp[["selregime"]], "_", tmp[["newfname"]]
  )
  new_paths <- paste0(plots_folder_path, "/pdf/", out_stems, ".pdf")
  # Built explicitly rather than by substituting "pdf" -> "png" over the whole
  # path, which would also rewrite a "pdf" inside a prefix or label.
  png_paths <- paste0(plots_folder_path, "/png/", out_stems, ".png")

  copied <- file.copy(plots_paths, new_paths, overwrite = TRUE)
  if (!all(copied)) {
    warning(
      "Could not copy: ", paste(plots_paths[!copied], collapse = ", "),
      call. = FALSE
    )
  }

  # Only convert files that were actually copied in this run.
  for (i in which(copied)) {
    pdf_convert(new_paths[i], dpi = 160, filenames = png_paths[i])
  }
}
    
