In [1]:
library(tidyverse)
library(ggplot2)
library(patchwork)
source("helper.R")

-- Attaching packages --------------------------------------- tidyverse 1.3.1 --

v ggplot2 3.3.3     v purrr   0.3.4
v tibble  3.1.2     v dplyr   1.0.6
v tidyr   1.1.3     v stringr 1.4.0
v readr   1.4.0     v forcats 0.5.1

-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()



In [2]:
# Simulation info table and the separate file that carries its column header.
sinfo_path <- "../../output/sims_info.tsv"
sinfoh_path <- "../../output/header_sims_info.tsv"

In [3]:
# Parameters used further down to select the matching simulation results
# and to build file names.
win_size <- 1e6
win_len <- -1
padding <- 0
rescf <- 1
prop_acc <- 0.4

In [4]:
# Prefix shared by the figure objects that are loaded below.
fig_prefix <- "cor-pidxy-dT"

In [5]:
# Root folders of the simulation outputs, and the .rds file holding the
# figure for the observed data (name built from prefix, window size and
# prop_acc).
sims_path <- "../../output/sims_results/"
varmut_sims_path <- "../../output/varmut/sims_results/"
data_path <- paste0(
  "../../output/greatapes-diversity-data/", fig_prefix,
  "_win-size_", as.integer(win_size),
  "_merged-mask_True_prop-acc_", prop_acc,
  ".rds"
)

In [6]:
# Load the simulation info table. The table itself is read without a header;
# its column names are taken from the separate header file.
sinfo <- read.table(sinfo_path, sep = "\t")
h <- read.table(sinfoh_path, sep = "\t", header = TRUE)
cnames <- colnames(h)
# A table that is exactly one column wider than the header gets the extra
# trailing column named "poscoefdecayeps".
if (ncol(sinfo) == length(cnames) + 1) {
  cnames <- c(cnames, "poscoefdecayeps")
}
# Any other width mismatch is an error: assigning too few names would
# silently leave NA column names and break the column lookups below.
if (ncol(sinfo) != length(cnames)) {
  stop(
    "sims info table has ", ncol(sinfo), " columns but ",
    length(cnames), " column names are available",
    call. = FALSE
  )
}
colnames(sinfo) <- cnames

In [7]:
# Info-table columns that are attached to each simulation result
# (an earlier version of this list lacked "poscoefdecayeps").
diff_cols <- c(
  "delrate", "delcoef", "posrate", "poscoef", "mu", "poscoefdecayeps",
  "rescf", "win_len", "padding", "chr"
)

In [8]:
# Tag used to pick out result folders for the chosen window size.
wsize_str <- paste0("win-size_", as.integer(win_size))

In [9]:
# Result folders under sims_path whose name matches the window-size tag.
# The directory is listed once, and the prefix is only pasted on when there
# is at least one match: paste0(sims_path, character(0)) would otherwise
# return the bare base path as if it were a result folder.
sims_folders <- grep(wsize_str, list.files(sims_path), value = TRUE)
if (length(sims_folders) > 0) {
  sims_folders <- paste0(sims_path, sims_folders)
}

In [10]:
# Result folders under varmut_sims_path whose name matches the window-size
# tag. The directory is listed once, and the prefix is only pasted on when
# there is at least one match: paste0(path, character(0)) would otherwise
# return the bare base path as if it were a result folder.
varmut_sims_folders <- grep(wsize_str, list.files(varmut_sims_path), value = TRUE)
if (length(varmut_sims_folders) > 0) {
  varmut_sims_folders <- paste0(varmut_sims_path, varmut_sims_folders)
}

In [11]:
# Treat both sets of folders as one list from here on.
# Note: re-running this cell appends the varmut folders again.
sims_folders <- append(sims_folders, varmut_sims_folders)

In [12]:
# Build one row of metadata per result folder: meta_from_fname() (not defined
# in this notebook; presumably from helper.R) is applied to every folder path,
# each result is flattened with unlist(), and the rows are stacked.
# Values are kept as strings rather than factors.
simresults <- as.data.frame(
  do.call(
    rbind,
    lapply(
      X = sims_folders,
      FUN = function(folder) {
        unlist(meta_from_fname(folder, prop = prop_acc, suffix = "$"))
      }
    )
  ),
  stringsAsFactors = FALSE
)

[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"
[1] "entrei"


In [13]:
#sims_folders

In [14]:
# Remember which folder each metadata row came from (same order as above).
simresults[["folderpath"]] <- sims_folders

In [15]:
# Distinct parameter rows of the info table for the simulations found on disk.
tmp <- unique(
  sinfo[
    sinfo[["sup_rand_id"]] %in% simresults[["sup_rand_id"]],
    c(diff_cols, "sup_rand_id")
  ]
)

In [16]:
# Interactive check: rand_id values recorded for one specific sup_rand_id.
sinfo[sinfo$sup_rand_id == "O814WK8MN3UOF27JIR", "rand_id", drop = FALSE]

Unnamed: 0_level_0,rand_id
Unnamed: 0_level_1,<fct>
131,6DRX6RL6BNZCHP8
2558,6DRX6RL6BNZCHP8
3323,6DRX6RL6BNZCHP8
5056,6DRX6RL6BNZCHP8
5117,6DRX6RL6BNZCHP8
5649,6DRX6RL6BNZCHP8
5812,6DRX6RL6BNZCHP8
6261,6DRX6RL6BNZCHP8
7464,6DRX6RL6BNZCHP8
8288,6DRX6RL6BNZCHP8


In [17]:
# Attach the simulation parameters to each result row. The key is spelled out
# so that a column accidentally shared by both tables cannot silently become
# part of the join key.
simresults <- left_join(simresults, tmp, by = "sup_rand_id")

Joining, by = "sup_rand_id"



In [18]:
# Keep only the results that match the analysis parameters set at the top.
# - rescf is compared with the `rescf` parameter instead of a hard-coded 1.
# - which() drops rows whose comparison is NA (e.g. results without a match
#   in the info table); plain logical indexing would turn them into all-NA
#   rows.
# - win_size is converted with as.numeric() before the comparison because the
#   column holds strings.
simresults <- simresults[
  which(
    simresults$win_len == win_len &
      simresults$padding == padding &
      simresults$rescf == rescf &
      as.numeric(simresults$win_size) == win_size
  ),
]

In [19]:
# Interactive check: display the window size used in the filter above.
win_size

In [20]:
# Treat a missing decay parameter as 0.
# Assigning into the column vector (rather than into a row subset of the data
# frame) also works when no value is missing; `df[mask, ]$col <- 0` fails with
# "replacement has 1 row, data has 0" in that case.
simresults$poscoefdecayeps[is.na(simresults$poscoefdecayeps)] <- 0

In [21]:
# Classify each simulation by which kinds of selected mutations it has:
#   no positive, no deleterious -> "neutral" (sigma == 0) or "varmut"
#   no positive, deleterious    -> "del_only"
#   positive, no deleterious    -> "pos_only"
#   positive and deleterious    -> "both"
# Rates below 1e-16 count as zero. sigma is still a character column at this
# point, so it is converted before the comparison; otherwise "0.0" or "0e+00"
# would not compare equal to 0.
simresults$selregime <- ifelse(
  simresults$posrate < 1e-16,
  ifelse(
    simresults$delrate < 1e-16,
    ifelse(as.numeric(simresults$sigma) == 0, "neutral", "varmut"),
    "del_only"
  ),
  ifelse(simresults$delrate < 1e-16, "pos_only", "both")
)

In [22]:
# Build a multi-line text label per simulation from its parameters; the
# optional parts depend on the selection regime.
# apply() hands each row over as a character vector, so the numbers appear in
# the label exactly as that conversion formats them.
simresults$seldesc <- apply(simresults, 1, function(sim) {
  # Deleterious coefficient: only for regimes with deleterious mutations.
  del_part <- ifelse(
    sim["selregime"] %in% c("del_only", "both"),
    paste0("\ncoef-D=", sim["delcoef"]),
    ""
  )
  # Beneficial coefficient and its decay: only for regimes with beneficial mutations.
  pos_part <- ifelse(
    sim["selregime"] %in% c("both", "pos_only"),
    paste0("\ncoef-B=", sim["poscoef"], " decay-B=", sim["poscoefdecayeps"]),
    ""
  )
  # sigma: only for the variable-mutation regime.
  mut_part <- ifelse(
    sim["selregime"] == "varmut",
    paste0(" mut-sd=", sim["sigma"]),
    ""
  )
  paste0(
    "mu-D=", sim["delrate"], " mu-B=", sim["posrate"],
    del_part, pos_part, mut_part
  )
})

In [23]:
#simresults

In [24]:
# Convert sigma to numeric so it can be used as a sort key below.
simresults[["sigma"]] <- as.numeric(simresults[["sigma"]])

In [25]:
# Work on a copy of the results: make the regime a factor whose levels follow
# a fixed display order (restricted to the regimes actually present), then
# sort the rows by regime and by the simulation parameters.
tmp <- simresults
ordered <- c("neutral", "del_only", "pos_only", "both", "varmut")
classes <- unique(tmp$selregime)
tmp$selregime <- factor(tmp$selregime, levels = intersect(ordered, classes))
# poscoefdecayeps is negated, so larger decay values come first.
tmp <- tmp[
  order(
    tmp$selregime, tmp$posrate, -tmp$poscoefdecayeps,
    tmp$delrate, tmp$poscoef, tmp$delcoef, tmp$sigma
  ),
]

In [26]:
# Interactive check: row permutation that sorts tmp by regime.
order(tmp$selregime)

In [27]:
# Reset the row names left over from sorting, then load the saved plot object
# of every simulation (one .rds per result folder) and title it with the
# simulation's parameter label.
rownames(tmp) <- NULL
plots <- apply(tmp, 1, function(sim) {
  rds_file <- paste0(sim["folderpath"], "/", fig_prefix, "_", sim["desc"], ".rds")
  readRDS(rds_file) + ggtitle(sim["seldesc"])
})

In [28]:
# Put the plot of the observed data first, ahead of the simulation plots.
plots <- append(list(readRDS(data_path) + ggtitle("data")), plots)

In [29]:
# Number of simulations per selection regime, in factor-level order.
tab <- table(tmp[["selregime"]])

In [30]:
# Prepend a one-plot entry for the observed data.
tab <- c(data = 1, tab)

In [31]:
# Interactive check: plots per row of the panel ("data" first).
tab

In [32]:
# Build the panel layout: one row per entry of `tab`, and within each row one
# single-cell area per plot, filled from the left.
# seq_len() yields no cells for a zero count (1:0 would yield two), the list
# is built without growing it inside a loop, and no variables named `row` or
# `col` are left behind to shadow the base functions.
areas <- unlist(
  lapply(seq_along(tab), function(row_idx) {
    lapply(seq_len(tab[[row_idx]]), function(col_idx) area(row_idx, col_idx))
  }),
  recursive = FALSE
)

In [33]:
# Merge the individual areas into a single layout object.
areas <- do.call(what = c, args = areas)

In [34]:
#design=""
#for (regime in names(tab)) {
#    if (design != "") design = paste0(design,"\n")
#    num_cols = 1:tab[regime]
#    num_blanks = max(tab)-tab[regime]
#    if (regime == "data") {
#        indexes = c(1)
#    } else {
#        indexes = 1+as.numeric(rownames(tmp[tmp$selregime==regime,]))        
#    }
#    print(LETTERS[indexes])
#    print(rep("#", num_blanks))
#    add = paste0(paste0(LETTERS[indexes],collapse=""), paste0(rep("#", num_blanks),collapse=""), collapse="")
#    print(design)
#    print(add)
#    design = paste0(design,add)
#    print(design)
#}

In [35]:
# Interactive check: regime of each plot, in plotting order.
tmp$selregime

In [36]:
# Arrange all plots according to the area layout and collect the legends.
# nrow now counts every row of the layout (the data row plus one per regime),
# so it agrees with `design`; length(classes) left the data row out.
panel_plot <- wrap_plots(
  plots,
  nrow = length(tab),
  design = areas,
  guides = "collect"
)

In [37]:
# Output folder for this window size and figure prefix; created on first use.
wstr <- paste0("win-size_", as.integer(win_size))
plots_folder_path <- paste0("../../output/data_and_sims/", wstr, "/", fig_prefix)

dname <- paste0(plots_folder_path)
if (!dir.exists(dname)) {
  dir.create(dname, recursive = TRUE)
}


In [39]:
# Write the combined panel to a PDF in the output folder. limitsize is off
# because the requested page size is large.
# NOTE(review): the file name ends in "win-size_" with no value after it;
# confirm whether the window size was meant to be appended.
ggsave(
  filename = paste0(plots_folder_path, "/", "panel_sims_and_data_win-size_.pdf"),
  plot = panel_plot,
  width = 100,
  height = 25,
  scale = 1.75,
  units = "cm",
  limitsize = FALSE
)

"Removed 283 rows containing missing values (geom_point)."
"Removed 294 rows containing missing values (geom_point)."
"Removed 159 rows containing missing values (geom_point)."
"Removed 88 rows containing missing values (geom_point)."
"Removed 80 rows containing missing values (geom_point)."
"Removed 1 rows containing missing values (geom_point)."
"Removed 148 rows containing missing values (geom_point)."
"Removed 1 rows containing missing values (geom_point)."
"Removed 110 rows containing missing values (geom_point)."
"Removed 127 rows containing missing values (geom_point)."
"Removed 39 rows containing missing values (geom_point)."
"Removed 113 rows containing missing values (geom_point)."
"Removed 118 rows containing missing values (geom_point)."
"Removed 55 rows containing missing values (geom_point)."
"Removed 31 rows containing missing values (geom_point)."
"Removed 3 rows containing missing values (geom_point)."


In [None]:
# Interactive check: the sorted table of simulations.
tmp

In [40]:
# Switch to the second figure type and make sure its output folder exists.
# Note: this overwrites fig_prefix for every cell that runs afterwards.
fig_prefix <- "subset-chr12-landscapes"
plots_folder_path <- paste0("../../output/data_and_sims/", wstr, "/", fig_prefix)

dname <- paste0(plots_folder_path)
if (!dir.exists(dname)) {
  dir.create(dname, recursive = TRUE)
}

In [41]:
# PDF of the observed-data figure for the current figure prefix.
data_path2 <- paste0(
  "../../output/greatapes-diversity-data/figs/", fig_prefix,
  "_win-size_", as.integer(win_size),
  "_merged-mask_True_prop-acc_", prop_acc,
  ".pdf"
)

In [42]:
# Source PDF of every simulation (inside its result folder).
plots_paths <- apply(tmp, 1, function(sim) {
  paste0(sim["folderpath"], "/figs/", fig_prefix, "_", sim["desc"], ".pdf")
})

# File-name-safe version of the parameter label: spaces and line breaks
# become underscores.
tmp$newfname <- gsub("\n", "_", gsub(" ", "_", tmp[, "seldesc"]))

# Destination of each copy: "<regime>_<label>.pdf" in the output folder.
new_paths <- apply(tmp, 1, function(sim) {
  paste0(plots_folder_path, "/", sim["selregime"], "_", sim["newfname"], ".pdf")
})

In [43]:
# Interactive check: folder the PDFs are copied into.
plots_folder_path

In [44]:
# Copy the per-simulation PDFs and the data PDF into the output folder.
# overwrite = TRUE keeps re-runs from leaving stale copies behind (the default
# skips existing targets and only reports FALSE), and failed copies raise a
# warning instead of passing unnoticed. The logical results are still shown.
sims_copied <- file.copy(plots_paths, new_paths, overwrite = TRUE)
if (!all(sims_copied)) {
  warning(
    "could not copy: ", paste(plots_paths[!sims_copied], collapse = ", "),
    call. = FALSE
  )
}
sims_copied

data_copied <- file.copy(
  data_path2,
  paste0(plots_folder_path, "/data.pdf"),
  overwrite = TRUE
)
if (!all(data_copied)) {
  warning("could not copy: ", data_path2, call. = FALSE)
}
data_copied

In [45]:
# Interactive check: source PDF paths that were copied.
plots_paths

In [None]:
# Interactive check: the `desc` value of the first simulation.
tmp[1,"desc"]