-
Notifications
You must be signed in to change notification settings - Fork 0
/
recipe_sel_greatapes.slim
162 lines (156 loc) · 5.77 KB
/
recipe_sel_greatapes.slim
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
initialize() {
if(exists("rescf")) {
defineConstant("Q", asInteger(rescf));
} else {
defineConstant("Q", 1);
}
if (exists("seed")) {
setSeed(asInteger(seed));
}
// this should be used for sims with large populations being run for a long period of time. the number of nodes in the treeseq explodes.
if (exists("siminterval")) {
if (siminterval =="") {
initializeTreeSeq();
} else {
catn("Setting simplification interval");
initializeTreeSeq(simplificationInterval=asInteger(siminterval));
}
} else {
initializeTreeSeq();
}
initializeMutationRate(asFloat(mu)*Q);
//del mutations
initializeMutationType("m1", 0.5, "g", asFloat(delcoef)*Q, 0.16); //shape per Castellano et al 2019 Genetics
initializeMutationType("m2", 0.5, "e", asFloat(poscoef)*Q);
initializeGenomicElementType("g1", c(m1,m2), c(asFloat(delprop),asFloat(posprop)));
f = readFile(exonfile);
i = 0;
last = 0;
for (line in f) {
coord = asInteger(strsplit(line[0], "\t")[1:2]);
// just in case my file goes for longer than the stretch I want to simulate
if (coord[0] > asInteger(L)-1) {
break;
}
if(coord[1] > asInteger(L)-1) {
coord[1] = asInteger(L)-1;
}
// the -1 in the end coordinate is bc bed are not inclusive, but slim is
initializeGenomicElement(g1, coord[0], coord[1]-1);
last = coord[1];
i=i+1;
}
if(i==0) {
initializeGenomicElement(g1, 0, asInteger(L)-1);
}
// hacky way of making the last base always be a genomic element - downstream this matters for recapitation/overlay w/ mutation
if (last < asInteger(L)-1 & i != 0) {
initializeGenomicElement(g1,asInteger(L)-2, asInteger(L)-1);
}
//reading rec map
lines = readFile(recfile);
rates=NULL;
ends=NULL;
for (line in lines) {
cols = strsplit(line,"\t");
ends = c(ends, asInteger(cols[2])-1);
rates = c(rates, asFloat(cols[3]));
}
// IMPLEMENT CASE WHERE L IS SMALLER THAN REC MAP
if (ends[length(ends)-1] > asInteger(L)-1) {
// adding a conditional to make sure rec is at least the first rate
if(ends[0] > asInteger(L)-1) {
ends[0]=asInteger(L)-1;
}
rates=rates[ends<asInteger(L)];
ends=ends[ends<asInteger(L)];
ends[length(ends)-1] = asInteger(L)-1;
}
// CASE WHERE LAST END IS SMALLER THAN REC MAP
if (ends[length(ends)-1]< asInteger(L)-1) {
ends = c(ends,asInteger(L)-1);
rates=c(rates,0.0);
}
// rescaling rates
rates = rates * 1e-8;
rates=(1-(1-2*rates)^Q)/2;
initializeRecombinationRate(rates,ends);
}
1 late() {
// rescaling pop size and generations
pop_size = integerDiv(asInteger(N),Q);
n_gens = integerDiv(asInteger(gens),Q);
at_gen = 0;
// making sure the path constant exists
if (!exists("path_population_tree"))
defineConstant("path_population_tree", "");
// setting path to infile -- restarting from there
infile = path_population_tree;
// checking if any intermediate file has been saved
// so that we can restart from there
if (n_gens > 5000) {
outs = system("python find_temp_trees.py "+outfile);
if (length(outs) == 2)
{
infile = outs[0];
at_gen = asInteger(outs[1]);
catn("Found intermediate TreeSequence to start from: "+infile+" - generation "+asString(at_gen));
}
}
if (infile == "") {
sim.addSubpop("p1", pop_size);
} else {
catn("Reading previous sim from file "+infile);
sim.readFromPopulationFile(infile);
p1.setSubpopulationSize(pop_size);
}
// defining start gen
defineConstant("branch_start_gen", sim.generation-at_gen);
defineConstant("sim_start_gen", sim.generation);
catn("Starting sim from generation: "+asString(sim_start_gen)+", Branch started at: "+asString(branch_start_gen));
// rescheduling the time to end simulation
sim.rescheduleScriptBlock(s0, generations=branch_start_gen+n_gens);
//schedule sampling in equaly spaced intervals
intervals = 1:3/4;
remember_times = branch_start_gen+asInteger(ceil(intervals*n_gens));
remember_times = remember_times[remember_times > sim_start_gen];
if (length(remember_times[remember_times == sim_start_gen]) > 0)
{
catn("Remembering at generation "+sim.generation);
sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);
}
if (length(remember_times) > 0)
sim.rescheduleScriptBlock(s1, generations=remember_times);
// scheduling saving output + progress messages
if (n_gens > 5000) {
saving_times = branch_start_gen+seq(5000, n_gens, 5000);
saving_times = saving_times[saving_times > sim_start_gen];
if (length(saving_times) > 0)
sim.rescheduleScriptBlock(s2, generations=saving_times);
}
}
// final event: save tree seq
s0 5 late() {
sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);
sim.treeSeqOutput(outfile);
catn("Saving output tree in generation "+sim.generation);
catn("To file: "+outfile);
system("python clear_temp_trees.py "+outfile);
catn("Clearing intermediate tree files");
}
// remembering individuals
s1 5 late() {
catn("Remembering at generation "+sim.generation);
sim.treeSeqRememberIndividuals(sim.subpopulations.individuals);
}
// progress + saving trees
s2 5 late() {
gen = (sim.generation-branch_start_gen);
if (gen % 5000 == 0) {
catn("You got to generation "+sim.generation);
}
if (gen % 10000 == 0) {
catn("Saving tree generation "+ sim.generation);
sim.treeSeqOutput(outfile+asString(gen));
}
}