-
Notifications
You must be signed in to change notification settings - Fork 0
/
mutationalBias.js
396 lines (358 loc) · 13.3 KB
/
mutationalBias.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
/* __DOC__ equivalent:
The library uses an object mutball, as in mutation ball, like tarball, which store all the variables.
DOM I/O is handled by mutationalAux.js, in there there are some elements whose IDs match attributes of the mutball and it reads the query string too if the webhost allows it. The attributes are:
source: a note telling us whence the object came
sequence: ATATCGG
baseList:G286A T306C A687T T880C\nWT\nWT
freqMean: mean frequency of number of mutations per sequence, average
freqVar: var of #muts/seq
freqList: array of the mutation counts of the rows of baselist.
freqΣ: sum of #muts/seq sampled
freqλ: Poisson distribution of #muts/seq
rawTable: 4x4 nested arrays. containing the mutation spectrum observed
mutTable: as above but normalised.
sumA: 29.6
sumT etc.
A2T number of incidents going from N to N. There are 16 of these.
size: gene size in kb.
_types: seriously lazy way of doing this, its an array of the attributes I care about. python's __dict__
TsOverTv and TsOverTv_error: transitions over transversions
More like that, see _types. W=weak AT, S=strong GC, N=any ΣTs total transitions. ΣTv transversions.
The mutball constructor is mutagen.
*/
//**************************************************
//Globals. Turbo Pascal style declaration at the top. Go high school nostagia!
var A = 0; //Not the nicest way, but it's handy way to avoid a needless obj and quotes.
var T = 1;
var G = 2;
var C = 3;
var bases = ['A', 'T', 'G', 'C']; // Unfortunately two different approaches to fill tables were inadvertedly taken.
var ways=[];
for (b in bases) {
for (d in bases) {
ways.push(bases[b]+"2"+bases[d]);
}
}
//**************************************************
//the mutball object.
//Commit and read were initially written as methods of mutball, but were moved out in order to quarantine interactions with document to the document.
//The single letter codes (A, C, T, G, S, W, N and so forth are the normal ones. see https://en.wikipedia.org/wiki/Nucleic_acid_notation if unfamiliar. Ts transition, Tv transversion.
function mutagen() {
var mutball = {
source: "load",
sequence: "",
baseList: "",
freqMean: "N/A",
freqVar: "N/A",
freqList: "N/A",
mutTable: [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]
],
compTable: [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]
],
rawTable: [
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0],
[0, 0, 0, 0]
],
_types: ['TsOverTv', 'W2SOverS2W', 'W2N', 'S2N', 'W2S', 'S2W', 'ΣTs', 'Ts1', 'Ts2', 'ΣTv', 'TvW', 'TvN1', 'TvS', 'TvN2']
}
for (b in bases) {
mutball["sum" + bases[b]] = "25";
}
for (b in ways) {
mutball[ways[b]] = "0";
}
return mutball;
}
//**********************************
//Math fnxs
function variance(values) {
var avg = average(values);
var squareDiffs = values.map(function(value) {
var diff = value - avg;
return diff * diff;
});
return average(squareDiffs);
}
function average(data) {
var sum = data.reduce(function(sum, value) {
return sum + value;
}, 0);
return sum / data.length;
}
function rd(x) {
return Math.round(x * 10) / 10;
}
function transSum(m, x, y, w, z) {
var s = m[x][y] + m[w][z];
var v = variance([m[x][y], m[w][z]]);
var d = Math.sqrt(v / 2);
return [s, d, v];
}
function varRatio(mx, vx, my, vy, n) {
var mxsq = Math.pow(mx, 2);
var mysq = Math.pow(my, 2);
var mxqd = Math.pow(mx, 4);
return Math.abs(vy / mxsq + mysq * vx / mxqd) / n; //covariance set to 0.
}
function qV(m, x, y) {
return variance([m[x][y], m[y][x]]);
}
function fit(ordinate) {
//ordinate=mutball.freqList;
abscissa = [];
s = ordinate.reduce(function(a, b) {
return a + b;
}); //And this is why JS is a pain.
//load underscore range? Nah, I need to fix blanks and normalise
for (n = 0; n < ordinate.length; n++) {
ordinate[n] = ordinate[n] / s || 0;
abscissa[n] = n;
}
//
//Math.factorial() does not call. http://mathjs.org/docs/reference/functions/factorial.html says it should.
poisson = function(x, P) {
λ = P[0];
return x.map(function(xi) {
function factorial(n) {
if (n == 0) {
return 1
} else if (n == 1) {
return 1
} else {
return n-- * factorial(n)
}
}
return Math.pow(λ, xi) * Math.exp(-λ) / factorial(xi)
})
}
return fminsearch(poisson, [0.5], abscissa, ordinate);
}
//Major fnxs
function calcFreq(mutball) {
//mutball = mutball || mutagen().read(["sequence","baseList"]);
//parse sequence
var seq = mutball.sequence.replace(/\>.*?(\n|\r)/g, '').replace(/U/g, 'T').replace(/[^ATGC]/g, '').split('');//lowercase means masked normally, so best no i.
var freq = {
'A': 0,
'C': 0,
'G': 0,
'T': 0
};
for (var x = 0; x < seq.length; x++) {
freq[seq[x]]++;
}
for (var x = 0; x < 4; x++) {
mutball["sum" + bases[x]] = Math.round(freq[bases[x]] / seq.length * 1000) / 10;
}
mutball.size=Math.round(mutball.sequence.length/100)/10;
//parse mutants
var tally = [];
var raw_list = mutball.baseList.replace(/U/g, 'T').split(/\r\n|\r|\n|\;/g);
for (var i = 0; i < raw_list.length; i++) {
if (!raw_list[i].match(/\w/)) {
continue;
}
var variant = raw_list[i].split(/[\s\t\,]+/g);//split on white space.
if ((variant[0] == 'WT') || (variant[0] == 'wt') || (variant[0].match("wild"))) {
tally[i] = 0;
continue;
}
tally[i] = variant.length;
for (var j = 0; j < variant.length; j++) {
//console.log(variant[j]);
if (variant[j].match(/\>/)) {mutball[variant[j].substr(-3, 1) + "2" + variant[j].substr(-1, 1)]++;} //123A>C
else if (variant[j].match(/\w/)) {mutball[variant[j].substr(0, 1) + "2" + variant[j].substr(-1, 1)]++;} //A123C
else {tally[i]-=1;} ///Don't die? why is there this formatting error? drunk student?
}
}
var sum=tally.reduce(function(a, b) {return a + b;});
for (var x = 0; x < 4; x++) {
for (var y = 0; y < 4; y++) {
mutball.rawTable[x][y]=mutball[bases[x]+ "2" + bases[y]]/sum*100;
}
}
mutball.freqMean = Math.round(average(tally) * 10) / 10;
mutball.freqVar = Math.round(variance(tally) * 10) / 10;
mutball.freqΣ=tally.reduce(function(sum, value) {return sum + value;}, 0);
var pivot = []; //can't remember the fancy trick to make a counter
for (var j = 0; j < tally.length; j++) {
pivot[tally[j]] = pivot[tally[j]] || 0;
pivot[tally[j]]++;
}
for (var j = 0; j < pivot.length; j++) { //copypaste fix. clean needed.
pivot[j] = pivot[j] || 0;
}
mutball.freqList = pivot.map(function(x) {
return x
}); //deref and re-ref.
mutball.freqλ = rd(fit(pivot) * 10) / 10;
mutball.source = "freq";
//not mutball.mutTable_raw! as I don't want to loose the input boxes.
return mutball;
}
function calcBias(mutball) {
//don't use storage as it could be user given.
//mutball = mutball || mutagen().read(ways.concat(["sumA","sumT","sumG","sumC"]));
//Get data. As they are separate input boxes the thing is more nightmare
var Amuts = ["A2A", "A2T", "A2G", "A2C"].map(function(x) {
return mutball[x]
});
var Tmuts = ["T2A", "T2T", "T2G", "T2C"].map(function(x) {
return mutball[x]
});
var Gmuts = ["G2A", "G2T", "G2G", "G2C"].map(function(x) {
return mutball[x]
});
var Cmuts = ["C2A", "C2T", "C2G", "C2C"].map(function(x) {
return mutball[x]
});
var muts = [Amuts, Tmuts, Gmuts, Cmuts];
var dis = ["sumA", "sumT", "sumG", "sumC"].map(function(x) {
return mutball[x]
});;
//Normalise
var summa = 0;
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
muts[i][j] /= dis[i];
summa += muts[i][j];
}
}
summa /= 100; // /100 --> %
for (i = 0; i < 4; i++) {
for (j = 0; j < 4; j++) {
muts[i][j] /= summa;
}
}
mutball.mutTable = muts;
//the 4 2x2 submatrices are symmetric if ordered AT-GC.
for (h=0; h<3; h+=2){ //0 or 2
for (g=0; g<3; g+=2){
console.log("submatrix "+h+","+g);
for (x =0; x<2; x++) {
for (y =0; y<2; y++) {
//console.log(h+"+"+x+" into "+g+"+"+y+" vs. "+h+"+"+y+" into "+g+"+"+x);
//console.log(bases[h+x]+'2'+bases[g+y]+' '+bases[h+y]+'2'+bases[g+x]);
mutball.compTable[h+x][g+y]=muts[h+x][g+y]/2 +muts[h+!x][g+!y]/2;//it actually switches the diagonal. But it's zero.
}
}
}
}
//Check: if the following is used, the errors should be zero.
//muts=mutball.compTable;
//Canculate fancy table
//A>G + T>C
var Ts1 = transSum(muts, A, G, T, C);
//G>A + C>T
var Ts2 = transSum(muts, G, A, C, T);
//Ts total
//As of ECMA 5, unicode variables you say?
var ΣTs = [];
ΣTs[0] = Ts1[0] + Ts2[0];
ΣTs[2] = Ts1[2] + Ts2[2]; //Biename for variance.
ΣTs[1] = Math.sqrt(ΣTs[2] / 2); // n is 2
//A>T + T>A
var TvW = transSum(muts, A, T, T, A);
//A→C, T→G
var TvN1 = transSum(muts, A, C, T, G);
// G→C, C→G
var TvS = transSum(muts, G, C, C, G);
// G→T, C→A
var TvN2 = transSum(muts, G, T, C, A);
//Tv total
var ΣTv = [];
ΣTv[0] = TvW[0] + TvS[0] + TvN1[0] + TvN2[0];
ΣTv[2] = TvW[2] + TvS[2] + TvN1[2] + TvN2[2]; //Biename for variance.
ΣTv[1] = Math.sqrt(ΣTv[2] / 4); // n =4
//Ts/Tv
var TsOverTv = [];
TsOverTv[0] = ΣTs[0] / ΣTv[0];
TsOverTv[2] = varRatio(ΣTs[0], ΣTs[2], ΣTv[0], ΣTv[2], 2); // Taylor w n =2
TsOverTv[1] = Math.sqrt(TsOverTv[2] / 2);
//AT→GC
var W2S = [];
W2S[0] = muts[A][G] + muts[A][C] + muts[T][G] + muts[T][C];
W2S[2] = variance([muts[A][G], muts[T][C]]) + variance([muts[A][C], muts[T][G]]);
W2S[1] = Math.sqrt(W2S[2] / 2);
//GC→AT
var S2W = [];
S2W[0] = muts[G][A] + muts[G][T] + muts[C][A] + muts[C][T];
S2W[2] = variance([muts[G][A], muts[C][T]]) + variance([muts[G][T], muts[C][A]]);
S2W[1] = Math.sqrt(S2W[2] / 2);
//AT→GC/GC→AT
var W2SOverS2W = [];
W2SOverS2W[0] = W2S[0] / S2W[0];
W2SOverS2W[2] = varRatio(W2S[0], W2S[2], S2W[0], S2W[2], 2);
W2SOverS2W[1] = Math.sqrt(W2SOverS2W[2] / 2);
////AT→GC/GC→AT
var W2SOverS2W = [];
W2SOverS2W[0] = W2S[0] / S2W[0];
W2SOverS2W[2] = varRatio(W2S[0], W2S[2], S2W[0], S2W[2], 2);
W2SOverS2W[1] = Math.sqrt(W2SOverS2W[2] / 2);
//A→N, T→N
var W2N = [];
W2N[0] = muts[A][T] + muts[A][G] + muts[A][C] + muts[T][A] + muts[T][G] + muts[T][C];
W2N[2] = qV(muts, A, T) + qV(muts, A, G) + qV(muts, A, C) + qV(muts, T, A) + qV(muts, T, G) + qV(muts, T, C);
W2N[1] = Math.sqrt(W2N[2] / 6);
//G→N, C→N
var S2N = [];
S2N[0] = muts[G][A] + muts[G][T] + muts[G][C] + muts[C][A] + muts[C][G] + muts[C][T];
S2N[2] = qV(muts, G, A) + qV(muts, G, T) + qV(muts, G, C) + qV(muts, C, A) + qV(muts, C, G) + qV(muts, C, T);
S2N[1] = Math.sqrt(W2N[2] / 6);
var types = mutball._types;
for (var x = 0; x < types.length; x++) {
var indicator = types[x]; //I hate JS. 'for in' couldn't be used.
eval("mutball." + indicator + "=rd(" + indicator + "[0])");
//this[indicator] did not work. So much for dynamic variables.
eval("mutball." + indicator + "_error=rd(" + indicator + "[1])");
}
mutball.source = 'Bias';
return mutball;
}
//The following function was copied from https://jmat.googlecode.com/git/jmat.js
//It seemed a bit silly loading a whole library for a function.
function fminsearch(fun,Parm0,x,y,Opt){// fun = function(x,Parm)
if(!Opt){Opt={}};
if(!Opt.maxIter){Opt.maxIter=1000};
if(!Opt.step){// initial step is 1/100 of initial value (remember not to use zero in Parm0)
Opt.step=Parm0.map(function(p){return p/100});
Opt.step=Opt.step.map(function(si){if(si==0){return 1}else{ return si}}); // convert null steps into 1's
};
if(typeof(Opt.display)=='undefined'){Opt.display=true};
if(!Opt.objFun){Opt.objFun=function(y,yp){return y.map(function(yi,i){return Math.pow((yi-yp[i]),2)}).reduce(function(a,b){return a+b})}} //SSD
var cloneVector=function(V){return V.map(function(v){return v})};
var ya,y0,yb,fP0,fP1;
var P0=cloneVector(Parm0),P1=cloneVector(Parm0);
var n = P0.length;
var step=Opt.step;
var funParm=function(P){return Opt.objFun(y,fun(x,P))}//function (of Parameters) to minimize
// silly multi-univariate screening
for(var i=0;i<Opt.maxIter;i++){
for(var j=0;j<n;j++){ // take a step for each parameter
P1=cloneVector(P0);
P1[j]+=step[j];
if(funParm(P1)<funParm(P0)){ // if parm value going in the righ direction
step[j]=1.2*step[j]; // then go a little faster
P0=cloneVector(P1);
}
else{
step[j]=-(0.5*step[j]); // otherwiese reverse and go slower
}
}
if(Opt.display){if(i>(Opt.maxIter-10)){console.log(i+1,funParm(P0),P0)}}
}
if (!!document.getElementById('plot')){ // if there is then use it
fminsearch.plot(x,y,fun(x,P0),P0);
}
return P0
};