/*
 * OPLS.js
 * 579 lines (536 loc) · 18.9 KB
 */
import { isAnyArray } from 'is-any-array';
import { ConfusionMatrix } from 'ml-confusion-matrix';
import { getFolds } from 'ml-cross-validation';
import { Matrix, NIPALS } from 'ml-matrix';
import { getRocCurve, getAuc, getClasses } from 'ml-roc-multiclass';
import { oplsNipals } from './oplsNipals.js';
import { tss } from './util/tss.js';
/**
 * Creates new OPLS (orthogonal partial latent structures) from features and labels.
 * @param {Array} data - matrix containing data (X).
 * @param {Array} labels - 1D Array containing metadata (Y).
 * @param {Object} [options={}]
 * @param {boolean} [options.center = true] - should the data be centered (subtract the mean).
 * @param {boolean} [options.scale = true] - should the data be scaled (divide by the standard deviation).
 * @param {Array} [options.cvFolds = []] - Allows to provide folds as array of objects with the arrays trainIndex and testIndex as properties.
 * @param {number} [options.nbFolds = 7] - Allows to generate the defined number of folds with the training and test set choosen randomly from the data set.
 * */
export class OPLS {
  constructor(data, labels, options = {}) {
    // `data === true` is the internal deserialization path used by OPLS.load():
    // the third argument is then a previously exported model object (see toJSON()).
    if (data === true) {
      const opls = options;
      this.labels = opls.labels;
      this.center = opls.center;
      this.scale = opls.scale;
      this.means = opls.means;
      this.meansY = opls.meansY;
      this.stdevs = opls.stdevs;
      this.stdevsY = opls.stdevsY;
      this.model = opls.model;
      this.predictiveScoresCV = opls.predictiveScoresCV;
      this.orthogonalScoresCV = opls.orthogonalScoresCV;
      this.yHatScoresCV = opls.yHatScoresCV;
      this.mode = opls.mode;
      this.output = opls.output;
      return;
    }
    const features = new Matrix(data);
    // set default values
    // cvFolds allows to define folds for testing purpose
    const { center = true, scale = true, cvFolds = [], nbFolds = 7 } = options;
    this.labels = labels;
    let group;
    if (typeof labels[0] === 'number') {
      // numeric labels: OPLS regression is used
      this.mode = 'regression';
      group = Matrix.from1DArray(labels.length, 1, labels);
    } else if (typeof labels[0] === 'string') {
      // non-numeric labels: OPLS-DA is used
      this.mode = 'discriminantAnalysis';
      group = Matrix.checkMatrix(createDummyY(labels)).transpose();
    }
    // store the centering/scaling statistics of the full data set so that
    // predict() can apply the same transformation to new data.
    // FIX: the else branches used to null the *other* statistic
    // (stdevs when !center, means when !scale); each flag now nulls its own.
    this.center = center;
    if (this.center) {
      this.means = features.mean('column');
      this.meansY = group.mean('column');
    } else {
      this.means = null;
      this.meansY = null;
    }
    this.scale = scale;
    if (this.scale) {
      this.stdevs = features.standardDeviation('column');
      this.stdevsY = group.standardDeviation('column');
    } else {
      this.stdevs = null;
      this.stdevsY = null;
    }
    // check and remove for features with sd = 0 TODO here
    // check opls.R line 70
    const folds = cvFolds.length > 0 ? cvFolds : getFolds(labels, nbFolds);
    const Q2 = [];
    const aucResult = [];
    this.model = [];
    this.predictiveScoresCV = [];
    this.orthogonalScoresCV = [];
    this.yHatScoresCV = [];
    const oplsCV = [];
    let modelNC = [];
    // this code could be made more efficient by reverting the order of the loops
    // this is a legacy loop to be consistent with R code from MetaboMate package
    // this allows for having statistic (R2) from CV to decide wether to continue
    // with more latent structures
    let overfitted = false;
    let nc = 0; // number of latent structures (components) built so far
    let value; // per-component CV statistic: Q2y (regression) or AUC (DA)
    do {
      // cross-validated predictions for the current component, assembled
      // fold by fold at the original row positions (testIndex).
      const yHatk = new Matrix(group.rows, 1);
      const predictiveScoresK = new Matrix(group.rows, 1);
      const orthogonalScoresK = new Matrix(group.rows, 1);
      oplsCV[nc] = [];
      for (let f = 0; f < folds.length; f++) {
        const trainTest = this._getTrainTest(features, group, folds[f]);
        const testFeatures = trainTest.testFeatures;
        const trainFeatures = trainTest.trainFeatures;
        const trainLabels = trainTest.trainLabels;
        // determine center and scale of training set
        const dataCenter = trainFeatures.mean('column');
        const dataSD = trainFeatures.standardDeviation('column');
        // center and scale training set
        if (center) {
          trainFeatures.center('column');
          trainLabels.center('column');
        }
        if (scale) {
          trainFeatures.scale('column');
          trainLabels.scale('column');
        }
        // perform opls: the first component works on the raw training data,
        // later components on the orthogonally-filtered X of the previous one.
        let oplsk;
        if (nc === 0) {
          oplsk = oplsNipals(trainFeatures, trainLabels);
        } else {
          oplsk = oplsNipals(oplsCV[nc - 1][f].filteredX, trainLabels);
        }
        // store model for next component
        oplsCV[nc][f] = oplsk;
        const plsCV = new NIPALS(oplsk.filteredX, { Y: trainLabels });
        // scaling the test dataset with respect to the train
        testFeatures.center('column', { center: dataCenter });
        testFeatures.scale('column', { scale: dataSD });
        const Eh = testFeatures;
        // removing the orthogonal components from PLS
        let scores;
        for (let idx = 0; idx < nc + 1; idx++) {
          scores = Eh.mmul(oplsCV[idx][f].weightsXOrtho.transpose()); // ok
          Eh.sub(scores.mmul(oplsCV[idx][f].loadingsXOrtho));
        }
        // prediction
        const predictiveComponents = Eh.mmul(plsCV.w.transpose());
        const yHatComponents = predictiveComponents
          .mmul(plsCV.betas)
          .mmul(plsCV.q.transpose()); // ok
        // collapse the per-column predictions to a single column (sum over Y columns)
        const yHat = new Matrix(yHatComponents.rows, 1);
        for (let i = 0; i < yHatComponents.rows; i++) {
          yHat.setRow(i, [yHatComponents.getRowVector(i).sum()]);
        }
        // adding all prediction from all folds
        for (let i = 0; i < folds[f].testIndex.length; i++) {
          yHatk.setRow(folds[f].testIndex[i], [yHat.get(i, 0)]);
          predictiveScoresK.setRow(folds[f].testIndex[i], [
            predictiveComponents.get(i, 0),
          ]);
          orthogonalScoresK.setRow(folds[f].testIndex[i], [scores.get(i, 0)]);
        }
      } // end of loop over folds
      this.predictiveScoresCV.push(predictiveScoresK);
      this.orthogonalScoresCV.push(orthogonalScoresK);
      this.yHatScoresCV.push(yHatk);
      // calculate Q2y for all the prediction (all folds)
      // ROC for DA is not implemented (check opls.R line 183) TODO
      // NOTE: Matrix.center/scale mutate `group` in place; this mirrors the
      // legacy R implementation and is relied upon below.
      const tssy = tss(group.center('column').scale('column'));
      let press = 0;
      for (let i = 0; i < group.columns; i++) {
        press += tss(group.getColumnVector(i).sub(yHatk));
      }
      const Q2y = 1 - press / group.columns / tssy;
      Q2.push(Q2y);
      if (this.mode === 'regression') {
        value = Q2y;
      } else if (this.mode === 'discriminantAnalysis') {
        const rocCurve = getRocCurve(labels, yHatk.to1DArray());
        const areaUnderCurve = getAuc(rocCurve);
        aucResult.push(areaUnderCurve);
        value = areaUnderCurve;
      }
      // calculate the R2y for the complete data
      if (nc === 0) {
        modelNC = this._predictAll(features, group);
      } else {
        // subsequent components work on the residual X of the previous
        // component; it is already centered/scaled, so skip both.
        modelNC = this._predictAll(modelNC.xRes, group, {
          scale: false,
          center: false,
        });
      }
      // adding the predictive statistics from CV
      let listOfValues;
      modelNC.Q2y = Q2;
      if (this.mode === 'regression') {
        listOfValues = Q2;
      } else {
        listOfValues = aucResult;
        modelNC.auc = aucResult;
      }
      modelNC.value = value;
      // stop adding components once the CV statistic improves by less than 0.05
      if (nc > 0) {
        overfitted = value - listOfValues[nc - 1] < 0.05;
      }
      this.model.push(modelNC);
      // store the model for each component
      nc++;
    } while (!overfitted); // end of loop over nc
    // store scores from CV
    const predictiveScoresCV = this.predictiveScoresCV;
    const orthogonalScoresCV = this.orthogonalScoresCV;
    const yHatScoresCV = this.yHatScoresCV;
    const m = this.model[nc - 1];
    // aggregate the orthogonal part of every retained component
    const orthogonalData = new Matrix(features.rows, features.columns);
    const orthogonalScores = new Matrix(features.rows, nc - 1);
    const orthogonalLoadings = new Matrix(nc - 1, features.columns);
    const orthogonalWeights = new Matrix(nc - 1, features.columns);
    for (let i = 0; i < this.model.length - 1; i++) {
      orthogonalData.add(this.model[i].XOrth);
      orthogonalScores.setSubMatrix(this.model[i].orthogonalScores, 0, i);
      orthogonalLoadings.setSubMatrix(this.model[i].orthogonalLoadings, i, 0);
      orthogonalWeights.setSubMatrix(this.model[i].orthogonalWeights, i, 0);
    }
    // NOTE: mutates `features` in place (legacy behavior)
    const FeaturesCS = features.center('column').scale('column');
    let labelsCS;
    if (this.mode === 'regression') {
      labelsCS = group.clone().center('column').scale('column');
    } else {
      labelsCS = group;
    }
    // final PLS on the data with all orthogonal variation removed
    const orthogonalizedData = FeaturesCS.clone().sub(orthogonalData);
    const plsCall = new NIPALS(orthogonalizedData, { Y: labelsCS });
    const residualData = orthogonalizedData
      .clone()
      .sub(plsCall.t.mmul(plsCall.p));
    const R2x = this.model.map((x) => x.R2x);
    const R2y = this.model.map((x) => x.R2y);
    this.output = {
      Q2y: Q2,
      auc: aucResult,
      R2x,
      R2y,
      predictiveComponents: plsCall.t,
      predictiveLoadings: plsCall.p,
      predictiveWeights: plsCall.w,
      betas: plsCall.betas,
      Qpc: plsCall.q,
      predictiveScoresCV,
      orthogonalScoresCV,
      yHatScoresCV,
      oplsCV,
      orthogonalScores,
      orthogonalLoadings,
      orthogonalWeights,
      Xorth: orthogonalData,
      yHat: m.totalPred,
      Yres: m.plsC.yResidual,
      residualData,
      folds,
    };
  }
  /**
   * get access to all the computed elements
   * Mainly for debug and testing
   * @return {Object} output object
   */
  getLogs() {
    return this.output;
  }
  /**
   * Get the cross-validated predictive (X) and orthogonal (Y) scores as plain arrays.
   * @return {Object} { scoresX, scoresY } — one 1D array per component.
   */
  getScores() {
    const scoresX = this.predictiveScoresCV.map((x) => x.to1DArray());
    const scoresY = this.orthogonalScoresCV.map((x) => x.to1DArray());
    return { scoresX, scoresY };
  }
  /**
   * Load an OPLS model from JSON
   * @param {Object} model
   * @return {OPLS}
   */
  static load(model) {
    if (typeof model.name !== 'string') {
      throw new TypeError('model must have a name property');
    }
    if (model.name !== 'OPLS') {
      throw new RangeError(`invalid model: ${model.name}`);
    }
    return new OPLS(true, [], model);
  }
  /**
   * Export the current model to a JSON object
   * @return {Object} model
   */
  toJSON() {
    // FIX: meansY and stdevsY were missing, so a save/load round-trip lost
    // the Y statistics that predict() needs when trueLabels are provided.
    return {
      name: 'OPLS',
      labels: this.labels,
      center: this.center,
      scale: this.scale,
      means: this.means,
      meansY: this.meansY,
      stdevs: this.stdevs,
      stdevsY: this.stdevsY,
      model: this.model,
      mode: this.mode,
      output: this.output,
      predictiveScoresCV: this.predictiveScoresCV,
      orthogonalScoresCV: this.orthogonalScoresCV,
      yHatScoresCV: this.yHatScoresCV,
    };
  }
  /**
   * Predict the category (class label) for new data (discriminant analysis mode).
   * Each new point is assigned the class whose addition to the training scores
   * maximizes the AUC of the resulting ROC curve.
   * @param {Matrix|Array} features - a matrix (or 1D/2D array) containing new data
   * @param {Object} [options={}]
   * @param {Array} [options.trueLabels] - an array with true values to compute confusion matrix
   * @param {boolean} [options.center] - override the model's centering flag
   * @param {boolean} [options.scale] - override the model's scaling flag
   * @return {Array} - predicted class label for each row of features
   */
  predictCategory(features, options = {}) {
    const {
      trueLabels = [],
      center = this.center,
      scale = this.scale,
    } = options;
    if (isAnyArray(features)) {
      if (features[0].length === undefined) {
        // a flat array is interpreted as a single observation
        features = Matrix.from1DArray(1, features.length, features);
      } else {
        features = Matrix.checkMatrix(features);
      }
    }
    const prediction = this.predict(features, { trueLabels, center, scale });
    const predictiveComponents = Matrix.checkMatrix(
      this.output.predictiveComponents,
    ).to1DArray();
    const newTPred = Matrix.checkMatrix(
      prediction.predictiveComponents,
    ).to1DArray();
    const categories = getClasses(this.labels);
    const classes = this.labels.slice();
    const result = [];
    for (const pred of newTPred) {
      let item;
      let auc = 0;
      // try each candidate class and keep the one giving the best AUC
      for (const category of categories) {
        const testTPred = predictiveComponents.slice();
        testTPred.push(pred);
        const testClasses = classes.slice();
        testClasses.push(category.name);
        const rocCurve = getRocCurve(testClasses, testTPred);
        const areaUnderCurve = getAuc(rocCurve);
        if (auc < areaUnderCurve) {
          item = category.name;
          auc = areaUnderCurve;
        }
      }
      result.push(item);
    }
    return result;
  }
  /**
   * Predict scores for new data
   * @param {Matrix} features - a matrix containing new data
   * @param {Object} [options={}]
   * @param {Array} [options.trueLabels] - an array with true values to compute Q2y (regression) or confusion matrix / AUC (DA)
   * @param {boolean} [options.center] - override the model's centering flag
   * @param {boolean} [options.scale] - override the model's scaling flag
   * @return {Object} - predictions
   */
  predict(features, options = {}) {
    const {
      trueLabels = [],
      center = this.center,
      scale = this.scale,
    } = options;
    let labels;
    if (typeof trueLabels[0] === 'number') {
      labels = Matrix.from1DArray(trueLabels.length, 1, trueLabels);
    } else if (typeof trueLabels[0] === 'string') {
      labels = Matrix.checkMatrix(createDummyY(trueLabels)).transpose();
    }
    features = new Matrix(features);
    // scaling the test dataset with respect to the train
    if (center) {
      features.center('column', { center: this.means });
      if (labels?.rows > 0) {
        labels.center('column', { center: this.meansY });
      }
    }
    if (scale) {
      features.scale('column', { scale: this.stdevs });
      if (labels?.rows > 0) {
        labels.scale('column', { scale: this.stdevsY });
      }
    }
    // Q2y/auc are shared arrays across components, so their length on the
    // first model entry equals the total number of components built.
    const nc =
      this.mode === 'regression'
        ? this.model[0].Q2y.length
        : this.model[0].auc.length - 1;
    const Eh = features.clone();
    // removing the orthogonal components from PLS
    let orthogonalScores;
    let orthogonalWeights;
    let orthogonalLoadings;
    let totalPred;
    let predictiveComponents;
    for (let idx = 0; idx < nc; idx++) {
      const model = this.model[idx];
      orthogonalWeights = Matrix.checkMatrix(
        model.orthogonalWeights,
      ).transpose();
      orthogonalLoadings = Matrix.checkMatrix(model.orthogonalLoadings);
      orthogonalScores = Eh.mmul(orthogonalWeights);
      Eh.sub(orthogonalScores.mmul(orthogonalLoadings));
      // prediction
      predictiveComponents = Eh.mmul(
        Matrix.checkMatrix(model.plsC.w).transpose(),
      );
      const components = predictiveComponents
        .mmul(model.plsC.betas)
        .mmul(Matrix.checkMatrix(model.plsC.q).transpose());
      totalPred = new Matrix(components.rows, 1);
      for (let i = 0; i < components.rows; i++) {
        totalPred.setRow(i, [components.getRowVector(i).sum()]);
      }
    }
    if (labels?.rows > 0) {
      if (this.mode === 'regression') {
        const tssy = tss(labels);
        const press = tss(labels.clone().sub(totalPred));
        const Q2y = 1 - press / tssy;
        return { predictiveComponents, orthogonalScores, yHat: totalPred, Q2y };
      } else if (this.mode === 'discriminantAnalysis') {
        const confusionMatrix = ConfusionMatrix.fromLabels(
          trueLabels,
          totalPred.to1DArray(),
        );
        const rocCurve = getRocCurve(trueLabels, totalPred.to1DArray());
        const auc = getAuc(rocCurve);
        return {
          predictiveComponents,
          orthogonalScores,
          yHat: totalPred,
          confusionMatrix,
          auc,
        };
      }
    } else {
      return { predictiveComponents, orthogonalScores, yHat: totalPred };
    }
  }
  /**
   * Fit one OPLS component on the complete data set and compute R2x/R2y.
   * @param {Matrix} data - feature matrix (raw on the first call, residual X afterwards)
   * @param {Matrix} categories - label matrix
   * @param {Object} [options={}]
   * @param {boolean} [options.center=true] - center the cloned data before fitting
   * @param {boolean} [options.scale=true] - scale the cloned data before fitting
   * @return {Object} component model (R2y, R2x, residual X, orthogonal scores/loadings/weights, predictions, sub-models)
   */
  _predictAll(data, categories, options = {}) {
    // cannot use the global this.center here
    // since it is used in the NC loop and
    // centering and scaling should only be
    // performed once
    const { center = true, scale = true } = options;
    const features = data.clone();
    const labels = categories.clone();
    if (center) {
      const means = features.mean('column');
      features.center('column', { center: means });
      labels.center('column');
    }
    if (scale) {
      const stdevs = features.standardDeviation('column');
      features.scale('column', { scale: stdevs });
      labels.scale('column');
      // reevaluate tssy and tssx after scaling
      // must be global because re-used for next nc iteration
      // tssx is only evaluate the first time
      this.tssy = tss(labels);
      this.tssx = tss(features);
    }
    const oplsC = oplsNipals(features, labels);
    const plsC = new NIPALS(oplsC.filteredX, { Y: labels });
    const predictiveComponents = plsC.t.clone();
    const yHatComponents = predictiveComponents
      .mmul(plsC.betas)
      .mmul(plsC.q.transpose()); // ok
    // collapse per-column predictions to a single column
    const yHat = new Matrix(yHatComponents.rows, 1);
    for (let i = 0; i < yHatComponents.rows; i++) {
      yHat.setRow(i, [yHatComponents.getRowVector(i).sum()]);
    }
    let rss = 0;
    for (let i = 0; i < labels.columns; i++) {
      rss += tss(labels.getColumnVector(i).sub(yHat));
    }
    const R2y = 1 - rss / labels.columns / this.tssy;
    const xEx = plsC.t.mmul(plsC.p);
    const rssx = tss(xEx);
    const R2x = rssx / this.tssx;
    return {
      R2y,
      R2x,
      xRes: oplsC.filteredX,
      orthogonalScores: oplsC.scoresXOrtho,
      orthogonalLoadings: oplsC.loadingsXOrtho,
      orthogonalWeights: oplsC.weightsXOrtho,
      predictiveComponents,
      totalPred: yHat,
      XOrth: oplsC.scoresXOrtho.mmul(oplsC.loadingsXOrtho),
      oplsC,
      plsC,
    };
  }
  /**
   * Split features and labels into train and test matrices according to a fold.
   * @param {*} X - dataset matrix object
   * @param {*} group - labels matrix object
   * @param {*} index - train and test index (output from getFold())
   * @return {Object} { trainFeatures, testFeatures, trainLabels, testLabels }
   */
  _getTrainTest(X, group, index) {
    const testFeatures = new Matrix(index.testIndex.length, X.columns);
    const testLabels = new Matrix(index.testIndex.length, group.columns);
    index.testIndex.forEach((el, idx) => {
      testFeatures.setRow(idx, X.getRow(el));
      testLabels.setRow(idx, group.getRow(el));
    });
    const trainFeatures = new Matrix(index.trainIndex.length, X.columns);
    const trainLabels = new Matrix(index.trainIndex.length, group.columns);
    index.trainIndex.forEach((el, idx) => {
      trainFeatures.setRow(idx, X.getRow(el));
      trainLabels.setRow(idx, group.getRow(el));
    });
    return {
      trainFeatures,
      testFeatures,
      trainLabels,
      testLabels,
    };
  }
}
/**
 * Builds a dummy Y encoding for class labels.
 * With more than two classes: one row per class, 1 where the sample belongs
 * to that class and -1 elsewhere (one-vs-rest).
 * With one or two classes: a single row where the first observed class is
 * encoded as 2 and any other class as 1.
 * @param {Array<string>} array - class label per sample
 * @return {Array<Array<number>>} dummy encoding, one row per output column
 */
function createDummyY(array) {
  const uniqueLabels = [...new Set(array)];
  if (uniqueLabels.length > 2) {
    // one-vs-rest: one +1/-1 row per distinct label
    return uniqueLabels.map((label) =>
      array.map((sample) => (sample === label ? 1 : -1)),
    );
  }
  // binary (or degenerate single-class) case: single 2/1 row
  return [array.map((sample) => (sample === uniqueLabels[0] ? 2 : 1))];
}