/
index.js
306 lines (267 loc) · 13.1 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
var jStat = require('jStat').jStat;
var Abba = function() {};
module.exports.Abba = Abba;
// Friendly wrapper over jStat's normal distribution functions.
Abba.NormalDistribution = function(mean, standardDeviation) {
if (mean === undefined) {
mean = 0;
}
if (standardDeviation === undefined) {
standardDeviation = 1;
}
this.mean = mean;
this.standardDeviation = standardDeviation;
};
Abba.NormalDistribution.prototype = {
density: function(value) {
return jStat.normal.pdf(value, this.mean, this.standardDeviation);
},
// Returns P(x < value) for x standard normal. value may be any number.
cdf: function(value) {
return jStat.normal.cdf(value, this.mean, this.standardDeviation);
},
// Returns P(x > value) for x standard normal. value may be any number.
survival: function(value) {
return 1 - this.cdf(value);
},
// Returns z such that P(x < z) = probability for x standard normal.
// probability must be in (0, 1).
inverseCdf: function(probability) {
return jStat.normal.inv(probability, this.mean, this.standardDeviation);
},
// Returns z such that P(x > z) = probability for x standard normal.
// probability must be in (0, 1).
inverseSurvival: function(probability) {
return this.mean - (this.inverseCdf(probability) - this.mean);
}
};
/* Distribution functions for the binomial distribution. Computes exact binomial results for small
samples and falls back on the normal approximation for large samples.
*/
Abba.BinomialDistribution = function(numTrials, probability) {
this.SMALL_SAMPLE_MAX_TRIALS = 100;
this.numTrials = numTrials;
this.probability = probability;
this.expectation = numTrials * probability;
this.standardDeviation = Math.sqrt(this.expectation * (1 - probability));
// normal approximation to this binomial distribution
this._normal = new Abba.NormalDistribution(this.expectation, this.standardDeviation);
this._lowerTailProbability = this._normal.cdf(-0.5);
this._upperTailProbability = this._normal.survival(numTrials + 0.5);
};
Abba.BinomialDistribution.prototype = {
mass: function(count) {
if (this.numTrials <= this.SMALL_SAMPLE_MAX_TRIALS) {
return jStat.binomial.pdf(count, this.numTrials, this.probability);
} else {
return this._normal.density(count);
}
},
_rescaleProbability: function(probability) {
return probability / (1 - this._lowerTailProbability - this._upperTailProbability);
},
cdf: function(count) {
if (count < 0) {
return 0;
} else if (count >= this.numTrials) {
return 1;
} else if (this.numTrials <= this.SMALL_SAMPLE_MAX_TRIALS) {
return jStat.binomial.cdf(count, this.numTrials, this.probability);
} else {
return this._rescaleProbability(
this._normal.cdf(count + 0.5) - this._lowerTailProbability);
}
},
survival: function(count) {
return 1 - this.cdf(count);
},
inverseCdf: function(probability) {
return Math.max(0, Math.min(this.numTrials, this._normal.inverseCdf(probability)));
},
inverseSurvival: function(probability) {
return Math.max(0, Math.min(this.numTrials, this._normal.inverseSurvival(probability)));
}
};
Abba.ValueWithInterval = function(value, lowerBound, upperBound) {
this.value = value;
this.lowerBound = lowerBound;
this.upperBound = upperBound;
};
// A value with standard error, from which a confidence interval can be derived.
Abba.ValueWithError = function(value, error) {
this.value = value;
this.error = error;
};
Abba.ValueWithError.prototype = {
/* criticalZValue should be the value at which the right-tail probability for a standard
normal distribution equals half the desired alpha = 1 - confidence level:
P(Z > zValue) = 1 - alpha / 2
where Z is an N(0, 1) random variable. Use NormalDistribution.inverseSurvival(), or see
http://en.wikipedia.org/wiki/Standard_normal_table.
*/
confidenceIntervalWidth: function(criticalZValue) {
return criticalZValue * this.error;
},
valueWithInterval: function(criticalZValue, estimatedValue) {
var intervalWidth = this.confidenceIntervalWidth(criticalZValue);
if (estimatedValue === undefined) {
estimatedValue = this.value;
}
return new Abba.ValueWithInterval(estimatedValue,
this.value - intervalWidth,
this.value + intervalWidth);
}
};
// Represents a binomial proportion with numSuccesses successful trials out of numTrials total.
Abba.Proportion = function(numSuccesses, numTrials) {
this.numSuccesses = numSuccesses;
this.numTrials = numTrials;
this._binomial = new Abba.BinomialDistribution(numTrials, numSuccesses / numTrials);
};
Abba.Proportion.prototype = {
/* Compute an estimate of the underlying probability of success.
Uses the "Agresti-Coull" or "adjusted Wald" interval, which can be thought of as a Wald
interval with (zCriticalValue^2 / 2) added to the number of successes and the number of
failures. The estimated probability of success is the center of the interval. This provides
much better coverage than the Wald interval (and many other intervals), though it has the
unintuitive property that the estimated probabilty is not numSuccesses / numTrials. See
(section 1.4.2 and problem 1.24):
Agresti, Alan. Categorical data analysis. New York, NY: John Wiley & Sons; 2002.
An ordinary Wald interval can be obtained by passing zCriticalValue = 0.
*/
pEstimate: function(zCriticalValue) {
var squaredZCriticalValue = zCriticalValue * zCriticalValue;
var adjustedNumTrials = this.numTrials + squaredZCriticalValue;
var adjustedBinomial = new Abba.BinomialDistribution(
adjustedNumTrials,
(this.numSuccesses + squaredZCriticalValue / 2) / adjustedNumTrials);
return new Abba.ValueWithError(
adjustedBinomial.probability,
adjustedBinomial.standardDeviation / adjustedBinomial.numTrials);
}
};
Abba.ProportionComparison = function(baseline, variation) {
this.baseline = baseline;
this.variation = variation;
this._standardNormal = new Abba.NormalDistribution();
};
Abba.ProportionComparison.prototype = {
// Generate an estimate of the difference in success rates between the variation and the
// baseline.
differenceEstimate: function(zCriticalValue) {
var baselineP = this.baseline.pEstimate(zCriticalValue);
var variationP = this.variation.pEstimate(zCriticalValue);
var difference = variationP.value - baselineP.value;
var standardError = Math.sqrt(Math.pow(baselineP.error, 2) + Math.pow(variationP.error, 2));
return new Abba.ValueWithError(difference, standardError);
},
// Return the difference in sucess rates as a proportion of the baseline success rate.
differenceRatio: function(zCriticalValue) {
var baselineValue = this.baseline.pEstimate(zCriticalValue).value;
var ratio = this.differenceEstimate(zCriticalValue).value / baselineValue;
var error = this.differenceEstimate(zCriticalValue).error / baselineValue;
return new Abba.ValueWithError(ratio, error);
},
/* For the given binomial distribution, compute an interval that covers at least
(1 - coverageAlpha) of the total probability mass, centered at the expectation (unless we're
at the boundary). Uses the normal approximation.
*/
_binomialCoverageInterval: function(distribution, coverageAlpha) {
if (distribution.numTrials < 1000) {
// don't even bother trying to optimize for small-ish sample sizes
return [0, distribution.numTrials];
} else {
return [
Math.floor(distribution.inverseCdf(coverageAlpha / 2)),
Math.ceil(distribution.inverseSurvival(coverageAlpha / 2))
];
}
},
/* Given the probability of an event, compute the probability that it happens at least once in
numTests independent tests. This is used to adjust a p-value for multiple comparisons.
When used to adjust alpha instead, this is called a Sidak correction (the logic is the same,
the formula is inverted):
http://en.wikipedia.org/wiki/Bonferroni_correction#.C5.A0id.C3.A1k_correction
*/
_probabilityUnion: function(probability, numTests) {
return 1 - Math.pow(1 - probability, numTests);
},
/* Compute a p-value testing null hypothesis H0: pBaseline == pVariation against alternative
hypothesis H1: pBaseline != pVariation by summing p-values conditioned on individual baseline
success counts. This provides a more accurate correction for multiple testing but scales like
O(sqrt(this.baseline.numTrials)), so can eventually get slow for very large values.
Lower coverageAlpha increases accuracy at the cost of longer runtime. Roughly, the result
will be accurate within no more than coverageAlpha (but this ignores error due to the normal
approximation so isn't guaranteed).
*/
iteratedTest: function(numTests, coverageAlpha) {
var observedAbsoluteDelta = Math.abs(
this.variation.pEstimate(0).value - this.baseline.pEstimate(0).value);
if (observedAbsoluteDelta === 0) {
// a trivial case that the code below does not handle well
return 1;
}
var pooledProportion =
(this.baseline.numSuccesses + this.variation.numSuccesses) /
(this.baseline.numTrials + this.variation.numTrials);
var variationDistribution = new Abba.BinomialDistribution(this.variation.numTrials,
pooledProportion);
var baselineDistribution = new Abba.BinomialDistribution(this.baseline.numTrials,
pooledProportion);
var baselineLimits = this._binomialCoverageInterval(baselineDistribution, coverageAlpha);
var pValue = 0;
for (var baselineSuccesses = baselineLimits[0];
baselineSuccesses <= baselineLimits[1];
baselineSuccesses++) {
var baselineProportion = baselineSuccesses / this.baseline.numTrials;
var lowerTrialCount = Math.floor(
(baselineProportion - observedAbsoluteDelta) * this.variation.numTrials);
var upperTrialCount = Math.ceil(
(baselineProportion + observedAbsoluteDelta) * this.variation.numTrials);
// p-value of variation success counts "at least as extreme" for this particular
// baseline success count
var pValueAtBaseline =
variationDistribution.cdf(lowerTrialCount) +
variationDistribution.survival(upperTrialCount - 1);
// this is exact because we're conditioning on the baseline count, so the multiple
// tests are independent.
var adjustedPValue = this._probabilityUnion(pValueAtBaseline, numTests);
var baselineProbability = baselineDistribution.mass(baselineSuccesses);
pValue += baselineProbability * adjustedPValue;
}
// the remaining baseline values we didn't cover contribute less than coverageAlpha to the
// sum, so adding that amount gives us a conservative upper bound.
return pValue + coverageAlpha;
}
};
// numVariations: number of variations to be compared to the baseline
Abba.Experiment = function(numVariations, baselineNumSuccesses, baselineNumTrials, intervalAlpha) {
this.P_VALUE_PRECISION = 1e-5;
normal = new Abba.NormalDistribution();
this._baseline = new Abba.Proportion(baselineNumSuccesses, baselineNumTrials);
this._numComparisons = Math.max(1, numVariations);
var correctedAlpha = intervalAlpha / this._numComparisons; // Bonferroni correction
this._intervalZCriticalValue = normal.inverseSurvival(correctedAlpha / 2);
};
Abba.Experiment.prototype = {
getBaselineProportion: function() {
return this._baseline
.pEstimate(this._intervalZCriticalValue)
.valueWithInterval(this._intervalZCriticalValue, this._baseline.pEstimate(0).value);
},
getResults: function(numSuccesses, numTrials) {
var trial = new Abba.Proportion(numSuccesses, numTrials);
var comparison = new Abba.ProportionComparison(this._baseline, trial);
return {
proportion: trial
.pEstimate(this._intervalZCriticalValue)
.valueWithInterval(this._intervalZCriticalValue, trial.pEstimate(0).value),
relativeImprovement: comparison
.differenceRatio(this._intervalZCriticalValue)
.valueWithInterval(
this._intervalZCriticalValue,
comparison.differenceRatio(0).value),
pValue: comparison.iteratedTest(this._numComparisons, this.P_VALUE_PRECISION)
};
}
};