Skip to content

Commit 8da741e

Browse files
authored
feat: use more performant implementations for calculating mean, mad, aad and variance (#413)
1 parent ae381fd commit 8da741e

File tree

6 files changed

+284
-82
lines changed

6 files changed

+284
-82
lines changed

src/utils.ts

Lines changed: 83 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -253,21 +253,6 @@ export const isFnAsyncResource = (fn: Fn | null | undefined): boolean => {
253253
}
254254
}
255255

256-
/**
257-
* Computes the average of a sample.
258-
* @param samples - the sample
259-
* @returns the average of the sample
260-
*/
261-
const average = (samples: Samples) => {
262-
let result = 0
263-
264-
for (const sample of samples) {
265-
result += sample
266-
}
267-
268-
return result / samples.length
269-
}
270-
271256
/**
272257
* A type representing a samples-array with at least one number.
273258
*/
@@ -286,14 +271,6 @@ export const isValidSamples = (
286271
return Array.isArray(value) && value.length !== 0
287272
}
288273

289-
/**
290-
* Sorts samples and returns a new sorted array.
291-
* @param samples - samples to sort
292-
* @returns new sorted samples
293-
*/
294-
export const toSortedSamples = (samples: Samples): SortedSamples =>
295-
[...samples].sort(sortFn) as SortedSamples
296-
297274
/**
298275
* Sorts samples in place.
299276
* @param samples - samples to sort
@@ -304,21 +281,29 @@ export function sortSamples (
304281
samples.sort(sortFn)
305282
}
306283

307-
/**
308-
* Computes the variance of a sample with Bessel's correction.
309-
* @param samples - the sample
310-
* @param avg - the average of the sample
311-
* @returns the variance of the sample
312-
*/
313-
const variance = (samples: Samples, avg = average(samples)) => {
314-
if (samples.length === 1) {
315-
return 0
284+
/**
 * Computes the mean and the variance of a sample in a single pass.
 * The running mean and a running sum of squared deviations are updated per
 * element (Welford-style update); the variance is that sum divided by
 * `len - 1` (Bessel's correction).
 * @param samples - the sample
 * @returns the mean (`mean`) and the variance (`vr`); a one-element sample yields a variance of 0
 */
export const meanAndVariance = (samples: Samples): { mean: number; vr: number } => {
285+
const len = samples.length
286+
// A single sample has no spread; returning early also avoids dividing by `len - 1 === 0` below.
if (len === 1) {
287+
return { mean: samples[0], vr: 0 }
288+
}
289+
290+
let mean = 0
291+
let m = 0
292+
let x = 0
293+
let d = 0
294+
let i = 0
295+
296+
while (i < len) {
297+
x = samples[i++]! // eslint-disable-line @typescript-eslint/no-non-null-assertion
298+
// `d` is the deviation from the mean as it was before this sample is folded in.
d = x - mean
299+
// `i` was already incremented above, so it equals the number of samples seen so far.
mean += d / i
300+
// Accumulates (deviation from the old mean) * (deviation from the updated mean).
m += d * (x - mean)
316301
}
317-
let sumSq = 0
318-
for (const sample of samples) {
319-
sumSq += (sample - avg) ** 2
302+
303+
return {
304+
mean,
305+
vr: m / (len - 1)
320306
}
321-
return sumSq / (samples.length - 1)
322307
}
323308

324309
type Quantile = 0.5 | 0.75 | 0.99 | 0.995 | 0.999
@@ -342,47 +327,78 @@ const quantileSorted = (samples: SortedSamples, q: Quantile): number => {
342327
: samples[baseIndex]!
343328
}
344329

345-
/**
346-
* Computes the median of a sorted sample.
347-
* @param samples - the sorted sample
348-
* @returns the median of the sample
349-
*/
350-
const medianSorted = (samples: SortedSamples) => quantileSorted(samples, 0.5)
351-
352330
/**
353331
* A sort function to be passed to Array.prototype.sort for numbers.
354332
* @param a - first number
355333
* @param b - second number
356334
* @returns a number indicating the sort order
357335
*/
358-
const sortFn = (a: number, b: number) => a - b
336+
export const sortFn = (a: number, b: number): number => {
  // Negative when `a` is smaller, positive when `b` is smaller, zero when equal.
  const difference = a - b
  return difference
}
359337

360338
/**
361-
* Computes the median of an unsorted sample.
339+
* Computes the average absolute deviation from the mean.
362340
* @param samples - the sample
363-
* @returns the median of the sample
341+
* @param mean - the mean of the sample
342+
* @returns the average absolute deviation
364343
*/
365-
const median = (samples: Samples) => medianSorted(toSortedSamples(samples))
344+
export const absoluteDeviationMean = (samples: Samples, mean: number): number => {
345+
// `result` holds the running average of |sample - mean| over the samples visited so far.
let result = 0
346+
const len = samples.length
347+
348+
let i = 0
349+
350+
while (i < len) {
351+
// Running-average update: `samples[i++]` increments `i` before the division is evaluated,
// so the divisor is the count of samples seen so far, including the current one.
result += (Math.abs(samples[i++]! - mean) - result) / i // eslint-disable-line @typescript-eslint/no-non-null-assertion
352+
}
353+
354+
return result
355+
}
366356

367357
/**
368-
* Computes the absolute deviation of a sample given an aggregation.
369-
* @param samples - the sample
370-
* @param aggFn - the aggregation function to use
371-
* @param aggValue - the aggregated value to use
372-
* @returns the absolute deviation of the sample given the aggregation
358+
* Computes the median absolute deviation from the median.
359+
* @param samples - the sorted sample
360+
* @param median - the median of the sample
361+
* @returns the median absolute deviation
373362
*/
374-
const absoluteDeviation = <S extends Samples = Samples>(
375-
samples: S,
376-
aggFn: (arr: S) => number,
377-
aggValue = aggFn(samples)
378-
) => {
379-
const absoluteDeviations: S = [] as unknown as S
380-
381-
for (const sample of samples) {
382-
absoluteDeviations.push(Math.abs(sample - aggValue))
383-
}
363+
export function absoluteDeviationMedian (samples: SortedSamples, median: number): number {
364+
const len = samples.length
365+
// A single sample has no deviation from its own median.
if (len === 1) return 0
366+
367+
// Indices below `mid` are read as `median - sample`, indices from `mid` on as `sample - median`.
// NOTE(review): this assumes `samples` is sorted ascending and `median` is its median, so both
// sequences of deviations are non-negative and ordered — confirm against callers.
const mid = len >> 1
368+
// Number of deviations that make up the lower half of the merged deviations (rounded up).
const halfLen = (len + 1) >> 1
369+
370+
let low = 0
371+
let high = mid
372+
let c1, c2, l1, l2, r1, r2
384373

385-
return aggFn(absoluteDeviations)
374+
// Binary search over `c1`, the number of deviations taken from below `mid`;
// `c2` is the remainder taken from `mid` upwards so that c1 + c2 === halfLen.
while (low <= high) {
375+
c1 = (low + high) >> 1
376+
c2 = halfLen - c1
377+
378+
// l1: the last deviation taken from the lower side (-Infinity when none is taken).
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
379+
l1 = c1 === 0 ? -Infinity : median - samples[mid - c1]!
380+
381+
// r1: the first deviation not taken from the lower side (Infinity when all are taken).
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
382+
r1 = c1 === mid ? Infinity : median - samples[mid - c1 - 1]!
383+
384+
// l2: the last deviation taken from the upper side (-Infinity when none is taken).
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
385+
l2 = c2 === 0 ? -Infinity : samples[mid + c2 - 1]! - median
386+
// r2: the first deviation not taken from the upper side (Infinity when all are taken).
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
387+
r2 = c2 === len - mid ? Infinity : samples[mid + c2]! - median
388+
389+
// The split is correct when every taken deviation is <= every deviation left untaken.
if (l1 <= r2 && l2 <= r1) {
390+
return len & 1 // check for odd length
391+
? Math.max(l1, l2)
392+
: (Math.max(l1, l2) + Math.min(r1, r2)) / 2
393+
}
394+
395+
// Too many deviations taken from the lower side: shrink `c1`; otherwise grow it.
if (l1 > r2) {
396+
high = c1 - 1
397+
} else {
398+
low = c1 + 1
399+
}
400+
}
401+
return 0 // should never reach here
386402
}
387403

388404
/**
@@ -392,22 +408,21 @@ const absoluteDeviation = <S extends Samples = Samples>(
392408
* @returns the statistics of the sample
393409
*/
394410
export const getStatisticsSorted = (samples: SortedSamples): Statistics => {
395-
const mean = average(samples)
396-
const vr = variance(samples, mean)
411+
const { mean, vr } = meanAndVariance(samples)
397412
const sd = Math.sqrt(vr)
398413
const sem = sd / Math.sqrt(samples.length)
399414
const df = samples.length - 1
400415
const critical = tTable[df || 1] ?? tTable[0]
401416
const moe = sem * critical
402417
const absMean = Math.abs(mean)
403418
const rme = absMean === 0 ? Infinity : (moe / absMean) * 100
404-
const p50 = medianSorted(samples)
419+
const p50 = quantileSorted(samples, 0.5)
405420

406421
return {
407-
aad: absoluteDeviation(samples, average, mean),
422+
aad: absoluteDeviationMean(samples, mean),
408423
critical,
409424
df,
410-
mad: absoluteDeviation(samples, median, p50),
425+
mad: absoluteDeviationMedian(samples, p50),
411426
// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
412427
max: samples[df]!,
413428
mean,
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
import { describe, expect, it } from 'vitest'
2+
3+
import { absoluteDeviationMedian, Samples, type SortedSamples } from '../src/utils'
4+
import { toSortedSamples } from './utils'
5+
6+
// Helper: calculate median of a sorted array
7+
const medianFn = (samples: SortedSamples): number => {
8+
const len = samples.length
9+
const mid = len >> 1
10+
// Odd length: the middle element; even length: the mean of the two middle elements.
return len & 1
11+
? samples[mid]! // eslint-disable-line @typescript-eslint/no-non-null-assertion
12+
: (samples[mid - 1]! + samples[mid]!) / 2 // eslint-disable-line @typescript-eslint/no-non-null-assertion
13+
}
14+
15+
// Reference implementation: median of absolute deviations
16+
const absoluteDeviationMedianTrivial = (samples: SortedSamples): number => {
17+
const median = medianFn(samples)
18+
// Materializes every absolute deviation from the median.
const deviations = samples.map(v => Math.abs(v - median)) as Samples
19+
// `medianFn` expects sorted input, so the deviations are passed through `toSortedSamples` first
// (presumably returns a sorted copy — helper is defined in ./utils, not visible here).
return medianFn(toSortedSamples(deviations))
20+
}
21+
22+
// Every case feeds the same sorted input to `absoluteDeviationMedian` and to the reference
// implementation `absoluteDeviationMedianTrivial`, and requires the two results to be identical.
describe('absoluteDeviationMedian()', () => {
23+
it('Simple odd length', () => {
24+
const samples = toSortedSamples([1, 2, 3, 4, 5, 6, 7, 8, 9])
25+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
26+
})
27+
28+
it('Simple even length', () => {
29+
const samples = toSortedSamples([1, 2, 3, 4, 5, 6, 7, 8])
30+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
31+
})
32+
33+
it('With outliers', () => {
34+
const samples = toSortedSamples([1, 2, 3, 100, 200])
35+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
36+
})
37+
38+
it('All same', () => {
39+
const samples = toSortedSamples([5, 5, 5, 5, 5])
40+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
41+
})
42+
43+
it('Two elements', () => {
44+
const samples = toSortedSamples([1, 9])
45+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
46+
})
47+
48+
it('Single element', () => {
49+
const samples = toSortedSamples([42])
50+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
51+
})
52+
53+
it('Symmetric', () => {
54+
const samples = toSortedSamples([1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11])
55+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
56+
})
57+
58+
it('Large spread', () => {
59+
const samples = toSortedSamples([1, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100])
60+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
61+
})
62+
63+
it('Duplicates at start', () => {
64+
const samples = toSortedSamples([1, 1, 1, 5, 10, 15, 20])
65+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
66+
})
67+
68+
it('Duplicates at end', () => {
69+
const samples = toSortedSamples([1, 5, 10, 15, 20, 20, 20])
70+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
71+
})
72+
73+
it('Duplicates around median', () => {
74+
const samples = toSortedSamples([1, 2, 5, 5, 5, 8, 9])
75+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
76+
})
77+
78+
it('Many duplicates', () => {
79+
const samples = toSortedSamples([1, 2, 2, 3, 3, 3, 4, 4, 5])
80+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
81+
})
82+
83+
it('Alternating duplicates', () => {
84+
const samples = toSortedSamples([1, 1, 2, 2, 3, 3, 4, 4, 5, 5])
85+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
86+
})
87+
88+
it('Almost all same with outlier', () => {
89+
const samples = toSortedSamples([5, 5, 5, 5, 5, 5, 5, 100])
90+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
91+
})
92+
93+
it('Two values repeated', () => {
94+
const samples = toSortedSamples([1, 1, 1, 1, 9, 9, 9, 9])
95+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
96+
})
97+
98+
it('Complex duplicates', () => {
99+
const samples = toSortedSamples([1, 1, 2, 3, 3, 3, 4, 5, 5, 6, 6, 6, 6, 7])
100+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(absoluteDeviationMedianTrivial(samples))
101+
})
102+
103+
// Runs 1000 rounds of 10 values drawn from `Math.random() * 10`. The generator is not seeded,
// so a failing round cannot be reproduced from the test output alone.
it('fuzzing test', () => {
104+
const rounds = 1000
105+
const len = 10
106+
107+
for (let j = 0; j < rounds; ++j) {
108+
const samplesArray: Samples = new Array(len) as unknown as Samples
109+
for (let i = 0; i < len; i++) {
110+
samplesArray[i] = (Math.random() * 10)
111+
}
112+
113+
const samples = toSortedSamples(samplesArray)
114+
expect(absoluteDeviationMedian(samples, medianFn(samples))).toBe(
115+
absoluteDeviationMedianTrivial(samples)
116+
)
117+
}
118+
})
119+
})

test/utils-default-convert-task-result-for-console-table.test.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ import type { Task } from '../src'
55
import {
66
defaultConvertTaskResultForConsoleTable,
77
getStatisticsSorted,
8-
toSortedSamples
98
} from '../src/utils'
9+
import { toSortedSamples } from './utils'
1010

1111
test('defaultConvertTaskResultForConsoleTable - not-started', () => {
1212
expect(defaultConvertTaskResultForConsoleTable({

0 commit comments

Comments
 (0)