From 2ec04e067917a6481559c35523e54fc7f63c284d Mon Sep 17 00:00:00 2001 From: jobo322 Date: Wed, 15 Apr 2026 08:26:51 -0500 Subject: [PATCH 1/2] feat: implementation of xyBinning --- src/xy/__tests__/xyBinning.test.ts | 243 +++++++++++++++++++++++++++++ src/xy/xyBinning.ts | 92 +++++++++++ 2 files changed, 335 insertions(+) create mode 100644 src/xy/__tests__/xyBinning.test.ts create mode 100644 src/xy/xyBinning.ts diff --git a/src/xy/__tests__/xyBinning.test.ts b/src/xy/__tests__/xyBinning.test.ts new file mode 100644 index 00000000..6e518413 --- /dev/null +++ b/src/xy/__tests__/xyBinning.test.ts @@ -0,0 +1,243 @@ +import { expect, test } from 'vitest'; + +import { xyBinning } from '../xyBinning.ts'; + +test('binSize 1 returns a copy', () => { + const data = { + x: [1, 2, 3, 4], + y: [10, 20, 30, 40], + }; + + expect(xyBinning(data, { binSize: 1 })).toStrictEqual({ + x: Float64Array.from([1, 2, 3, 4]), + y: Float64Array.from([10, 20, 30, 40]), + }); +}); + +test('default keepFirstAndLast with even division', () => { + const data = { + x: [1, 2, 3, 4, 5, 6], + y: [10, 20, 30, 40, 50, 60], + }; + + expect(xyBinning(data, { binSize: 2 })).toStrictEqual({ + x: Float64Array.from([1, 2.5, 4.5, 6]), + y: Float64Array.from([10, 25, 45, 60]), + }); + expect(xyBinning(data, { binSize: 3 })).toStrictEqual({ + x: Float64Array.from([1, 3, 5, 6]), + y: Float64Array.from([10, 30, 50, 60]), + }); +}); + +test('default keepFirstAndLast with uneven division', () => { + const data = { + x: [1, 2, 3, 4, 5, 6, 7], + y: [10, 20, 30, 40, 50, 60, 70], + }; + + expect(xyBinning(data, { binSize: 3 })).toStrictEqual({ + x: Float64Array.from([1, 3, 5.5, 7]), + y: Float64Array.from([10, 30, 55, 70]), + }); +}); + +test('accepts Float64Array input', () => { + const data = { + x: Float64Array.from([2, 4, 6, 8]), + y: Float64Array.from([20, 40, 60, 80]), + }; + + expect(xyBinning(data, { binSize: 2 })).toStrictEqual({ + x: Float64Array.from([2, 5, 8]), + y: Float64Array.from([20, 50, 80]), + }); +}); + +test('x is centered in each bin', () => { + const data = { + x: [1, 2, 10, 20], + y: [10, 20, 30, 40], + }; + + expect( + xyBinning(data, { binSize: 2, keepFirstAndLast: false }), + ).toStrictEqual({ + x: Float64Array.from([1.5, 15]), + y: Float64Array.from([15, 35]), + }); +}); + +test('default binSize is 10', () => { + const data = { + x: Array.from({ length: 25 }, (_, i) => i + 1), + y: Array.from({ length: 25 }, (_, i) => (i + 1) * 10), + }; + + expect(xyBinning(data)).toStrictEqual({ + x: Float64Array.from([1, 6.5, 16.5, 23, 25]), + y: Float64Array.from([10, 65, 165, 230, 250]), + }); +}); + +test('keepFirstAndLast=false restores pure binning', () => { + const data = { + x: [1, 2, 3, 4, 5, 6], + y: [10, 20, 30, 40, 50, 60], + }; + + expect( + xyBinning(data, { binSize: 2, keepFirstAndLast: false }), + ).toStrictEqual({ + x: Float64Array.from([1.5, 3.5, 5.5]), + y: Float64Array.from([15, 35, 55]), + }); + expect( + xyBinning(data, { binSize: 3, keepFirstAndLast: false }), + ).toStrictEqual({ + x: Float64Array.from([2, 5]), + y: Float64Array.from([20, 50]), + }); +}); + +test('keepFirstAndLast=false with uneven division', () => { + const data = { + x: [1, 2, 3, 4, 5, 6, 7], + y: [10, 20, 30, 40, 50, 60, 70], + }; + + expect( + xyBinning(data, { binSize: 3, keepFirstAndLast: false }), + ).toStrictEqual({ + x: Float64Array.from([2, 5, 7]), + y: Float64Array.from([20, 50, 70]), + }); +}); + +test('throws on invalid binSize', () => { + const data = { + x: [1, 2, 3], + y: [10, 20, 30], + }; + + expect(() => xyBinning(data, { binSize: 0 })).toThrow( + /binSize must be a positive integer/, + ); + expect(() => xyBinning(data, { binSize: 1.5 })).toThrow( + /binSize must be a positive integer/, + ); + expect(() => xyBinning(data, { binSize: -2 })).toThrow( + /binSize must be a positive integer/, + ); +}); + +test('throws on empty input', () => { + expect(() => xyBinning({ x: [], y: [] }, { binSize: 2 })).toThrow( + /input must not be empty/, + ); +}); + +test('throws when x does not contain numbers', () => { + const data = { + x: ['a', 'b', 'c'], + y: [1, 2, 3], + }; + + expect(() => xyBinning(data as any, { binSize: 2 })).toThrow( + /input must contain numbers/, + ); +}); + +test('numberOfPoints splits into N bins', () => { + const data = { + x: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + y: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + }; + + expect( + xyBinning(data, { numberOfPoints: 5, keepFirstAndLast: false }), + ).toStrictEqual({ + x: Float64Array.from([1.5, 3.5, 5.5, 7.5, 9.5]), + y: Float64Array.from([1.5, 3.5, 5.5, 7.5, 9.5]), + }); +}); + +test('numberOfPoints with uneven split', () => { + const data = { + x: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + y: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + }; + + expect( + xyBinning(data, { numberOfPoints: 3, keepFirstAndLast: false }), + ).toStrictEqual({ + x: Float64Array.from([2, 5, 8.5]), + y: Float64Array.from([2, 5, 8.5]), + }); +}); + +test('numberOfPoints with keepFirstAndLast', () => { + const data = { + x: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + y: [10, 20, 30, 40, 50, 60, 70, 80, 90, 100], + }; + + expect(xyBinning(data, { numberOfPoints: 5 })).toStrictEqual({ + x: Float64Array.from([1, 2.5, 5, 8, 10]), + y: Float64Array.from([10, 25, 50, 80, 100]), + }); +}); + +test('numberOfPoints throws when > length', () => { + expect(() => + xyBinning({ x: [1, 2, 3], y: [10, 20, 30] }, { numberOfPoints: 4 }), + ).toThrow(/numberOfPoints must be <= array.length/); +}); + +test('numberOfPoints throws when not a positive integer', () => { + expect(() => + xyBinning({ x: [1, 2, 3], y: [10, 20, 30] }, { numberOfPoints: 0 }), + ).toThrow(/numberOfPoints must be a positive integer/); + expect(() => + xyBinning({ x: [1, 2, 3], y: [10, 20, 30] }, { numberOfPoints: 2.5 }), + ).toThrow(/numberOfPoints must be a positive integer/); +}); + +test('binSize and numberOfPoints are mutually exclusive', () => { + expect(() => + xyBinning( + { x: [1, 2, 3, 4], y: [10, 20, 30, 40] }, + { binSize: 2, numberOfPoints: 2 }, + ), + ).toThrow(/mutually exclusive/); +}); + +test('numberOfPoints=2 with keepFirstAndLast returns endpoints', () => { + expect( + xyBinning( + { x: [1, 2, 3, 4, 5], y: [10, 20, 30, 40, 50] }, + { numberOfPoints: 2 }, + ), + ).toStrictEqual({ + x: Float64Array.from([1, 5]), + y: Float64Array.from([10, 50]), + }); +}); + +test('numberOfPoints < 2 with keepFirstAndLast throws', () => { + expect(() => + xyBinning({ x: [1, 2, 3], y: [10, 20, 30] }, { numberOfPoints: 1 }), + ).toThrow(/numberOfPoints must be >= 2 when keepFirstAndLast is true/); +}); + +test('numberOfPoints=1 with keepFirstAndLast=false averages entire array', () => { + expect( + xyBinning( + { x: [1, 2, 3, 4, 5], y: [10, 20, 30, 40, 50] }, + { numberOfPoints: 1, keepFirstAndLast: false }, + ), + ).toStrictEqual({ + x: Float64Array.from([3]), + y: Float64Array.from([30]), + }); +}); diff --git a/src/xy/xyBinning.ts b/src/xy/xyBinning.ts new file mode 100644 index 00000000..416634d3 --- /dev/null +++ b/src/xy/xyBinning.ts @@ -0,0 +1,92 @@ +import type { DataXY } from 'cheminfo-types'; + +import type { XBinningOptions } from '../x/index.ts'; +import { xBinning, xCheck } from '../x/index.ts'; + +import { xyCheck } from './xyCheck.ts'; + +/** + * Downsample y values by averaging consecutive non-overlapping points and + * place x at the center of each corresponding x bin. + * @param data - Object that contains x and y arrays. + * @param options - Binning options (same as xBinning). + * @returns Downsampled x and y arrays. + */ +export function xyBinning( + data: DataXY, + options: XBinningOptions = {}, +): DataXY { + xyCheck(data); + const { x, y } = data; + xCheck(x); + const binnedY = xBinning(y, options); + const binnedX = new Float64Array(binnedY.length); + const bins = getBins(x.length, options); + + for (let i = 0; i < bins.length; i++) { + const [start, end] = bins[i]; + binnedX[i] = (x[start] + x[end - 1]) / 2; + } + + return { x: binnedX, y: binnedY }; +} + +function getBins( + length: number, + options: XBinningOptions, +): Array<[start: number, end: number]> { + const { numberOfPoints, keepFirstAndLast = true } = options; + if (numberOfPoints !== undefined) { + if (keepFirstAndLast) { + if (numberOfPoints === 2) { + return [ + [0, 1], + [length - 1, length], + ]; + } + + const bins: Array<[number, number]> = [[0, 1]]; + const innerLength = length - 2; + const innerBins = numberOfPoints - 2; + for (let j = 0; j < innerBins; j++) { + const start = 1 + Math.floor((j * innerLength) / innerBins); + const end = 1 + Math.floor(((j + 1) * innerLength) / innerBins); + bins.push([start, end]); + } + bins.push([length - 1, length]); + return bins; + } + + const bins: Array<[number, number]> = []; + for (let j = 0; j < numberOfPoints; j++) { + const start = Math.floor((j * length) / numberOfPoints); + const end = Math.floor(((j + 1) * length) / numberOfPoints); + bins.push([start, end]); + } + return bins; + } + + const effectiveBinSize = options.binSize ?? 10; + if (effectiveBinSize === 1 || (keepFirstAndLast && length <= 2)) { + return Array.from({ length }, (_, i) => [i, i + 1]); + } + + if (keepFirstAndLast) { + const bins: Array<[number, number]> = [[0, 1]]; + const innerLength = length - 2; + for (let i = 0; i < innerLength; i += effectiveBinSize) { + const start = i + 1; + const end = Math.min(start + effectiveBinSize, length - 1); + bins.push([start, end]); + } + bins.push([length - 1, length]); + return bins; + } + + const bins: Array<[number, number]> = []; + for (let i = 0; i < length; i += effectiveBinSize) { + const end = Math.min(i + effectiveBinSize, length); + bins.push([i, end]); + } + return bins; +} From 64c1e3ef72cfc7224952b5445ddd930456ddb19a Mon Sep 17 00:00:00 2001 From: Luc Patiny Date: Wed, 15 Apr 2026 18:03:22 +0200 Subject: [PATCH 2/2] chore: simplify code --- src/xy/xyBinning.ts | 83 +++++---------------------------------------- 1 file changed, 8 insertions(+), 75 deletions(-) diff --git a/src/xy/xyBinning.ts b/src/xy/xyBinning.ts index 416634d3..ec7b3733 100644 --- a/src/xy/xyBinning.ts +++ b/src/xy/xyBinning.ts @@ -1,13 +1,14 @@ import type { DataXY } from 'cheminfo-types'; import type { XBinningOptions } from '../x/index.ts'; -import { xBinning, xCheck } from '../x/index.ts'; +import { xBinning } from '../x/index.ts'; import { xyCheck } from './xyCheck.ts'; /** - * Downsample y values by averaging consecutive non-overlapping points and - * place x at the center of each corresponding x bin. + * Downsample x and y by averaging consecutive non-overlapping points. + * Both arrays are binned with the same partitioning, so the resulting x + * is the mean x of each bin. * @param data - Object that contains x and y arrays. * @param options - Binning options (same as xBinning). * @returns Downsampled x and y arrays. @@ -17,76 +18,8 @@ export function xyBinning( options: XBinningOptions = {}, ): DataXY { xyCheck(data); - const { x, y } = data; - xCheck(x); - const binnedY = xBinning(y, options); - const binnedX = new Float64Array(binnedY.length); - const bins = getBins(x.length, options); - - for (let i = 0; i < bins.length; i++) { - const [start, end] = bins[i]; - binnedX[i] = (x[start] + x[end - 1]) / 2; - } - - return { x: binnedX, y: binnedY }; -} - -function getBins( - length: number, - options: XBinningOptions, -): Array<[start: number, end: number]> { - const { numberOfPoints, keepFirstAndLast = true } = options; - if (numberOfPoints !== undefined) { - if (keepFirstAndLast) { - if (numberOfPoints === 2) { - return [ - [0, 1], - [length - 1, length], - ]; - } - - const bins: Array<[number, number]> = [[0, 1]]; - const innerLength = length - 2; - const innerBins = numberOfPoints - 2; - for (let j = 0; j < innerBins; j++) { - const start = 1 + Math.floor((j * innerLength) / innerBins); - const end = 1 + Math.floor(((j + 1) * innerLength) / innerBins); - bins.push([start, end]); - } - bins.push([length - 1, length]); - return bins; - } - - const bins: Array<[number, number]> = []; - for (let j = 0; j < numberOfPoints; j++) { - const start = Math.floor((j * length) / numberOfPoints); - const end = Math.floor(((j + 1) * length) / numberOfPoints); - bins.push([start, end]); - } - return bins; - } - - const effectiveBinSize = options.binSize ?? 10; - if (effectiveBinSize === 1 || (keepFirstAndLast && length <= 2)) { - return Array.from({ length }, (_, i) => [i, i + 1]); - } - - if (keepFirstAndLast) { - const bins: Array<[number, number]> = [[0, 1]]; - const innerLength = length - 2; - for (let i = 0; i < innerLength; i += effectiveBinSize) { - const start = i + 1; - const end = Math.min(start + effectiveBinSize, length - 1); - bins.push([start, end]); - } - bins.push([length - 1, length]); - return bins; - } - - const bins: Array<[number, number]> = []; - for (let i = 0; i < length; i += effectiveBinSize) { - const end = Math.min(i + effectiveBinSize, length); - bins.push([i, end]); - } - return bins; + return { + x: xBinning(data.x, options), + y: xBinning(data.y, options), + }; }