// Origin: patch 2f98225d129c7c1ae6b88a4f0bea9227254fcf91
// "feat(arrays): add fillRange() & levenshtein()" (Karsten Schmidt, 2020-03-20).
//
// The patch creates two new modules, reproduced below:
//   - packages/arrays/src/fill-range.ts
//   - packages/arrays/src/levenshtein.ts
// and registers both in packages/arrays/src/index.ts by adding:
//   export * from "./fill-range";   (between "./find" and "./fuzzy-match")
//   export * from "./levenshtein";  (between "./iterator" and "./peek")

import type { Predicate2, TypedArray } from "@thi.ng/api";
import { equiv as _equiv } from "@thi.ng/equiv";

// ---------------------------------------------------------------------
// packages/arrays/src/fill-range.ts
// ---------------------------------------------------------------------

/**
 * Fills given array with values in [start .. end) interval from `index`
 * and with optional `step` size.
 *
 * @remarks
 * `start` and `end` default to 0 and array length, `step` defaults to 1
 * or -1 (depending on range). Returns array.
 *
 * Nothing is written if `step` points away from `end` (e.g. a positive
 * step with `start > end`) or if `step` is 0 / `NaN`. The explicit
 * zero-step guard matters: `start` would otherwise never advance and a
 * descending range would loop forever while growing `buf`.
 *
 * @example
 * ```ts
 * fillRange(new Array(5))
 * // [ 0, 1, 2, 3, 4 ]
 *
 * fillRange(fillRange([], 0, 10, 20, 2), 5, 20, 8, -2)
 * // [ 10, 12, 14, 16, 18, 20, 18, 16, 14, 12, 10 ]
 * ```
 *
 * @param buf - target array (mutated in place)
 * @param index - first write position in `buf`
 * @param start - first value to write (inclusive)
 * @param end - range limit (exclusive)
 * @param step - increment between successive values
 * @returns the same `buf` instance
 */
export const fillRange = <T extends any[] | TypedArray>(
    buf: T,
    index = 0,
    start = 0,
    end = buf.length,
    step = end > start ? 1 : -1
): T => {
    if (step > 0) {
        for (; start < end; start += step) buf[index++] = start;
    } else if (step < 0) {
        for (; start > end; start += step) buf[index++] = start;
    }
    return buf;
};

// ---------------------------------------------------------------------
// packages/arrays/src/levenshtein.ts
// ---------------------------------------------------------------------

/** Default equality predicate: strict (`===`) comparison. */
const eqStrict = (a: unknown, b: unknown): boolean => a === b;

/**
 * Computes Levenshtein distance w/ optionally given `maxDist` (for
 * early termination, default: ∞) and equality predicate (default:
 * `===`). Returns 0 if both `a` and `b` are equal (based on predicate).
 * Returns `Infinity` if actual distance > `maxDist`.
 *
 * @remarks
 * The shorter input is always processed as `a` (inputs are swapped if
 * needed), therefore `equiv` may receive its arguments in either order
 * and should be symmetric.
 *
 * Based on:
 * - https://en.wikipedia.org/wiki/Levenshtein_distance
 * - https://en.wikipedia.org/wiki/Wagner%E2%80%93Fischer_algorithm
 * - https://github.com/gustf/js-levenshtein/blob/master/index.js
 *
 * @example
 * ```ts
 * levenshtein([1, 2, 3, 4, 5], [1, 2, 4, 3, 5]);
 * // 2
 *
 * levenshtein(
 *   [{ id: 1 }, { id: 2 }, { id: 3 }, { id: 4 }, { id: 5 }],
 *   [{ id: 4 }, { id: 5 }, { id: 3 }, { id: 1 }, { id: 2 }],
 *   // max dist
 *   2,
 *   // predicate
 *   (a, b) => a.id === b.id
 * )
 * // Infinity
 * ```
 *
 * @param a - first sequence (array, typed array or string)
 * @param b - second sequence
 * @param maxDist - largest distance of interest; anything above yields `Infinity`
 * @param equiv - item equality predicate
 * @returns edit distance, or `Infinity` if it exceeds `maxDist`
 */
export const levenshtein = <T>(
    a: ArrayLike<T>,
    b: ArrayLike<T>,
    maxDist = Infinity,
    equiv: Predicate2<T> = eqStrict
): number => {
    if (a === b) {
        return 0;
    }
    // ensure `a` is the shorter sequence
    if (a.length > b.length) {
        const tmp = a;
        a = b;
        b = tmp;
    }

    // strip common suffix...
    let la = a.length;
    let lb = b.length;
    while (la > 0 && equiv(a[la - 1], b[lb - 1])) {
        la--;
        lb--;
    }
    // ...and common prefix, neither contributes to the distance
    let offset = 0;
    while (offset < la && equiv(a[offset], b[offset])) {
        offset++;
    }
    la -= offset;
    lb -= offset;

    // applies the documented `maxDist` contract to a final result
    const clamp = (dist: number) => (dist > maxDist ? Infinity : dist);

    // trivial cases: nothing left of `a`, or the remainder is so short
    // that every remaining item of `b` costs exactly one edit
    if (la === 0 || lb < 3) {
        return clamp(lb);
    }

    // Single cell update of the Wagner-Fischer recurrence:
    // `left` / `diag` / `up` are the neighbouring cell values, `ay` & `bx`
    // the items compared for this cell.
    const minCost = (left: number, diag: number, up: number, bx: T, ay: T) =>
        left < diag || up < diag
            ? left > up
                ? up + 1
                : left + 1
            : equiv(ay, bx)
            ? diag
            : diag + 1;

    // most recently completed matrix column (rows 1..la)
    const column: number[] = [];
    for (let y = 0; y < la; y++) {
        column.push(y + 1);
    }

    let x = 0;
    // always overwritten below since lb >= 3 at this point
    let dd = lb;

    // main pass: consume 4 items of `b` (4 matrix columns) per iteration
    const lb3 = lb - 3;
    while (x < lb3) {
        const bx0 = b[offset + x];
        const bx1 = b[offset + x + 1];
        const bx2 = b[offset + x + 2];
        const bx3 = b[offset + x + 3];
        let d0 = x;
        let d1 = x + 1;
        let d2 = x + 2;
        let d3 = x + 3;
        dd = x += 4;
        let minDist = Infinity;
        for (let y = 0; y < la; y++) {
            const dy = column[y];
            const ay = a[offset + y];
            d0 = minCost(dy, d0, d1, bx0, ay);
            d1 = minCost(d0, d1, d2, bx1, ay);
            d2 = minCost(d1, d2, d3, bx2, ay);
            dd = minCost(d2, d3, dd, bx3, ay);
            if (dd < minDist) minDist = dd;
            column[y] = dd;
            d3 = d2;
            d2 = d1;
            d1 = d0;
            d0 = dy;
        }
        // no cell of this column is within budget => result can't be either
        if (minDist > maxDist) return Infinity;
    }

    // tail pass: remaining (up to 3) columns, one at a time
    while (x < lb) {
        const bx = b[offset + x];
        let d0 = x;
        dd = ++x;
        let minDist = Infinity;
        for (let y = 0; y < la; y++) {
            const dy = column[y];
            column[y] = dd = minCost(dy, d0, dd, bx, a[offset + y]);
            if (dd < minDist) minDist = dd;
            d0 = dy;
        }
        if (minDist > maxDist) return Infinity;
    }

    // the column minimum checked above can be smaller than the last
    // cell, so the actual result must be checked against `maxDist` too
    return clamp(dd);
};

/**
 * Normalized version of {@link levenshtein}, i.e. the actual L-dist
 * divided by the length of the longest input (or `Infinity` if actual
 * distance > `maxDist`). Returns 0 if both inputs are empty.
 *
 * @remarks
 * `maxDist` is still expressed as absolute (non-normalized) distance.
 *
 * @param a - first sequence
 * @param b - second sequence
 * @param maxDist - largest absolute distance of interest
 * @param equiv - item equality predicate
 * @returns distance in the [0 .. 1] range, or `Infinity`
 */
export const normalizedLevenshtein = <T>(
    a: ArrayLike<T>,
    b: ArrayLike<T>,
    maxDist = Infinity,
    equiv: Predicate2<T> = eqStrict
): number => {
    const n = Math.max(a.length, b.length);
    return n > 0 ? levenshtein(a, b, maxDist, equiv) / n : 0;
};