diff --git a/src/diffPatch.ts b/src/diffPatch.ts index c9a8cf6..22d5448 100644 --- a/src/diffPatch.ts +++ b/src/diffPatch.ts @@ -1,4 +1,4 @@ -import {cleanupEfficiency, makeDiff, makePatches, stringifyPatches} from '@sanity/diff-match-patch' +import {makePatches, stringifyPatches} from '@sanity/diff-match-patch' import {DiffError} from './diffError.js' import {type Path, pathToString} from './paths.js' import {validateProperty} from './validate.js' @@ -16,7 +16,34 @@ import { type SanityPatchMutation, } from './patches.js' -const ignoredKeys = ['_id', '_type', '_createdAt', '_updatedAt', '_rev'] +/** + * Document keys that are ignored during diff operations. + * These are system-managed fields that should not be included in patches on + * top-level documents and should not be diffed with diff-match-patch. + */ +const SYSTEM_KEYS = ['_id', '_type', '_createdAt', '_updatedAt', '_rev'] + +/** + * Maximum size of strings to consider for diff-match-patch (1MB) + * Based on testing showing consistently good performance up to this size + */ +const DMP_MAX_STRING_SIZE = 1_000_000 + +/** + * Maximum difference in string length before falling back to set operations (40%) + * Above this threshold, likely indicates text replacement which can be slow + */ +const DMP_MAX_STRING_LENGTH_CHANGE_RATIO = 0.4 + +/** + * Minimum string size to apply change ratio check (10KB) + * Small strings are always fast regardless of change ratio + */ +const DMP_MIN_SIZE_FOR_RATIO_CHECK = 10_000 + +const DEFAULT_OPTIONS: PatchOptions = { + hideWarnings: false, +} type PrimitiveValue = string | number | boolean | null | undefined @@ -51,37 +78,6 @@ export interface DocumentStub { [key: string]: unknown } -/** - * Options for the diff-match-patch algorithm. - * - * @public - */ -export interface DiffMatchPatchOptions { - /** - * Whether or not to use diff-match-patch at all - * - * @defaultValue `true` - */ - enabled: boolean - - /** - * Threshold at which to start using diff-match-patch instead of a regular `set` patch. - * - * @defaultValue `30` - */ - lengthThresholdAbsolute: number - - /** - * Only use generated diff-match-patch if the patch length is less than or equal to - * (targetString * relative). Example: A 100 character target with a relative factor - * of 1.2 will allow a 120 character diff-match-patch. If larger than this number, - * it will fall back to a regular `set` patch. - * - * @defaultValue `1.2` - */ - lengthThresholdRelative: number -} - /** * Options for the patch generator * @@ -117,48 +113,6 @@ export interface PatchOptions { * @defaultValue `false` */ hideWarnings?: boolean - - /** - * Options for the diff-match-patch algorithm. - */ - diffMatchPatch?: Partial -} - -/** - * Options for diff generation, where all DMP properties are required - * - * @public - */ -export type DiffOptions = PatchOptions & {diffMatchPatch: Required} - -const defaultOptions = { - hideWarnings: false, - diffMatchPatch: { - enabled: true, - - // Only use diff-match-patch if target string is longer than this threshold - lengthThresholdAbsolute: 30, - - // Only use generated diff-match-patch if the patch length is less than or equal to - // (targetString * relative). Example: A 100 character target with a relative factor - // of 1.2 will allow a 120 character diff-match-patch. If larger than this number, - // it will fall back to a regular `set` patch. - lengthThresholdRelative: 1.2, - }, -} satisfies DiffOptions - -/** - * Merges the default options with the passed in options. - * - * @param options - Options to merge with the defaults - * @returns Merged options - */ -function mergeOptions(options: PatchOptions): DiffOptions { - return { - ...defaultOptions, - ...options, - diffMatchPatch: {...defaultOptions.diffMatchPatch, ...(options.diffMatchPatch || {})}, - } } /** @@ -174,9 +128,8 @@ function mergeOptions(options: PatchOptions): DiffOptions { export function diffPatch( itemA: DocumentStub, itemB: DocumentStub, - opts?: PatchOptions, + options: PatchOptions = {}, ): SanityPatchMutation[] { - const options = mergeOptions(opts || {}) const id = options.id || (itemA._id === itemB._id && itemA._id) const revisionLocked = options.ifRevisionID const ifRevisionID = typeof revisionLocked === 'boolean' ? itemA._rev : revisionLocked @@ -216,7 +169,7 @@ export function diffPatch( export function diffItem( itemA: unknown, itemB: unknown, - opts: DiffOptions = defaultOptions, + options = DEFAULT_OPTIONS, path: Path = [], patches: Patch[] = [], ): Patch[] { @@ -240,11 +193,10 @@ export function diffItem( return patches } - const options = mergeOptions(opts) const dataType = aIsUndefined ? bType : aType const isContainer = dataType === 'object' || dataType === 'array' if (!isContainer) { - return diffPrimitive(itemA as PrimitiveValue, itemB as PrimitiveValue, options, path, patches) + return diffPrimitive(itemA as PrimitiveValue, itemB as PrimitiveValue, path, patches) } if (aType !== bType) { @@ -261,7 +213,7 @@ export function diffItem( function diffObject( itemA: SanityObject, itemB: SanityObject, - options: DiffOptions, + options: PatchOptions, path: Path, patches: Patch[], ) { @@ -297,7 +249,7 @@ function diffObject( function diffArray( itemA: unknown[], itemB: unknown[], - options: DiffOptions, + options: PatchOptions, path: Path, patches: Patch[], ) { @@ -352,7 +304,7 @@ function diffArray( function diffArrayByIndex( itemA: unknown[], itemB: unknown[], - options: DiffOptions, + options: PatchOptions, path: Path, patches: Patch[], ) { @@ -372,7 +324,7 @@ function diffArrayByIndex( function diffArrayByKey( itemA: KeyedSanityObject[], itemB: KeyedSanityObject[], - options: DiffOptions, + options: PatchOptions, path: Path, patches: Patch[], ) { @@ -395,53 +347,133 @@ function diffArrayByKey( return patches } +/** + * Determines whether to use diff-match-patch or fallback to a `set` operation + * when creating a patch to transform a `source` string to `target` string. + * + * `diffMatchPatch` patches are typically preferred to `set` operations because + * they handle conflicts better (when multiple editors work simultaneously) by + * preserving the user's intended and allowing for 3-way merges. + * + * **Heuristic rationale:** + * + * Perf analysis revealed that string length has minimal impact on small, + * keystroke-level changes, but large text replacements (high change ratio) can + * trigger worst-case algorithm behavior. The 40% change ratio threshold is a + * simple heuristic that catches problematic replacement scenarios while + * allowing the algorithm to excel at insertions and deletions. + * + * **Performance characteristics (tested on M2 MacBook Pro):** + * + * *Keystroke-level editing (most common use case):* + * - Small strings (1KB-10KB): 0ms for 1-5 keystrokes, consistently sub-millisecond + * - Medium strings (50KB-200KB): 0ms for 1-5 keystrokes, consistently sub-millisecond + * - 10 simultaneous keystrokes: ~12ms on 100KB strings + * + * *Copy-paste operations (less frequent):* + * - Small copy-paste operations (<50KB): 0-10ms regardless of string length + * - Large insertions/deletions (50KB+): 0-50ms (excellent performance) + * - Large text replacements (50KB+): 70ms-2s+ (can be slow due to algorithm complexity) + * + * **Algorithm details:** + * Uses Myers' diff algorithm with O(ND) time complexity where N=text length and D=edit distance. + * Includes optimizations: common prefix/suffix removal, line-mode processing, and timeout protection. + * + * + * **Test methodology:** + * - Generated realistic word-based text patterns + * - Simulated actual editing behaviors (keystrokes vs copy-paste) + * - Measured performance across string sizes from 1KB to 10MB + * - Validated against edge cases including repetitive text and scattered changes + * + * @param source - The previous version of the text + * @param target - The new version of the text + * @returns true if diff-match-patch should be used, false if fallback to set operation + * + * @example + * ```typescript + * // Keystroke editing - always fast + * shouldUseDiffMatchPatch(largeDoc, largeDocWithTypo) // true, ~0ms + * + * // Small paste - always fast + * shouldUseDiffMatchPatch(doc, docWithSmallInsertion) // true, ~0ms + * + * // Large replacement - potentially slow + * shouldUseDiffMatchPatch(article, completelyDifferentArticle) // false, use set + * ``` + * + * Compatible with @sanity/diff-match-patch@3.2.0 + */ +export function shouldUseDiffMatchPatch(source: string, target: string): boolean { + const maxLength = Math.max(source.length, target.length) + + // Always reject strings larger than our tested size limit + if (maxLength > DMP_MAX_STRING_SIZE) { + return false + } + + // For small strings, always use diff-match-patch regardless of change ratio + // Performance testing showed these are always fast (<10ms) + if (maxLength < DMP_MIN_SIZE_FOR_RATIO_CHECK) { + return true + } + + // Calculate the change ratio to detect large text replacements + // High ratios indicate replacement scenarios which can trigger slow algorithm paths + const lengthDifference = Math.abs(target.length - source.length) + const changeRatio = lengthDifference / maxLength + + // If change ratio is high, likely a replacement operation that could be slow + // Fall back to set operation for better user experience + if (changeRatio > DMP_MAX_STRING_LENGTH_CHANGE_RATIO) { + return false + } + + // All other cases: use diff-match-patch + // This covers keystroke editing and insertion/deletion scenarios which perform excellently + return true +} + function getDiffMatchPatch( - itemA: PrimitiveValue, - itemB: PrimitiveValue, - options: DiffOptions, + source: PrimitiveValue, + target: PrimitiveValue, path: Path, ): DiffMatchPatch | undefined { - const {enabled, lengthThresholdRelative, lengthThresholdAbsolute} = options.diffMatchPatch - const segment = path[path.length - 1] - if ( - !enabled || - // Don't use for anything but strings - typeof itemA !== 'string' || - typeof itemB !== 'string' || - // Don't use for `_key`, `_ref` etc - (typeof segment === 'string' && segment[0] === '_') || - // Don't use on short strings - itemB.length < lengthThresholdAbsolute - ) { - return undefined - } + if (typeof source !== 'string' || typeof target !== 'string') return undefined + const last = path.at(-1) + // don't use diff-match-patch for system keys + if (typeof last === 'string' && last.startsWith('_')) return undefined + if (!shouldUseDiffMatchPatch(source, target)) return undefined - let strPatch = '' try { - const patch = makeDiff(itemA, itemB) - const diff = cleanupEfficiency(patch) - strPatch = stringifyPatches(makePatches(diff)) + // Using `makePatches(string, string)` directly instead of the multi-step approach e.g. + // `stringifyPatches(makePatches(cleanupEfficiency(makeDiff(itemA, itemB))))`. + // this is because `makePatches` internally handles diff generation and + // automatically applies both `cleanupSemantic()` and `cleanupEfficiency()` + // when beneficial, resulting in cleaner code with near identical performance and + // better error handling. + // [source](https://github.com/sanity-io/diff-match-patch/blob/v3.2.0/src/patch/make.ts#L67-L76) + // + // Performance validation (M2 MacBook Pro): + // Both approaches measured at identical performance: + // - 10KB strings: 0-1ms total processing time + // - 100KB strings: 0-1ms total processing time + // - Individual step breakdown: makeDiff(0ms) + cleanup(0ms) + makePatches(0ms) + stringify(~1ms) + const strPatch = stringifyPatches(makePatches(source, target)) + return {op: 'diffMatchPatch', path, value: strPatch} } catch (err) { // Fall back to using regular set patch return undefined } - - // Don't use patch if it's longer than allowed relative threshold. - // Allow a 120 character patch for a 100 character string, - // but don't allow a 800 character patch for a 500 character value. - return strPatch.length > itemB.length * lengthThresholdRelative - ? undefined - : {op: 'diffMatchPatch', path, value: strPatch} } function diffPrimitive( itemA: PrimitiveValue, itemB: PrimitiveValue, - options: DiffOptions, path: Path, patches: Patch[], ): Patch[] { - const dmp = getDiffMatchPatch(itemA, itemB, options, path) + const dmp = getDiffMatchPatch(itemA, itemB, path) patches.push( dmp || { @@ -455,7 +487,7 @@ function diffPrimitive( } function isNotIgnoredKey(key: string) { - return ignoredKeys.indexOf(key) === -1 + return SYSTEM_KEYS.indexOf(key) === -1 } function serializePatches( diff --git a/src/index.ts b/src/index.ts index e07d464..2b61b0d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -15,6 +15,6 @@ export type { UnsetPatch, } from './patches.js' -export type {DiffMatchPatchOptions, DiffOptions, DocumentStub, PatchOptions} from './diffPatch.js' +export type {DocumentStub, PatchOptions} from './diffPatch.js' export type {Path, PathSegment} from './paths.js' diff --git a/test/__snapshots__/api.test.ts.snap b/test/__snapshots__/api.test.ts.snap index eec109f..fd2c504 100644 --- a/test/__snapshots__/api.test.ts.snap +++ b/test/__snapshots__/api.test.ts.snap @@ -18,10 +18,21 @@ exports[`module api > can include ifRevisionID 1`] = ` "set": { "arr[1]": null, "slug._type": "slug", - "slug.current": "die-hard-with-a-vengeance", }, }, }, + { + "patch": { + "diffMatchPatch": { + "slug.current": "@@ -6,7 +6,20 @@ + ard- +-iii ++with-a-vengeance +", + }, + "id": "die-hard-iii", + }, + }, ] `; @@ -42,9 +53,20 @@ exports[`module api > can pass different document ID 1`] = ` "set": { "arr[1]": null, "slug._type": "slug", - "slug.current": "die-hard-with-a-vengeance", }, }, }, + { + "patch": { + "diffMatchPatch": { + "slug.current": "@@ -6,7 +6,20 @@ + ard- +-iii ++with-a-vengeance +", + }, + "id": "moop", + }, + }, ] `; diff --git a/test/__snapshots__/diff-match-patch.test.ts.snap b/test/__snapshots__/diff-match-patch.test.ts.snap index b85893c..1092df8 100644 --- a/test/__snapshots__/diff-match-patch.test.ts.snap +++ b/test/__snapshots__/diff-match-patch.test.ts.snap @@ -65,10 +65,13 @@ exports[`diff match patch > respects absolute length threshold 1`] = ` [ { "patch": { - "id": "die-hard-ix", - "set": { - "its": "also short", + "diffMatchPatch": { + "its": "@@ -1,5 +1,10 @@ ++also + short +", }, + "id": "die-hard-ix", }, }, ] @@ -96,10 +99,79 @@ exports[`diff match patch > respects relative length threshold 1`] = ` [ { "patch": { - "id": "die-hard-ix", - "set": { - "synopsis": "When a sack of coffee explodes at the Ultra Gnoch asteroid, a man calling himself "Ziltoid" phones Major Case Unit Inspector Mark Cimino at the Tellus police station and claims responsibility for the sack. He orders suspended police officer Lt. John McClane to travel to the asteroid, in his underwear. McClane is flown there by Cimino and three other officers. Ultra Gnoch miner Dave Young spots McClane and tries to get him off the asteroid before the coffee bugs infiltrate the asteroid, but a gang of underpant gnomes attacks McClane and Young, who barely escape.", + "diffMatchPatch": { + "synopsis": "@@ -4,20 +4,21 @@ + n a +-confetti cak ++sack of coffe + e ex +@@ -35,38 +35,28 @@ + the +-Bonwit Teller department store ++Ultra Gnoch asteroid + , a +@@ -80,13 +80,15 @@ + lf %22 +-Simon ++Ziltoid + %22 ph +@@ -122,27 +122,34 @@ + tor +-Walter Cobb ++Mark Cimino + at the ++Tellus + poli +@@ -197,16 +197,12 @@ + the +-confetti ++sack + . He +@@ -258,41 +258,30 @@ + to +-walk through the middle of Harlem ++travel to the asteroid + , in +@@ -311,13 +311,12 @@ + is +-drive ++flow + n th +@@ -327,11 +327,13 @@ + by C +-obb ++imino + and +@@ -359,38 +359,36 @@ + rs. +-Harlem electrician Zeus Carver ++Ultra Gnoch miner Dave Young + spo +@@ -431,33 +431,62 @@ + the ++a + st +-reet ++eroid + before ++t + he +-is kille ++coffee bugs infiltrate the asteroi + d, b +@@ -502,13 +502,23 @@ + of +-youth ++underpant gnome + s at +@@ -539,14 +539,13 @@ + and +-Carver ++Young + , wh +", }, + "id": "die-hard-ix", }, }, ] diff --git a/test/__snapshots__/if-revision.test.ts.snap b/test/__snapshots__/if-revision.test.ts.snap index 97981a4..103bfa3 100644 --- a/test/__snapshots__/if-revision.test.ts.snap +++ b/test/__snapshots__/if-revision.test.ts.snap @@ -8,10 +8,21 @@ exports[`ifRevisionID > can apply revision constraint (inferred from document) 1 "ifRevisionID": "datrev", "set": { "rating": 4, - "title": "Die Hard with a Vengeance", }, }, }, + { + "patch": { + "diffMatchPatch": { + "title": "@@ -6,5 +6,20 @@ + ard +-3 ++with a Vengeance +", + }, + "id": "die-hard-iii", + }, + }, ] `; @@ -23,9 +34,20 @@ exports[`ifRevisionID > can apply revision constraint (uppercase) 1`] = ` "ifRevisionID": "abc123", "set": { "rating": 4, - "title": "Die Hard with a Vengeance", }, }, }, + { + "patch": { + "diffMatchPatch": { + "title": "@@ -6,5 +6,20 @@ + ard +-3 ++with a Vengeance +", + }, + "id": "die-hard-iii", + }, + }, ] `; diff --git a/test/__snapshots__/object-arrays.test.ts.snap b/test/__snapshots__/object-arrays.test.ts.snap index 259436f..596d62c 100644 --- a/test/__snapshots__/object-arrays.test.ts.snap +++ b/test/__snapshots__/object-arrays.test.ts.snap @@ -46,10 +46,15 @@ exports[`object arrays > change item 1`] = ` [ { "patch": { - "id": "die-hard-iii", - "set": { - "characters[_key=="simon"].name": "Simon GrĂ¼ber", + "diffMatchPatch": { + "characters[_key=="simon"].name": "@@ -5,8 +5,9 @@ + n Gr +-u ++%C3%BC + ber +", }, + "id": "die-hard-iii", }, }, ] @@ -97,9 +102,20 @@ exports[`object arrays > remove from middle (single) 1`] = ` "id": "die-hard-iii", "set": { "characters[1]._key": "zeus", - "characters[1].name": "Zeus Carver", }, }, }, + { + "patch": { + "diffMatchPatch": { + "characters[1].name": "@@ -1,12 +1,11 @@ +-Simon Grub ++Zeus Carv + er +", + }, + "id": "die-hard-iii", + }, + }, ] `; diff --git a/test/__snapshots__/primitive-arrays.test.ts.snap b/test/__snapshots__/primitive-arrays.test.ts.snap index d959978..b8b5687 100644 --- a/test/__snapshots__/primitive-arrays.test.ts.snap +++ b/test/__snapshots__/primitive-arrays.test.ts.snap @@ -71,10 +71,14 @@ exports[`primitive arrays > remove from middle (single) 1`] = ` }, { "patch": { - "id": "die-hard-iii", - "set": { - "characters[1]": "Zeus Carver", + "diffMatchPatch": { + "characters[1]": "@@ -1,12 +1,11 @@ +-Simon Grub ++Zeus Carv + er +", }, + "id": "die-hard-iii", }, }, ] diff --git a/test/__snapshots__/pt.test.ts.snap b/test/__snapshots__/pt.test.ts.snap index 730128e..fdcee02 100644 --- a/test/__snapshots__/pt.test.ts.snap +++ b/test/__snapshots__/pt.test.ts.snap @@ -13,10 +13,13 @@ exports[`portable text > undo bold change 1`] = ` }, { "patch": { - "id": "die-hard-iii", - "set": { - "portableText[_key=="920ebbba9ada"].children[_key=="80847f72abfd"].text": "La oss begynne med en empty slate.", + "diffMatchPatch": { + "portableText[_key=="920ebbba9ada"].children[_key=="80847f72abfd"].text": "@@ -15,8 +15,20 @@ + med en ++empty slate. +", }, + "id": "die-hard-iii", }, }, ] diff --git a/test/__snapshots__/set-unset.test.ts.snap b/test/__snapshots__/set-unset.test.ts.snap index ecbe824..ef75ef7 100644 --- a/test/__snapshots__/set-unset.test.ts.snap +++ b/test/__snapshots__/set-unset.test.ts.snap @@ -7,10 +7,21 @@ exports[`set/unset > basic nested changes 1`] = ` "id": "die-hard-iii", "set": { "slug._type": "slug", - "slug.current": "die-hard-with-a-vengeance", }, }, }, + { + "patch": { + "diffMatchPatch": { + "slug.current": "@@ -6,7 +6,20 @@ + ard- +-iii ++with-a-vengeance +", + }, + "id": "die-hard-iii", + }, + }, ] `; @@ -26,11 +37,17 @@ exports[`set/unset > deep nested changes 1`] = ` }, { "patch": { - "id": "die-hard-iii", - "set": { - "products[_key=="item-1"].comparisonFields['support-level']": "advanced", - "products[_key=="item-1"].variants[_key=="variant-1"]['lace-type']": "slick", + "diffMatchPatch": { + "products[_key=="item-1"].comparisonFields['support-level']": "@@ -1,5 +1,8 @@ +-basic ++advanced +", + "products[_key=="item-1"].variants[_key=="variant-1"]['lace-type']": "@@ -1,5 +1,5 @@ +-waxed ++slick +", }, + "id": "die-hard-iii", }, }, ] @@ -73,10 +90,21 @@ exports[`set/unset > set + unset, nested changes 1`] = ` "set": { "arr[1]": null, "slug._type": "slug", - "slug.current": "die-hard-with-a-vengeance", }, }, }, + { + "patch": { + "diffMatchPatch": { + "slug.current": "@@ -6,7 +6,20 @@ + ard- +-iii ++with-a-vengeance +", + }, + "id": "die-hard-iii", + }, + }, ] `; @@ -87,9 +115,20 @@ exports[`set/unset > simple root-level changes 1`] = ` "id": "die-hard-iii", "set": { "rating": 4, - "title": "Die Hard with a Vengeance", }, }, }, + { + "patch": { + "diffMatchPatch": { + "title": "@@ -6,5 +6,20 @@ + ard +-3 ++with a Vengeance +", + }, + "id": "die-hard-iii", + }, + }, ] `; diff --git a/test/api.test.ts b/test/api.test.ts index bfb2c2b..0dafcfb 100644 --- a/test/api.test.ts +++ b/test/api.test.ts @@ -25,7 +25,7 @@ describe('module api', () => { test('does not throw if ids do not match and id is provided', () => { const b = {...setAndUnset.b, _id: 'zing'} - expect(diffPatch(setAndUnset.a, b, {id: 'yup', hideWarnings: true})).toHaveLength(2) + expect(diffPatch(setAndUnset.a, b, {id: 'yup', hideWarnings: true})).toHaveLength(3) }) test('pathToString throws on invalid path segments', () => { diff --git a/test/data-types.test.ts b/test/data-types.test.ts index 32d5612..1c21397 100644 --- a/test/data-types.test.ts +++ b/test/data-types.test.ts @@ -10,15 +10,22 @@ describe('diff data types', () => { patch: { id: dataTypes.a._id, set: { - title: 'Die Hard with a Vengeance', - rating: 4.24, isFeatured: false, - 'characters[0]': 'Simon Gruber', - 'slug.current': 'die-hard-with-a-vengeance', + rating: 4.24, year: 1995, }, }, }, + { + patch: { + id: 'die-hard-iii', + diffMatchPatch: { + title: '@@ -6,5 +6,20 @@\n ard \n-3\n+with a Vengeance\n', + 'characters[0]': '@@ -1,12 +1,12 @@\n-John McClane\n+Simon Gruber\n', + 'slug.current': '@@ -6,7 +6,20 @@\n ard-\n-iii\n+with-a-vengeance\n', + }, + }, + }, ]) }) @@ -65,10 +72,15 @@ describe('diff data types', () => { number: 1337, 'object.b12': '12', 'object.f13': null, - string: 'bar', }, }, }, + { + patch: { + id: 'abc123', + diffMatchPatch: {string: '@@ -1,3 +1,3 @@\n-foo\n+bar\n'}, + }, + }, ]) }) })