diff --git a/package.json b/package.json
index 4ff25803d7..7bcf75be71 100644
--- a/package.json
+++ b/package.json
@@ -89,7 +89,7 @@
     "@jsonjoy.com/json-pack": "^1.1.0",
     "@jsonjoy.com/json-pointer": "^1.0.0",
     "@jsonjoy.com/json-type": "^1.0.0",
-    "@jsonjoy.com/util": "^1.4.0",
+    "@jsonjoy.com/util": "^1.6.0",
     "arg": "^5.0.2",
     "hyperdyperid": "^1.2.0",
     "nano-css": "^5.6.2",
@@ -108,6 +108,7 @@
     "benchmark": "^2.1.4",
     "config-galore": "^1.0.0",
     "editing-traces": "https://github.com/streamich/editing-traces#6494020428530a6e382378b98d1d7e31334e2d7b",
+    "fast-diff": "^1.3.0",
     "fast-json-patch": "^3.1.1",
     "html-webpack-plugin": "^5.6.0",
     "jest": "^29.7.0",
@@ -152,6 +153,7 @@
           "",
           "demo",
           "json-cli",
+          "json-crdt-diff",
           "json-crdt-patch",
           "json-crdt-extensions",
           "json-crdt-peritext-ui",
@@ -160,6 +162,7 @@
           "json-ot",
           "json-patch-ot",
           "json-patch",
+          "json-patch-diff",
           "json-stable",
           "json-text",
           "json-walk",
diff --git a/src/json-crdt-diff/JsonCrdtDiff.ts b/src/json-crdt-diff/JsonCrdtDiff.ts
new file mode 100644
index 0000000000..c119cb767e
--- /dev/null
+++ b/src/json-crdt-diff/JsonCrdtDiff.ts
@@ -0,0 +1,243 @@
+import {deepEqual} from '@jsonjoy.com/util/lib/json-equal/deepEqual';
+import {cmpUint8Array} from '@jsonjoy.com/util/lib/buffers/cmpUint8Array';
+import {type ITimespanStruct, type ITimestampStruct, type Patch, PatchBuilder, Timespan} from '../json-crdt-patch';
+import {ArrNode, BinNode, ConNode, ObjNode, StrNode, ValNode, VecNode, type JsonNode} from '../json-crdt/nodes';
+import * as str from '../util/diff/str';
+import * as bin from '../util/diff/bin';
+import * as line from '../util/diff/line';
+import {structHashCrdt} from '../json-hash/structHashCrdt';
+import {structHash} from '../json-hash';
+import type {Model} from '../json-crdt/model';
+
+/** Thrown when a source node cannot be edited in place to match the destination value. */
+export class DiffError extends Error {
+  constructor(message: string = 'DIFF') {
+    super(message);
+  }
+}
+
+/**
+ * Builds a {@link Patch} that changes the view of a JSON CRDT node into a
+ * given destination value.
+ */
+export class JsonCrdtDiff {
+  protected builder: PatchBuilder;
+
+  public constructor(protected readonly model: Model) {
+    this.builder = new PatchBuilder(model.clock.clone());
+  }
+
+  /** Records the `insStr`/`del` edits of the text diff between the `str` node view and `dst`. */
+  protected diffStr(src: StrNode, dst: string): void {
+    const view = src.view();
+    if (view === dst) return;
+    const builder = this.builder;
+    str.apply(
+      str.diff(view, dst),
+      view.length,
+      (pos, txt) => builder.insStr(src.id, !pos ? src.id : src.find(pos - 1)!, txt),
+      (pos, len) => builder.del(src.id, src.findInterval(pos, len)),
+    );
+  }
+
+  /** Records the `insBin`/`del` edits of the byte diff between the `bin` node view and `dst`. */
+  protected diffBin(src: BinNode, dst: Uint8Array): void {
+    const view = src.view();
+    if (cmpUint8Array(view, dst)) return;
+    const builder = this.builder;
+    bin.apply(
+      bin.diff(view, dst),
+      view.length,
+      (pos, txt) => builder.insBin(src.id, !pos ? src.id : src.find(pos - 1)!, txt),
+      (pos, len) => builder.del(src.id, src.findInterval(pos, len)),
+    );
+  }
+
+  /**
+   * Diffs an `arr` node against `dst` using one structural hash per element.
+   * Elements reported as `MIX` are diffed recursively, or replaced on failure.
+   */
+  protected diffArr(src: ArrNode, dst: unknown[]): void {
+    const srcLines: string[] = [];
+    src.children((node) => {
+      srcLines.push(structHashCrdt(node));
+    });
+    const dstLines: string[] = [];
+    const dstLength = dst.length;
+    for (let i = 0; i < dstLength; i++) dstLines.push(structHash(dst[i]));
+    const linePatch = line.diff(srcLines, dstLines);
+    if (!linePatch.length) return;
+    const inserts: [after: ITimestampStruct, views: unknown[]][] = [];
+    const deletes: ITimespanStruct[] = [];
+    const patchLength = linePatch.length;
+    for (let i = patchLength - 1; i >= 0; i--) {
+      const [type, posSrc, posDst] = linePatch[i];
+      switch (type) {
+        case line.LINE_PATCH_OP_TYPE.EQL:
+          break;
+        case line.LINE_PATCH_OP_TYPE.INS: {
+          const view = dst[posDst];
+          const after = posSrc >= 0 ? src.find(posSrc) : src.id;
+          if (!after) throw new DiffError();
+          inserts.push([after, [view]]);
+          break;
+        }
+        case line.LINE_PATCH_OP_TYPE.DEL: {
+          const span = src.findInterval(posSrc, 1);
+          if (!span || !span.length) throw new DiffError();
+          deletes.push(...span);
+          break;
+        }
+        case line.LINE_PATCH_OP_TYPE.MIX: {
+          const view = dst[posDst];
+          try {
+            this.diffAny(src.getNode(posSrc)!, view);
+          } catch (error) {
+            if (error instanceof DiffError) {
+              // The element cannot be edited in place: delete it and insert the new value.
+              const span = src.findInterval(posSrc, 1)!;
+              deletes.push(...span);
+              const after = posSrc ? src.find(posSrc - 1) : src.id;
+              if (!after) throw new DiffError();
+              inserts.push([after, [view]]);
+            } else throw error;
+          }
+        }
+      }
+    }
+    const builder = this.builder;
+    const length = inserts.length;
+    for (let i = 0; i < length; i++) {
+      const [after, views] = inserts[i];
+      builder.insArr(
+        src.id,
+        after,
+        views.map((view) => builder.json(view)),
+      );
+    }
+    if (deletes.length) builder.del(src.id, deletes);
+  }
+
+  /**
+   * Diffs an `obj` node: source keys that are `undefined` in `dst` are set to a
+   * `con` of `undefined`; `dst` keys are diffed in place or written anew.
+   */
+  protected diffObj(src: ObjNode, dst: Record<string, unknown>): void {
+    const builder = this.builder;
+    const inserts: [key: string, value: ITimestampStruct][] = [];
+    const srcKeys = new Set<string>();
+    // biome-ignore lint: .forEach is fastest here
+    src.forEach((key) => {
+      srcKeys.add(key);
+      const dstValue = dst[key];
+      if (dstValue === void 0) inserts.push([key, builder.const(undefined)]);
+    });
+    const keys = Object.keys(dst);
+    const length = keys.length;
+    for (let i = 0; i < length; i++) {
+      const key = keys[i];
+      const dstValue = dst[key];
+      if (srcKeys.has(key)) {
+        const child = src.get(key);
+        if (child) {
+          try {
+            this.diffAny(child, dstValue);
+            continue;
+          } catch (error) {
+            if (!(error instanceof DiffError)) throw error;
+          }
+        }
+      }
+      inserts.push([key, src.get(key) instanceof ConNode ? builder.const(dstValue) : builder.constOrJson(dstValue)]);
+    }
+    if (inserts.length) builder.insObj(src.id, inserts);
+  }
+
+  /**
+   * Diffs a `vec` node: slots past the end of `dst` are cleared, overlapping
+   * slots are diffed in place or overwritten, and extra `dst` items are added.
+   */
+  protected diffVec(src: VecNode, dst: unknown[]): void {
+    const builder = this.builder;
+    const edits: [key: number, value: ITimestampStruct][] = [];
+    const elements = src.elements;
+    const srcLength = elements.length;
+    const dstLength = dst.length;
+    const index = src.doc.index;
+    const min = Math.min(srcLength, dstLength);
+    for (let i = dstLength; i < srcLength; i++) {
+      const id = elements[i];
+      if (id) {
+        const child = index.get(id);
+        const isDeleted = !child || (child instanceof ConNode && child.val === void 0);
+        // Already empty: skip this slot only; returning here would drop all other edits.
+        if (isDeleted) continue;
+        edits.push([i, builder.const(void 0)]);
+      }
+    }
+    for (let i = 0; i < min; i++) {
+      const value = dst[i];
+      const child = src.get(i);
+      if (child) {
+        try {
+          this.diffAny(child, value);
+          continue;
+        } catch (error) {
+          if (!(error instanceof DiffError)) throw error;
+        }
+      }
+      edits.push([i, builder.constOrJson(value)]);
+    }
+    for (let i = srcLength; i < dstLength; i++) edits.push([i, builder.constOrJson(dst[i])]);
+    if (edits.length) builder.insVec(src.id, edits);
+  }
+
+  /** Diffs the node held by a `val` node; sets a new value when it cannot be edited in place. */
+  protected diffVal(src: ValNode, dst: unknown): void {
+    try {
+      this.diffAny(src.node(), dst);
+    } catch (error) {
+      if (error instanceof DiffError) {
+        const builder = this.builder;
+        builder.setVal(src.id, builder.constOrJson(dst));
+      } else throw error;
+    }
+  }
+
+  /**
+   * Dispatches to the diff routine matching the node type of `src`.
+   *
+   * @throws DiffError if `dst` cannot be applied to `src` in place.
+   */
+  public diffAny(src: JsonNode, dst: unknown): void {
+    if (src instanceof ConNode) {
+      const val = src.val;
+      if (val !== dst && !deepEqual(val, dst)) throw new DiffError();
+    } else if (src instanceof StrNode) {
+      if (typeof dst !== 'string') throw new DiffError();
+      this.diffStr(src, dst);
+    } else if (src instanceof ObjNode) {
+      if (!dst || typeof dst !== 'object' || Array.isArray(dst)) throw new DiffError();
+      this.diffObj(src, dst as Record<string, unknown>);
+    } else if (src instanceof ValNode) {
+      this.diffVal(src, dst);
+    } else if (src instanceof ArrNode) {
+      if (!Array.isArray(dst)) throw new DiffError();
+      this.diffArr(src, dst as unknown[]);
+    } else if (src instanceof VecNode) {
+      if (!Array.isArray(dst)) throw new DiffError();
+      this.diffVec(src, dst as unknown[]);
+    } else if (src instanceof BinNode) {
+      if (!(dst instanceof Uint8Array)) throw new DiffError();
+      this.diffBin(src, dst);
+    } else {
+      throw new DiffError();
+    }
+  }
+
+  /** Diffs `src` against `dst` and returns the patch flushed from the builder. */
+  public diff(src: JsonNode, dst: unknown): Patch {
+    this.diffAny(src, dst);
+    return this.builder.flush();
+  }
+}
diff --git a/src/json-crdt-diff/__tests__/JsonCrdtDiff-fuzzing.spec.ts b/src/json-crdt-diff/__tests__/JsonCrdtDiff-fuzzing.spec.ts
new file mode 100644
index 0000000000..b82d94e7f1
--- /dev/null
+++ b/src/json-crdt-diff/__tests__/JsonCrdtDiff-fuzzing.spec.ts
@@ -0,0 +1,53 @@
+import {JsonCrdtDiff} from '../JsonCrdtDiff';
+import {Model} from '../../json-crdt/model';
+import {RandomJson} from '@jsonjoy.com/util/lib/json-random';
+
+const assertDiff = (src: unknown, dst: unknown) => {
+  const model = Model.create();
+  model.api.root(src);
+  const patch1 = new JsonCrdtDiff(model).diff(model.root, dst);
+  // console.log(model + '');
+  // console.log(patch1 + '');
+  model.applyPatch(patch1);
+  // console.log(model + '');
+  expect(model.view()).toEqual(dst);
+  const patch2 = new JsonCrdtDiff(model).diff(model.root, dst);
+  expect(patch2.ops.length).toBe(0);
+};
+
+const iterations = 1000;
+
+test('from random JSON to random JSON', () => {
+  for (let i = 0; i < iterations; i++) {
+    const src = RandomJson.generate();
+    const dst = RandomJson.generate();
+    // console.log(src);
+    // console.log(dst);
+    assertDiff(src, dst);
+  }
+});
+
+test('two random arrays of integers', () => {
+  const iterations = 100;
+
+  const randomArray = () => {
+    const len = Math.floor(Math.random() * 10);
+    const arr: unknown[] = [];
+    for (let i = 0; i < len; i++) {
+      arr.push(Math.ceil(Math.random() * 13));
+    }
+    return arr;
+  };
+
+  for (let i = 0; i < iterations; i++) {
+    const src = randomArray();
+    
const dst = randomArray(); + try { + assertDiff(src, dst); + } catch (error) { + console.error('src', src); + console.error('dst', dst); + throw error; + } + } +}); diff --git a/src/json-crdt-diff/__tests__/JsonCrdtDiff.spec.ts b/src/json-crdt-diff/__tests__/JsonCrdtDiff.spec.ts new file mode 100644 index 0000000000..fba503a584 --- /dev/null +++ b/src/json-crdt-diff/__tests__/JsonCrdtDiff.spec.ts @@ -0,0 +1,393 @@ +import {JsonCrdtDiff} from '../JsonCrdtDiff'; +import {type InsStrOp, s} from '../../json-crdt-patch'; +import {Model} from '../../json-crdt/model'; +import {type JsonNode, ValNode} from '../../json-crdt/nodes'; +import {b} from '@jsonjoy.com/util/lib/buffers/b'; + +const assertDiff = (model: Model, src: JsonNode, dst: unknown) => { + const patch1 = new JsonCrdtDiff(model).diff(src, dst); + // console.log(model + ''); + // console.log(dst); + // console.log(patch1 + ''); + model.applyPatch(patch1); + // console.log(model + ''); + expect(src.view()).toEqual(dst); + const patch2 = new JsonCrdtDiff(model).diff(src, dst); + // console.log(patch2 + ''); + expect(patch2.ops.length).toBe(0); +}; + +const assertDiff2 = (src: unknown, dst: unknown) => { + const model = Model.create(); + model.api.root(src); + assertDiff(model, model.root.child(), dst); +}; + +describe('con', () => { + test('binary in "con"', () => { + const model = Model.create( + s.obj({ + field: s.con(new Uint8Array([1, 2, 3])), + }), + ); + const dst = { + field: new Uint8Array([1, 2, 3, 4]), + }; + assertDiff(model, model.root, dst); + }); +}); + +describe('str', () => { + test('insert', () => { + const model = Model.create(); + const src = 'hello world'; + model.api.root({str: src}); + const str = model.api.str(['str']); + const dst = 'hello world!'; + const patch = new JsonCrdtDiff(model).diff(str.node, dst); + expect(patch.ops.length).toBe(1); + expect(patch.ops[0].name()).toBe('ins_str'); + expect((patch.ops[0] as InsStrOp).data).toBe('!'); + expect(str.view()).toBe(src); + 
model.applyPatch(patch);
+    expect(str.view()).toBe(dst);
+  });
+
+  test('delete', () => {
+    const model = Model.create();
+    const src = 'hello, world'; // one character more than `dst`, so the diff is exactly one `del`
+    model.api.root({str: src});
+    const str = model.api.str(['str']);
+    const dst = 'hello world';
+    const patch = new JsonCrdtDiff(model).diff(str.node, dst);
+    expect(patch.ops.length).toBe(1);
+    expect(patch.ops[0].name()).toBe('del');
+    expect(str.view()).toBe(src);
+    model.applyPatch(patch);
+    expect(str.view()).toBe(dst);
+  });
+
+  test('two inserts', () => {
+    const model = Model.create();
+    const src = '23';
+    model.api.root({str: src});
+    const str = model.api.str(['str']);
+    const dst = '2x3y';
+    const patch = new JsonCrdtDiff(model).diff(str.node, dst);
+    expect(str.view()).toBe(src);
+    model.applyPatch(patch);
+    expect(str.view()).toBe(dst);
+  });
+
+  test('inserts and deletes', () => {
+    const model = Model.create();
+    const src = 'hello world';
+    model.api.root({str: src});
+    const str = model.api.str(['str']);
+    const dst = 'Hello world!';
+    const patch = new JsonCrdtDiff(model).diff(str.node, dst);
+    expect(str.view()).toBe(src);
+    model.applyPatch(patch);
+    expect(str.view()).toBe(dst);
+  });
+});
+
+describe('bin', () => {
+  test('insert', () => {
+    const model = Model.create();
+    const bin = b(1, 2, 3, 4, 5);
+    model.api.root({bin});
+    const str = model.api.bin(['bin']);
+    const dst = b(1, 2, 3, 4, 123, 5);
+    const patch = new JsonCrdtDiff(model).diff(str.node, dst);
+    expect(patch.ops.length).toBe(1);
+    expect(patch.ops[0].name()).toBe('ins_bin');
+    expect((patch.ops[0] as InsStrOp).data).toEqual(b(123));
+    expect(str.view()).toEqual(bin);
+    model.applyPatch(patch);
+    expect(str.view()).toEqual(dst);
+  });
+
+  test('creates empty patch for equal values', () => {
+    const model = Model.create();
+    const bin = b(1, 2, 3, 4, 5);
+    model.api.root({bin});
+    const str = model.api.bin(['bin']);
+    const dst = b(1, 2, 3, 4, 5);
+    const patch = new JsonCrdtDiff(model).diff(str.node, dst);
+    
expect(patch.ops.length).toBe(0); + }); + + test('delete', () => { + const model = Model.create(); + const src = b(1, 2, 3, 4, 5); + model.api.root({bin: src}); + const bin = model.api.bin(['bin']); + const dst = b(1, 2, 3, 4); + const patch = new JsonCrdtDiff(model).diff(bin.node, dst); + expect(patch.ops.length).toBe(1); + expect(patch.ops[0].name()).toBe('del'); + expect(bin.view()).toEqual(src); + model.applyPatch(patch); + expect(bin.view()).toEqual(dst); + }); + + test('inserts and deletes', () => { + const model = Model.create(); + const src = b(1, 2, 3, 4, 5); + model.api.root({bin: src}); + const bin = model.api.bin(['bin']); + const dst = b(2, 3, 4, 5, 6); + const patch = new JsonCrdtDiff(model).diff(bin.node, dst); + expect(bin.view()).toEqual(src); + model.applyPatch(patch); + expect(bin.view()).toEqual(dst); + }); +}); + +describe('obj', () => { + test('can remove a key', () => { + const model = Model.create(); + model.api.root({ + foo: 'abc', + bar: 'xyz', + }); + const dst = {foo: 'abc'}; + assertDiff(model, model.root.child(), dst); + }); + + test('can add a key', () => { + const model = Model.create(); + model.api.root({ + foo: 'abc', + }); + const dst = {foo: 'abc', bar: 'xyz'}; + assertDiff(model, model.root.child(), dst); + }); + + test('can edit nested string', () => { + const model = Model.create(); + model.api.root({foo: 'abc'}); + const dst = {foo: 'abc!'}; + assertDiff(model, model.root.child(), dst); + }); + + test('can update "con" string key', () => { + const model = Model.create(s.obj({foo: s.con('abc')})); + const dst = {foo: 'abc!'}; + assertDiff(model, model.root.child(), dst); + }); + + test('nested object', () => { + const src = { + nested: { + remove: 123, + edit: 'abc', + }, + }; + const dst = { + nested: { + inserted: null, + edit: 'Abc!', + }, + }; + const model = Model.create(); + model.api.root(src); + assertDiff(model, model.root, dst); + }); + + test('can change "str" key to number or back', () => { + assertDiff2({foo: 
'abc'}, {foo: 123}); + assertDiff2({foo: 123}, {foo: 'abc'}); + }); +}); + +describe('vec', () => { + test('can add an element', () => { + const model = Model.create(s.vec(s.con(1))); + const dst = [1, 2]; + assertDiff(model, model.root, dst); + }); + + test('can remove an element', () => { + const model = Model.create(s.vec(s.con(1), s.con(2))); + const dst = [1]; + assertDiff(model, model.root, dst); + }); + + test('can replace element', () => { + const model = Model.create(s.vec(s.con(1))); + const dst = [2]; + assertDiff(model, model.root, dst); + expect(() => model.api.val([0])).toThrow(); + }); + + test('can replace nested "val" node', () => { + const schema = s.vec(s.val(s.con(1))); + const model = Model.create(schema); + const dst = [2]; + assertDiff(model, model.root, dst); + const node = model.api.val([0]); + expect(node.node).toBeInstanceOf(ValNode); + }); +}); + +describe('arr', () => { + describe('insert', () => { + test('can add an element', () => { + const model = Model.create(); + model.api.root([1]); + const dst = [1, 2]; + assertDiff(model, model.root, dst); + }); + + test('can add an element (when list of "con")', () => { + const model = Model.create(s.arr([s.con(1)])); + const dst = [1, 2]; + assertDiff(model, model.root, dst); + }); + + test('can add two elements sequentially', () => { + const model = Model.create(); + model.api.root([1, 4]); + const dst = [1, 2, 3, 4]; + assertDiff(model, model.root, dst); + }); + }); + + describe('delete', () => { + test('can remove an element (end of list)', () => { + const model = Model.create(); + model.api.root([1, 2, 3]); + const dst = [1, 2]; + assertDiff(model, model.root, dst); + }); + + test('can remove a "con" element (end of list)', () => { + const model = Model.create(s.arr([s.con(1), s.con(2), s.con(3)])); + const dst = [1, 2]; + assertDiff(model, model.root, dst); + }); + + test('can remove an element (start of list)', () => { + const model = Model.create(); + model.api.root([1, 2]); + const dst 
= [2]; + assertDiff(model, model.root, dst); + }); + + test('can remove an element (middle list)', () => { + const model = Model.create(); + model.api.root([1, 2, 3]); + const dst = [1, 3]; + assertDiff(model, model.root, dst); + }); + + test('can remove whole list', () => { + const model = Model.create(); + model.api.root([1, 2, 3]); + const dst: number[] = []; + assertDiff(model, model.root, dst); + }); + }); + + describe('replace', () => { + test('can replace an element', () => { + const model = Model.create(); + model.api.root([1, 2, 3]); + const dst: number[] = [1, 0, 3]; + assertDiff(model, model.root, dst); + }); + + test('can replace an element (when elements are "con")', () => { + const model = Model.create(s.arr([s.con(1), s.con(2), s.con(3)])); + const dst: number[] = [1, 0, 3]; + assertDiff(model, model.root, dst); + }); + + test('can replace an element (different type)', () => { + const model = Model.create(); + model.api.root([1, 2, 3]); + const dst: unknown[] = [1, 'aha', 3]; + assertDiff(model, model.root, dst); + }); + + test('can replace an element (when elements are "con" and different type)', () => { + const model = Model.create(s.arr([s.con(1), s.con(2), s.con(3)])); + const dst: unknown[] = [1, 'asdf', 3]; + assertDiff(model, model.root, dst); + }); + + test('replace nested array - 1', () => { + const model = Model.create(); + model.api.root([[2]]); + const dst: unknown[] = [2]; + assertDiff(model, model.root, dst); + }); + + test('replace nested array - 2', () => { + const model = Model.create(); + model.api.root([[2]]); + const dst: unknown[] = [2, 1]; + assertDiff(model, model.root, dst); + }); + + test('replace nested array - 3', () => { + const model = Model.create(); + model.api.root([[2]]); + const dst: unknown[] = [1, 2, 3]; + assertDiff(model, model.root, dst); + }); + + test('replace nested array - 4', () => { + const model = Model.create(); + model.api.root([1, [2], 3]); + const dst: unknown[] = [1, 2, 3, 4]; + assertDiff(model, 
model.root, dst); + }); + + test('replace nested array - 5', () => { + const model = Model.create(); + model.api.root([1, [2, 2.4], 3]); + const dst: unknown[] = [1, 2, 3, 4]; + assertDiff(model, model.root, dst); + }); + + test('diff first element, and various replacements later', () => { + const model = Model.create(); + model.api.root([[1, 2, 3, 4, 5], 4, 5, 6, 7, 9, 0]); + const dst: unknown[] = [[1, 2], 4, 77, 7, 'xyz']; + assertDiff(model, model.root, dst); + }); + + test('replaces both elements', () => { + const model = Model.create(); + model.api.root([9, 0]); + const dst: unknown[] = ['xyz']; + assertDiff(model, model.root, dst); + }); + + test('nested changes', () => { + const model = Model.create(); + model.api.root([1, 2, [1, 2, 3, 4, 5, 6], 4, 5, 6, 7, 8, 9, 0]); + const dst: unknown[] = ['2', [1, 2, 34, 5], 4, 77, 7, 8, 'xyz']; + assertDiff(model, model.root, dst); + }); + }); +}); + +describe('scenarios', () => { + test('link element annotation', () => { + const model = Model.create( + s.obj({ + href: s.str('http://example.com/page?tab=1'), + title: s.str('example'), + }), + ); + const dst = { + href: 'https://example.com/page-2', + title: 'Example page', + }; + assertDiff(model, model.root, dst); + }); +}); diff --git a/src/json-crdt-extensions/peritext/Peritext.ts b/src/json-crdt-extensions/peritext/Peritext.ts index 97547aa555..c1d3ef96f1 100644 --- a/src/json-crdt-extensions/peritext/Peritext.ts +++ b/src/json-crdt-extensions/peritext/Peritext.ts @@ -10,7 +10,7 @@ import {Overlay} from './overlay/Overlay'; import {Chars} from './constants'; import {interval, tick} from '../../json-crdt-patch/clock'; import {Model, type StrApi} from '../../json-crdt/model'; -import {CONST, updateNum} from '../../json-hash'; +import {CONST, updateNum} from '../../json-hash/hash'; import {SESSION} from '../../json-crdt-patch/constants'; import {s} from '../../json-crdt-patch'; import {ExtraSlices} from './slice/ExtraSlices'; diff --git 
a/src/json-crdt-extensions/peritext/block/Block.ts b/src/json-crdt-extensions/peritext/block/Block.ts index 69a6f3372a..f2e245badc 100644 --- a/src/json-crdt-extensions/peritext/block/Block.ts +++ b/src/json-crdt-extensions/peritext/block/Block.ts @@ -1,5 +1,5 @@ import {printTree} from 'tree-dump/lib/printTree'; -import {CONST, updateJson, updateNum} from '../../../json-hash'; +import {CONST, updateJson, updateNum} from '../../../json-hash/hash'; import {MarkerOverlayPoint} from '../overlay/MarkerOverlayPoint'; import {UndefEndIter, type UndefIterator} from '../../../util/iterator'; import {Inline} from './Inline'; diff --git a/src/json-crdt-extensions/peritext/block/__tests__/Inline.key.spec.ts b/src/json-crdt-extensions/peritext/block/__tests__/Inline.key.spec.ts index 57cef694af..652c19b670 100644 --- a/src/json-crdt-extensions/peritext/block/__tests__/Inline.key.spec.ts +++ b/src/json-crdt-extensions/peritext/block/__tests__/Inline.key.spec.ts @@ -1,6 +1,6 @@ import {Timestamp} from '../../../../json-crdt-patch'; import {updateId} from '../../../../json-crdt/hash'; -import {updateNum} from '../../../../json-hash'; +import {updateNum} from '../../../../json-hash/hash'; import { type Kit, setupKit, diff --git a/src/json-crdt-extensions/peritext/overlay/Overlay.ts b/src/json-crdt-extensions/peritext/overlay/Overlay.ts index e0fad1b974..f6f102197d 100644 --- a/src/json-crdt-extensions/peritext/overlay/Overlay.ts +++ b/src/json-crdt-extensions/peritext/overlay/Overlay.ts @@ -8,7 +8,7 @@ import {OverlayPoint} from './OverlayPoint'; import {MarkerOverlayPoint} from './MarkerOverlayPoint'; import {OverlayRefSliceEnd, OverlayRefSliceStart} from './refs'; import {compare, type ITimestampStruct} from '../../../json-crdt-patch/clock'; -import {CONST, updateNum} from '../../../json-hash'; +import {CONST, updateNum} from '../../../json-hash/hash'; import {MarkerSlice} from '../slice/MarkerSlice'; import {UndefEndIter, type UndefIterator} from '../../../util/iterator'; 
import {SliceBehavior} from '../slice/constants'; diff --git a/src/json-crdt-extensions/peritext/rga/Range.ts b/src/json-crdt-extensions/peritext/rga/Range.ts index 099ec0daed..16f621eba4 100644 --- a/src/json-crdt-extensions/peritext/rga/Range.ts +++ b/src/json-crdt-extensions/peritext/rga/Range.ts @@ -1,6 +1,6 @@ import {Point} from './Point'; import {Anchor} from './constants'; -import {updateNum} from '../../../json-hash'; +import {updateNum} from '../../../json-hash/hash'; import type {ITimestampStruct} from '../../../json-crdt-patch/clock'; import type {Printable} from 'tree-dump/lib/types'; import type {AbstractRga, Chunk} from '../../../json-crdt/nodes/rga'; diff --git a/src/json-crdt-extensions/peritext/slice/PersistedSlice.ts b/src/json-crdt-extensions/peritext/slice/PersistedSlice.ts index 1199b9d314..da841fa6a7 100644 --- a/src/json-crdt-extensions/peritext/slice/PersistedSlice.ts +++ b/src/json-crdt-extensions/peritext/slice/PersistedSlice.ts @@ -13,7 +13,7 @@ import { SliceTypeName, SliceTypeCon, } from './constants'; -import {CONST} from '../../../json-hash'; +import {CONST} from '../../../json-hash/hash'; import {Timestamp} from '../../../json-crdt-patch/clock'; import {prettyOneLine} from '../../../json-pretty'; import {validateType} from './util'; diff --git a/src/json-crdt-extensions/peritext/slice/Slices.ts b/src/json-crdt-extensions/peritext/slice/Slices.ts index 6752839100..8ed6856088 100644 --- a/src/json-crdt-extensions/peritext/slice/Slices.ts +++ b/src/json-crdt-extensions/peritext/slice/Slices.ts @@ -3,7 +3,7 @@ import {printTree} from 'tree-dump/lib/printTree'; import {PersistedSlice} from './PersistedSlice'; import {Timespan, compare, tss} from '../../../json-crdt-patch/clock'; import {updateRga} from '../../../json-crdt/hash'; -import {CONST, updateNum} from '../../../json-hash'; +import {CONST, updateNum} from '../../../json-hash/hash'; import {SliceBehavior, SliceHeaderShift, SliceTupleIndex} from './constants'; import {MarkerSlice} 
from './MarkerSlice'; import {VecNode} from '../../../json-crdt/nodes'; diff --git a/src/json-crdt-extensions/peritext/util/ChunkSlice.ts b/src/json-crdt-extensions/peritext/util/ChunkSlice.ts index 211c3a8fca..086834bd0b 100644 --- a/src/json-crdt-extensions/peritext/util/ChunkSlice.ts +++ b/src/json-crdt-extensions/peritext/util/ChunkSlice.ts @@ -1,4 +1,4 @@ -import {CONST, updateNum} from '../../../json-hash'; +import {CONST, updateNum} from '../../../json-hash/hash'; import {updateId} from '../../../json-crdt/hash'; import {type ITimestampStruct, Timestamp, printTs} from '../../../json-crdt-patch/clock'; import type {IChunkSlice} from './types'; diff --git a/src/json-crdt/hash.ts b/src/json-crdt/hash.ts index 13955d0e82..fd1c382add 100644 --- a/src/json-crdt/hash.ts +++ b/src/json-crdt/hash.ts @@ -1,4 +1,4 @@ -import {CONST, updateNum} from '../json-hash'; +import {CONST, updateNum} from '../json-hash/hash'; import {ConNode, ValNode, ObjNode, VecNode, ArrNode} from './nodes'; import {AbstractRga} from './nodes/rga'; import {last2} from 'sonic-forest/lib/util2'; diff --git a/src/json-crdt/nodes/arr/ArrNode.ts b/src/json-crdt/nodes/arr/ArrNode.ts index 6792b3c4e8..d6884558b7 100644 --- a/src/json-crdt/nodes/arr/ArrNode.ts +++ b/src/json-crdt/nodes/arr/ArrNode.ts @@ -1,8 +1,8 @@ import {AbstractRga, type Chunk} from '../rga/AbstractRga'; import {type ITimestampStruct, tick} from '../../../json-crdt-patch/clock'; -import type {Model} from '../../model'; import {printBinary} from 'tree-dump/lib/printBinary'; import {printTree} from 'tree-dump/lib/printTree'; +import type {Model} from '../../model'; import type {JsonNode, JsonNodeView} from '..'; import type {Printable} from 'tree-dump/lib/types'; @@ -176,8 +176,12 @@ export class ArrNode /** @ignore */ public children(callback: (node: JsonNode) => void) { const index = this.doc.index; - for (let chunk = this.first(); chunk; chunk = this.next(chunk)) - if (!chunk.del) for (const node of chunk.data!) 
callback(index.get(node)!); + for (let chunk = this.first(); chunk; chunk = this.next(chunk)) { + const data = chunk.data; + if (!data) continue; + const length = data.length; + for (let i = 0; i < length; i++) callback(index.get(data[i])!); + } } /** @ignore */ diff --git a/src/json-crdt/nodes/obj/ObjNode.ts b/src/json-crdt/nodes/obj/ObjNode.ts index eef679f5e3..2cdfec26ed 100644 --- a/src/json-crdt/nodes/obj/ObjNode.ts +++ b/src/json-crdt/nodes/obj/ObjNode.ts @@ -1,8 +1,9 @@ import {printTree} from 'tree-dump/lib/printTree'; import {compare, type ITimestampStruct, printTs} from '../../../json-crdt-patch/clock'; +import {ConNode} from '../const/ConNode'; +import type {JsonNode, JsonNodeView} from '..'; import type {Model} from '../../model'; import type {Printable} from 'tree-dump/lib/types'; -import type {JsonNode, JsonNodeView} from '..'; /** * Represents a `obj` JSON CRDT node, which is a Last-write-wins (LWW) object. @@ -65,6 +66,15 @@ export class ObjNode = Record callback(index.get(id)!, key)); } + public forEach(callback: (key: string, value: JsonNode) => void) { + const index = this.doc.index; + this.keys.forEach((id, key) => { + const value = index.get(id); + if (!value || (value instanceof ConNode && value.val === void 0)) return; + callback(key, value); + }); + } + // ----------------------------------------------------------------- JsonNode /** diff --git a/src/json-crdt/nodes/vec/VecNode.ts b/src/json-crdt/nodes/vec/VecNode.ts index ed07859624..d860d3f747 100644 --- a/src/json-crdt/nodes/vec/VecNode.ts +++ b/src/json-crdt/nodes/vec/VecNode.ts @@ -66,6 +66,8 @@ export class VecNode implements JsonNode< return currentId; } + // ----------------------------------------------------------------- extension + /** * @ignore */ @@ -109,7 +111,7 @@ export class VecNode implements JsonNode< return buf[0]; } - // ----------------------------------------------------------------- JsonNode + /** ------------------------------------------------------ {@link JsonNode} 
*/ /** * @ignore @@ -176,7 +178,7 @@ export class VecNode implements JsonNode< return 'vec'; } - // ---------------------------------------------------------------- Printable + /** ----------------------------------------------------- {@link Printable} */ public toString(tab: string = ''): string { const extNode = this.ext(); diff --git a/src/json-hash/__tests__/assertStructHash.ts b/src/json-hash/__tests__/assertStructHash.ts new file mode 100644 index 0000000000..6a16b01dfc --- /dev/null +++ b/src/json-hash/__tests__/assertStructHash.ts @@ -0,0 +1,19 @@ +import {structHash as structHash_} from '../structHash'; +import {structHashCrdt} from '../structHashCrdt'; +import {Model} from '../../json-crdt'; + +// biome-ignore lint: \x00 character +const isASCII = (str: string) => /^[\x00-\x7F]*$/.test(str); + +export const assertStructHash = (json: unknown): string => { + const model = Model.create(); + model.api.root(json); + const hash1 = structHashCrdt(model.root); + const hash2 = structHash_(json); + // console.log(hash1); + // console.log(hash2); + expect(hash1).toBe(hash2); + expect(hash2.includes('\n')).toBe(false); + expect(isASCII(hash2)).toBe(true); + return hash2; +}; diff --git a/src/json-hash/__tests__/index.spec.ts b/src/json-hash/__tests__/hash.spec.ts similarity index 73% rename from src/json-hash/__tests__/index.spec.ts rename to src/json-hash/__tests__/hash.spec.ts index 14d5029244..bed2b96c0b 100644 --- a/src/json-hash/__tests__/index.spec.ts +++ b/src/json-hash/__tests__/hash.spec.ts @@ -1,4 +1,4 @@ -import {hash} from '..'; +import {hash} from '../hash'; import {RandomJson} from '@jsonjoy.com/util/lib/json-random'; test('returns the same hash for empty objects', () => { @@ -43,6 +43,24 @@ test('returns the same hash for array with values', () => { expect(res1).toBe(res2); }); +test('different key order returns the same hash', () => { + const res1 = hash({bar: 'asdf', foo: 123}); + const res2 = hash({foo: 123, bar: 'asdf'}); + expect(res1).toBe(res2); 
+}); + +test('same hash for binary data', () => { + const res1 = hash({data: new Uint8Array([1, 2, 3])}); + const res2 = hash({data: new Uint8Array([1, 2, 3])}); + expect(res1).toBe(res2); +}); + +test('different hash for binary data', () => { + const res1 = hash({data: new Uint8Array([1, 2, 3])}); + const res2 = hash({data: new Uint8Array([1, 2, 4])}); + expect(res1).not.toBe(res2); +}); + test('returns different hash for random JSON values', () => { for (let i = 0; i < 100; i++) { const res1 = hash(RandomJson.generate() as any); diff --git a/src/json-hash/__tests__/structHash-automated.spec.ts b/src/json-hash/__tests__/structHash-automated.spec.ts new file mode 100644 index 0000000000..dc0c481610 --- /dev/null +++ b/src/json-hash/__tests__/structHash-automated.spec.ts @@ -0,0 +1,11 @@ +import {documents} from '../../__tests__/json-documents'; +import {binaryDocuments} from '../../__tests__/binary-documents'; +import {assertStructHash} from './assertStructHash'; + +describe('computes structural hashes on fixtures', () => { + for (const {name, json} of [...documents, ...binaryDocuments]) { + test(name, () => { + assertStructHash(json); + }); + } +}); diff --git a/src/json-hash/__tests__/structHash-fuzzing.spec.ts b/src/json-hash/__tests__/structHash-fuzzing.spec.ts new file mode 100644 index 0000000000..f83a976d7b --- /dev/null +++ b/src/json-hash/__tests__/structHash-fuzzing.spec.ts @@ -0,0 +1,11 @@ +import {RandomJson} from '@jsonjoy.com/util/lib/json-random'; +import {assertStructHash} from './assertStructHash'; + +const iterations = 100; + +test('computes structural hashes', () => { + for (let i = 0; i < iterations; i++) { + const json = RandomJson.generate(); + assertStructHash(json); + } +}); diff --git a/src/json-hash/__tests__/structHash.spec.ts b/src/json-hash/__tests__/structHash.spec.ts new file mode 100644 index 0000000000..9a91074578 --- /dev/null +++ b/src/json-hash/__tests__/structHash.spec.ts @@ -0,0 +1,86 @@ +import {clone} from 
'@jsonjoy.com/util/lib/json-clone'; +import {structHash as structHash_} from '../structHash'; +import {RandomJson} from '@jsonjoy.com/util/lib/json-random'; + +// biome-ignore lint: \x00 character +const isASCII = (str: string) => /^[\x00-\x7F]*$/.test(str); + +const structHash = (json: unknown): string => { + const hash = structHash_(json); + expect(hash.includes('\n')).toBe(false); + expect(isASCII(hash)).toBe(true); + return hash; +}; + +test('returns the same hash for empty objects', () => { + const res1 = structHash({}); + const res2 = structHash({}); + expect(res1).toBe(res2); +}); + +test('returns the same hash for empty arrays', () => { + const res1 = structHash([]); + const res2 = structHash([]); + const res3 = structHash({}); + expect(res1).toBe(res2); + expect(res1).not.toBe(res3); +}); + +test('returns the same hash for empty strings', () => { + const res1 = structHash(''); + const res2 = structHash(''); + const res3 = structHash({}); + const res4 = structHash([]); + expect(res1).toBe(res2); + expect(res1).not.toBe(res3); + expect(res1).not.toBe(res4); +}); + +test('returns the same hash for object with keys', () => { + const res1 = structHash({foo: 123, bar: 'asdf'}); + const res2 = structHash({foo: 123, bar: 'asdf'}); + expect(res1).toBe(res2); +}); + +test('different key order returns the same hash', () => { + const res1 = structHash({bar: 'asdf', foo: 123}); + const res2 = structHash({foo: 123, bar: 'asdf'}); + expect(res1).toBe(res2); +}); + +test('returns the same hash regardless of key order', () => { + const res1 = structHash({bar: 'asdf', foo: 123}); + const res2 = structHash({foo: 123, bar: 'asdf'}); + expect(res1).toBe(res2); +}); + +test('returns the same hash for array with values', () => { + const res1 = structHash([true, 'asdf', false]); + const res2 = structHash([true, 'asdf', false]); + expect(res1).toBe(res2); +}); + +test('same hash for binary data', () => { + const res1 = structHash({data: new Uint8Array([1, 2, 3])}); + const res2 = 
structHash({data: new Uint8Array([1, 2, 3])}); + expect(res1).toBe(res2); +}); + +test('different hash for binary data', () => { + const res1 = structHash({data: new Uint8Array([1, 2, 3])}); + const res2 = structHash({data: new Uint8Array([1, 2, 4])}); + expect(res1).not.toBe(res2); +}); + +test('returns different hash for random JSON values', () => { + for (let i = 0; i < 100; i++) { + const json1 = RandomJson.generate() as any; + const res1 = structHash(json1); + const res2 = structHash(RandomJson.generate() as any); + const res3 = structHash(clone(json1)); + expect(res1).not.toBe(res2); + expect(res1).toBe(res3); + expect(res1.includes('\n')).toBe(false); + expect(res2.includes('\n')).toBe(false); + } +}); diff --git a/src/json-hash/__tests__/structHashCrdt.spec.ts b/src/json-hash/__tests__/structHashCrdt.spec.ts new file mode 100644 index 0000000000..b13d0767e9 --- /dev/null +++ b/src/json-hash/__tests__/structHashCrdt.spec.ts @@ -0,0 +1,76 @@ +import {clone} from '@jsonjoy.com/util/lib/json-clone'; +import {RandomJson} from '@jsonjoy.com/util/lib/json-random'; +import {assertStructHash} from './assertStructHash'; + +test('returns the same hash for empty objects', () => { + const res1 = assertStructHash({}); + const res2 = assertStructHash({}); + expect(res1).toBe(res2); +}); + +test('returns the same hash for empty arrays', () => { + const res1 = assertStructHash([]); + const res2 = assertStructHash([]); + const res3 = assertStructHash({}); + expect(res1).toBe(res2); + expect(res1).not.toBe(res3); +}); + +test('returns the same hash for empty strings', () => { + const res1 = assertStructHash(''); + const res2 = assertStructHash(''); + const res3 = assertStructHash({}); + const res4 = assertStructHash([]); + expect(res1).toBe(res2); + expect(res1).not.toBe(res3); + expect(res1).not.toBe(res4); +}); + +test('returns the same hash for object with keys', () => { + const res1 = assertStructHash({foo: 123, bar: 'asdf'}); + const res2 = assertStructHash({foo: 123, 
bar: 'asdf'}); + expect(res1).toBe(res2); +}); + +test('different key order returns the same hash', () => { + const res1 = assertStructHash({bar: 'asdf', foo: 123}); + const res2 = assertStructHash({foo: 123, bar: 'asdf'}); + expect(res1).toBe(res2); +}); + +test('returns the same hash regardless of key order', () => { + const res1 = assertStructHash({bar: 'asdf', foo: 123}); + const res2 = assertStructHash({foo: 123, bar: 'asdf'}); + expect(res1).toBe(res2); +}); + +test('returns the same hash for array with values', () => { + const res1 = assertStructHash([true, 'asdf', false]); + const res2 = assertStructHash([true, 'asdf', false]); + expect(res1).toBe(res2); +}); + +test('same hash for binary data', () => { + const res1 = assertStructHash({data: new Uint8Array([1, 2, 3])}); + const res2 = assertStructHash({data: new Uint8Array([1, 2, 3])}); + expect(res1).toBe(res2); +}); + +test('different hash for binary data', () => { + const res1 = assertStructHash({data: new Uint8Array([1, 2, 3])}); + const res2 = assertStructHash({data: new Uint8Array([1, 2, 4])}); + expect(res1).not.toBe(res2); +}); + +test('returns different hash for random JSON values', () => { + for (let i = 0; i < 100; i++) { + const json1 = RandomJson.generate() as any; + const res1 = assertStructHash(json1); + const res2 = assertStructHash(RandomJson.generate() as any); + const res3 = assertStructHash(clone(json1)); + expect(res1).not.toBe(res2); + expect(res1).toBe(res3); + expect(res1.includes('\n')).toBe(false); + expect(res2.includes('\n')).toBe(false); + } +}); diff --git a/src/json-hash/hash.ts b/src/json-hash/hash.ts new file mode 100644 index 0000000000..a2d3747f8d --- /dev/null +++ b/src/json-hash/hash.ts @@ -0,0 +1,70 @@ +import {sort} from '@jsonjoy.com/util/lib/sort/insertion'; +import type {PackValue} from '@jsonjoy.com/json-pack/lib/types'; + +export enum CONST { + START_STATE = 5381, + + NULL = 982452847, + TRUE = 982453247, + FALSE = 982454243, + ARRAY = 982452259, + STRING = 
982453601, + OBJECT = 982454533, + BINARY = 982454837, +} + +export const updateNum = (state: number, num: number): number => { + return (state << 5) + state + num; +}; + +export const updateStr = (state: number, str: string): number => { + const length = str.length; + state = updateNum(state, CONST.STRING); + state = updateNum(state, length); + let i = length; + while (i) state = (state << 5) + state + str.charCodeAt(--i); + return state; +}; + +export const updateBin = (state: number, bin: Uint8Array): number => { + const length = bin.length; + state = updateNum(state, CONST.BINARY); + state = updateNum(state, length); + let i = length; + while (i) state = (state << 5) + state + bin[--i]; + return state; +}; + +export const updateJson = (state: number, json: PackValue): number => { + switch (typeof json) { + case 'number': + return updateNum(state, json); + case 'string': + state = updateNum(state, CONST.STRING); + return updateStr(state, json); + case 'object': { + if (json === null) return updateNum(state, CONST.NULL); + if (Array.isArray(json)) { + const length = json.length; + state = updateNum(state, CONST.ARRAY); + for (let i = 0; i < length; i++) state = updateJson(state, json[i]); + return state; + } + if (json instanceof Uint8Array) return updateBin(state, json); + state = updateNum(state, CONST.OBJECT); + const keys = sort(Object.keys(json as object)); + const length = keys.length; + for (let i = 0; i < length; i++) { + const key = keys[i]; + state = updateStr(state, key); + state = updateJson(state, (json as any)[key]); + } + return state; + } + case 'boolean': + return updateNum(state, json ? 
CONST.TRUE : CONST.FALSE); + } + return state; +}; + +export const hash = (json: PackValue) => updateJson(CONST.START_STATE, json) >>> 0; diff --git a/src/json-hash/index.ts b/src/json-hash/index.ts index 8b0e32c74c..6d361c7ea8 100644 --- a/src/json-hash/index.ts +++ b/src/json-hash/index.ts @@ -1,56 +1,2 @@ -import type {JsonValue} from '@jsonjoy.com/json-pack/lib/types'; -import {sort} from '@jsonjoy.com/util/lib/sort/insertion'; - -export enum CONST { - START_STATE = 5381, - - NULL = 982452847, - TRUE = 982453247, - FALSE = 982454243, - ARRAY = 982452259, - STRING = 982453601, - OBJECT = 982454533, -} - -export const updateNum = (state: number, num: number): number => { - return (state << 5) + state + num; -}; - -export const updateStr = (state: number, str: string): number => { - let i = str.length; - while (i) state = (state << 5) + state + str.charCodeAt(--i); - return state; -}; - -export const updateJson = (state: number, json: JsonValue): number => { - switch (typeof json) { - case 'number': - return updateNum(state, json); - case 'string': - state = updateNum(state, CONST.STRING); - return updateStr(state, json); - case 'object': { - if (json === null) return updateNum(state, CONST.NULL); - if (json instanceof Array) { - const length = json.length; - state = updateNum(state, CONST.ARRAY); - for (let i = 0; i < length; i++) state = updateJson(state, json[i]); - return state; - } - state = updateNum(state, CONST.OBJECT); - const keys = sort(Object.keys(json as object)); - const length = keys.length; - for (let i = 0; i < length; i++) { - const key = keys[i]; - state = updateStr(state, key); - state = updateJson(state, (json as any)[key]); - } - return state; - } - case 'boolean': - return updateNum(state, json ? 
CONST.TRUE : CONST.FALSE); - } - return state; -}; - -export const hash = (json: JsonValue) => updateJson(CONST.START_STATE, json) >>> 0; +export * from './hash'; +export * from './structHash'; diff --git a/src/json-hash/structHash.ts b/src/json-hash/structHash.ts new file mode 100644 index 0000000000..afe73bdffc --- /dev/null +++ b/src/json-hash/structHash.ts @@ -0,0 +1,47 @@ +import {sort} from '@jsonjoy.com/util/lib/sort/insertion'; +import {hash} from './hash'; + +/** + * Produces a *structural hash* of a JSON value. + * + * This is a hash that is not sensitive to the order of properties in object and + * it preserves spatial information of the JSON nodes. + * + * The hash is guaranteed to contain only printable ASCII characters, excluding + * the newline character. + * + * @param val A JSON value to hash. + */ +export const structHash = (val: unknown): string => { + switch (typeof val) { + case 'string': + return hash(val).toString(36); + case 'number': + case 'bigint': + return val.toString(36); + case 'boolean': + return val ? 
'T' : 'F'; + case 'object': + if (val === null) return 'N'; + if (Array.isArray(val)) { + const length = val.length; + let res = '['; + for (let i = 0; i < length; i++) res += structHash(val[i]) + ','; + return res + ']'; + } else if (val instanceof Uint8Array) { + return hash(val).toString(36); + } else { + const keys = Object.keys(val); + sort(keys); + let res = '{'; + const length = keys.length; + for (let i = 0; i < length; i++) { + const key = keys[i]; + res += hash(key).toString(36) + ':' + structHash((val as Record)[key]) + ','; + } + return res + '}'; + } + default: + return 'U'; + } +}; diff --git a/src/json-hash/structHashCrdt.ts b/src/json-hash/structHashCrdt.ts new file mode 100644 index 0000000000..1300f3d6d0 --- /dev/null +++ b/src/json-hash/structHashCrdt.ts @@ -0,0 +1,38 @@ +import {sort} from '@jsonjoy.com/util/lib/sort/insertion'; +import {ArrNode, BinNode, ConNode, type JsonNode, ObjNode, StrNode, ValNode, VecNode} from '../json-crdt'; +import {hash} from './hash'; +import {structHash} from './structHash'; + +/** + * Constructs a structural hash of the view of the node. + * + * Produces a *structural hash* of a JSON CRDT node. Works the same as + * `structHash`, but uses the `JsonNode` interface instead of a generic value. + * + * @todo PERF: instead of constructing a "str" and "bin" view, iterate over + * the RGA chunks and hash them directly. 
+ */ +export const structHashCrdt = (node?: JsonNode): string => { + if (node instanceof ConNode) return structHash(node.val); + else if (node instanceof ValNode) return structHashCrdt(node.node()); + else if (node instanceof StrNode) return hash(node.view()).toString(36); + else if (node instanceof ObjNode) { + let res = '{'; + const keys = Array.from(node.keys.keys()); + sort(keys); + const length = keys.length; + for (let i = 0; i < length; i++) { + const key = keys[i]; + const value = node.get(key); + res += hash(key).toString(36) + ':' + structHashCrdt(value) + ','; + } + return res + '}'; + } else if (node instanceof ArrNode || node instanceof VecNode) { + let res = '['; + node.children((child) => { + res += structHashCrdt(child) + ','; + }); + return res + ']'; + } else if (node instanceof BinNode) return hash(node.view()).toString(36); + return 'U'; +}; diff --git a/src/json-patch-diff/JsonPatchDiff.ts b/src/json-patch-diff/JsonPatchDiff.ts new file mode 100644 index 0000000000..1d4d818a58 --- /dev/null +++ b/src/json-patch-diff/JsonPatchDiff.ts @@ -0,0 +1,116 @@ +import {deepEqual} from '@jsonjoy.com/util/lib/json-equal/deepEqual'; +import * as str from '../util/diff/str'; +import * as line from '../util/diff/line'; +import {structHash} from '../json-hash'; +import type {Operation} from '../json-patch/codec/json/types'; + +export class JsonPatchDiff { + protected patch: Operation[] = []; + + protected diffVal(path: string, src: unknown, dst: unknown): void { + if (deepEqual(src, dst)) return; + this.patch.push({op: 'replace', path, value: dst}); + } + + protected diffStr(path: string, src: string, dst: string): void { + if (src === dst) return; + const patch = this.patch; + str.apply( + str.diff(src, dst), + src.length, + (pos, str) => patch.push({op: 'str_ins', path, pos, str}), + (pos, len, str) => patch.push({op: 'str_del', path, pos, len, str}), + ); + } + + protected diffBin(path: string, src: Uint8Array, dst: Uint8Array): void { + throw new 
Error('Not implemented'); + } + + protected diffObj(path: string, src: Record, dst: Record): void { + const patch = this.patch; + for (const key in src) { + if (key in dst) { + const val1 = src[key]; + const val2 = dst[key]; + if (val1 === val2) continue; + this.diffAny(path + '/' + key, val1, val2); + } else { + patch.push({op: 'remove', path: path + '/' + key}); + } + } + for (const key in dst) { + if (key in src) continue; + patch.push({op: 'add', path: path + '/' + key, value: dst[key]}); + } + } + + protected diffArr(path: string, src: unknown[], dst: unknown[]): void { + const srcLines: string[] = []; + const dstLines: string[] = []; + const srcLen = src.length; + const dstLen = dst.length; + for (let i = 0; i < srcLen; i++) srcLines.push(structHash(src[i])); + for (let i = 0; i < dstLen; i++) dstLines.push(structHash(dst[i])); + const pfx = path + '/'; + const patch = this.patch; + const linePatch = line.diff(srcLines, dstLines); + const length = linePatch.length; + for (let i = length - 1; i >= 0; i--) { + const [type, srcIdx, dstIdx] = linePatch[i]; + switch (type) { + case line.LINE_PATCH_OP_TYPE.EQL: + break; + case line.LINE_PATCH_OP_TYPE.MIX: { + const srcValue = src[srcIdx]; + const dstValue = dst[dstIdx]; + this.diff(pfx + srcIdx, srcValue, dstValue); + break; + } + case line.LINE_PATCH_OP_TYPE.INS: + patch.push({op: 'add', path: pfx + (srcIdx + 1), value: dst[dstIdx]}); + break; + case line.LINE_PATCH_OP_TYPE.DEL: + patch.push({op: 'remove', path: pfx + srcIdx}); + break; + } + } + } + + public diffAny(path: string, src: unknown, dst: unknown): void { + switch (typeof src) { + case 'string': { + if (typeof dst === 'string') this.diffStr(path, src, dst); + else this.diffVal(path, src, dst); + break; + } + case 'number': + case 'boolean': + case 'bigint': { + this.diffVal(path, src, dst); + break; + } + case 'object': { + if (!src || !dst || typeof dst !== 'object') { + this.diffVal(path, src, dst); + return; + } + if (Array.isArray(src)) { + if 
(Array.isArray(dst)) this.diffArr(path, src, dst); + else this.diffVal(path, src, dst); + return; + } + this.diffObj(path, src as Record, dst as Record); + break; + } + default: + this.diffVal(path, src, dst); + break; + } + } + + public diff(path: string, src: unknown, dst: unknown): Operation[] { + this.diffAny(path, src, dst); + return this.patch; + } +} diff --git a/src/json-patch-diff/__tests__/JsonPatchDiff-fuzzing.spec.ts b/src/json-patch-diff/__tests__/JsonPatchDiff-fuzzing.spec.ts new file mode 100644 index 0000000000..43d4709de3 --- /dev/null +++ b/src/json-patch-diff/__tests__/JsonPatchDiff-fuzzing.spec.ts @@ -0,0 +1,17 @@ +import {assertDiff, randomArray} from './util'; + +const iterations = 100; + +test('two random arrays of integers', () => { + for (let i = 0; i < iterations; i++) { + const src = randomArray(); + const dst = randomArray(); + try { + assertDiff(src, dst); + } catch (error) { + console.error('src', src); + console.error('dst', dst); + throw error; + } + } +}); diff --git a/src/json-patch-diff/__tests__/JsonPatchDiff.spec.ts b/src/json-patch-diff/__tests__/JsonPatchDiff.spec.ts new file mode 100644 index 0000000000..0784af74ab --- /dev/null +++ b/src/json-patch-diff/__tests__/JsonPatchDiff.spec.ts @@ -0,0 +1,297 @@ +import {assertDiff} from './util'; + +describe('str', () => { + test('insert', () => { + const src = 'hello world'; + const dst = 'hello world!'; + assertDiff(src, dst); + }); + + test('delete', () => { + const src = 'hello worldz'; + const dst = 'hello world'; + assertDiff(src, dst); + }); + + test('replace', () => { + const src = 'hello world'; + const dst = 'Hello world'; + assertDiff(src, dst); + }); + + test('various edits', () => { + const src = 'helloo vorldz!'; + const dst = 'Hello, world, buddy!'; + assertDiff(src, dst); + }); +}); + +describe('num', () => { + test('insert', () => { + const src = 1; + const dst = 2; + assertDiff(src, dst); + }); +}); + +describe('obj', () => { + test('can remove single key', () => { 
+ const src = {foo: 1}; + const dst = {}; + assertDiff(src, dst); + }); + + test('replace key', () => { + const src = {foo: 1}; + const dst = {foo: 2}; + assertDiff(src, dst); + }); + + test('diff inner string', () => { + const src = {foo: 'hello'}; + const dst = {foo: 'hello!'}; + assertDiff(src, dst); + }); + + test('string key type change', () => { + assertDiff({foo: 'asdf'}, {foo: 123}); + assertDiff({foo: 123}, {foo: 'asdf'}); + }); + + test('can insert new key', () => { + const src = {}; + const dst = {foo: 'hello!'}; + assertDiff(src, dst); + }); + + test('can change all primitive types', () => { + const src = { + obj: { + nil: null, + bool: true, + num: 1, + str: 'hello', + }, + }; + const dst = { + obj: { + nil: 1, + bool: false, + num: null, + num2: 2, + str: 'hello!', + }, + }; + assertDiff(src, dst); + }); + + test('can diff nested objects', () => { + const src = { + id: 1, + name: 'hello', + nested: { + id: 2, + name: 'world', + description: 'blablabla', + }, + }; + const dst = { + id: 3, + name: 'hello!', + nested: { + id: 2, + description: 'Please dont use "blablabla"', + }, + }; + assertDiff(src, dst); + }); +}); + +describe('arr', () => { + test('string element type change', () => { + assertDiff(['asdf'], [123]); + assertDiff([123], ['asdf']); + }); + + test('can add element to an empty array', () => { + const src: unknown[] = []; + const dst: unknown[] = [1]; + assertDiff(src, dst); + }); + + test('can add two elements to an empty array', () => { + const src: unknown[] = []; + const dst: unknown[] = [0, 1]; + assertDiff(src, dst); + }); + + test('can add three elements to an empty array', () => { + const src: unknown[] = []; + const dst: unknown[] = [0, 1, 2]; + assertDiff(src, dst); + }); + + test('can add multiple elements to an empty array', () => { + const src: unknown[] = []; + const dst: unknown[] = [0, 1, 2, 3, 4, 5]; + assertDiff(src, dst); + }); + + test('can remove and add element', () => { + const src: unknown[] = [0]; + const dst: 
unknown[] = [1]; + assertDiff(src, dst); + }); + + test('can remove and add two elements', () => { + const src: unknown[] = [0]; + const dst: unknown[] = [1, 2]; + assertDiff(src, dst); + }); + + test('can overwrite the only element', () => { + const src: unknown[] = [0]; + const dst: unknown[] = [2]; + assertDiff(src, dst); + }); + + test('can overwrite second element', () => { + const src: unknown[] = [1, 0]; + const dst: unknown[] = [1, 2]; + assertDiff(src, dst); + }); + + test('can overwrite two elements', () => { + const src: unknown[] = [1, 2, 3, 4]; + const dst: unknown[] = [1, 'x', 'x', 4]; + assertDiff(src, dst); + }); + + test('can overwrite three elements, and add two more', () => { + const src: unknown[] = [1, 2, 3, 4]; + const dst: unknown[] = ['x', 'x', 'x', 4, true, false]; + assertDiff(src, dst); + }); + + test('delete last element', () => { + const src: unknown[] = [1, 2, 3, 4]; + const dst: unknown[] = [1, 2, 3]; + assertDiff(src, dst); + }); + + test('delete first element', () => { + const src: unknown[] = [1, 2, 3, 4]; + const dst: unknown[] = [2, 3, 4]; + assertDiff(src, dst); + }); + + test('delete first two elements', () => { + const src: unknown[] = [1, 2, 3, 4]; + const dst: unknown[] = [3, 4]; + assertDiff(src, dst); + }); + + test('fuzzer - 1', () => { + const src: unknown[] = [11, 10, 4, 6, 3, 1, 5]; + const dst: unknown[] = [7, 3, 13, 7, 9, 9, 9, 4, 9]; + assertDiff(src, dst); + }); +}); + +test('array of objects diff', () => { + const src = [ + { + id: 'xxxx', + name: 'Programming', + description: 'I love programming', + }, + { + id: '123', + name: 'Cookies', + description: 'I love cookies', + }, + { + id: 'xxxx', + name: 'Music', + description: 'I love music', + }, + ]; + const dst = [ + { + id: '123', + name: 'Cookies', + description: 'I love cookies', + }, + { + id: 'yyyy', + name: 'Music', + description: 'I love music', + }, + ]; + assertDiff(src, dst); +}); + +test('complex case', () => { + const src = { + id: 
'xxxx-xxxxxx-xxxx-xxxx', + name: 'Ivan', + tags: ['tag1', 'tag2'], + age: 30, + approved: true, + interests: [ + { + id: 'xxxx', + name: 'Programming', + description: 'I love programming', + }, + { + id: '123', + name: 'Cookies', + description: 'I love cookies', + }, + { + id: 'xxxx', + name: 'Music', + description: 'I love music', + }, + ], + address: { + city: 'New York', + state: 'NY', + zip: '10001', + location: { + lat: 40.7128, + lng: -74.006, + }, + }, + }; + const dst = { + id: 'yyyy-yyyyyy-yyyy-yyyy', + name: 'Ivans', + tags: ['tag2', 'tag3', 'tag4'], + age: 31, + approved: false, + interests: [ + { + id: '123', + name: 'Cookies', + description: 'I love cookies', + }, + { + id: 'yyyy', + name: 'Music', + description: 'I love music', + }, + ], + address: { + city: 'New York City', + state: 'NY', + zip: '10002', + location: { + lat: 40.7128, + lng: 123.4567, + }, + }, + }; + assertDiff(src, dst); +}); diff --git a/src/json-patch-diff/__tests__/util.ts b/src/json-patch-diff/__tests__/util.ts new file mode 100644 index 0000000000..306fcca028 --- /dev/null +++ b/src/json-patch-diff/__tests__/util.ts @@ -0,0 +1,25 @@ +import {JsonPatchDiff} from '../JsonPatchDiff'; +import {applyPatch} from '../../json-patch'; + +export const assertDiff = (src: unknown, dst: unknown) => { + const srcNested = {src}; + const patch1 = new JsonPatchDiff().diff('/src', src, dst); + // console.log(src); + // console.log(patch1); + // console.log(dst); + const {doc: res} = applyPatch(srcNested, patch1, {mutate: false}); + // console.log(res); + expect(res).toEqual({src: dst}); + const patch2 = new JsonPatchDiff().diff('/src', (res as any).src, dst); + // console.log(patch2); + expect(patch2.length).toBe(0); +}; + +export const randomArray = () => { + const len = Math.floor(Math.random() * 10); + const arr: unknown[] = []; + for (let i = 0; i < len; i++) { + arr.push(Math.ceil(Math.random() * 13)); + } + return arr; +}; diff --git a/src/json-patch/codegen/ops/test.ts 
b/src/json-patch/codegen/ops/test.ts index 4877640e2b..d6ae1c43fe 100644 --- a/src/json-patch/codegen/ops/test.ts +++ b/src/json-patch/codegen/ops/test.ts @@ -1,6 +1,6 @@ import type {OpTest} from '../../op'; import {$$find} from '@jsonjoy.com/json-pointer/lib/codegen/find'; -import {$$deepEqual} from '@jsonjoy.com/util/lib/json-equal/$$deepEqual'; +import {deepEqualCodegen} from '@jsonjoy.com/util/lib/json-equal/deepEqualCodegen'; import {type JavaScriptLinked, compileClosure, type JavaScript} from '@jsonjoy.com/util/lib/codegen'; import {predicateOpWrapper} from '../util'; import type {ApplyFn} from '../types'; @@ -9,7 +9,7 @@ export const $$test = (op: OpTest): JavaScriptLinked => { const js = /* js */ ` (function(wrapper){ var find = ${$$find(op.path)}; - var deepEqual = ${$$deepEqual(op.value)}; + var deepEqual = ${deepEqualCodegen(op.value)}; return wrapper(function(doc){ var val = find(doc); if (val === undefined) return ${op.not ? 'true' : 'false'}; diff --git a/src/util/__tests__/strCnt.spec.ts b/src/util/__tests__/strCnt.spec.ts new file mode 100644 index 0000000000..8d2aadba24 --- /dev/null +++ b/src/util/__tests__/strCnt.spec.ts @@ -0,0 +1,41 @@ +import {strCnt} from '../strCnt'; + +test('edge cases', () => { + expect(strCnt('', 'xyz')).toBe(0); + expect(strCnt('xyz', '')).toBe(0); + expect(strCnt('', '')).toBe(0); +}); + +test('can find no occurrences', () => { + expect(strCnt('abc', 'xyz')).toBe(0); + expect(strCnt('a', 'xyz')).toBe(0); + expect(strCnt('xyz', 'xy')).toBe(0); +}); + +test('one occurrence', () => { + expect(strCnt('1', '123')).toBe(1); + expect(strCnt('1', '0123')).toBe(1); + expect(strCnt('1', '01')).toBe(1); + expect(strCnt('aa', 'aa')).toBe(1); + expect(strCnt('aa', 'aaa')).toBe(1); + expect(strCnt('aa', 'aaab')).toBe(1); + expect(strCnt('aa', 'xaaab')).toBe(1); + expect(strCnt('aa', 'xaabc')).toBe(1); +}); + +test('two occurrence', () => { + expect(strCnt('1', '1213')).toBe(2); + expect(strCnt('1', '01123')).toBe(2); + 
expect(strCnt('1', '101')).toBe(2); + expect(strCnt('aa', 'aaaa')).toBe(2); + expect(strCnt('aa', 'aaabaa')).toBe(2); + expect(strCnt('aa', 'xaaabaaa')).toBe(2); + expect(strCnt('aa', 'xaaaabc')).toBe(2); +}); + +test('can search at offset', () => { + expect(strCnt('1', '1213', 1)).toBe(1); + expect(strCnt('1', '01123', 1)).toBe(2); + expect(strCnt('1', '101', 2)).toBe(1); + expect(strCnt('1', '101', 3)).toBe(0); +}); diff --git a/src/util/diff/__tests__/bin-fuzz.spec.ts b/src/util/diff/__tests__/bin-fuzz.spec.ts new file mode 100644 index 0000000000..49b8aa7caf --- /dev/null +++ b/src/util/diff/__tests__/bin-fuzz.spec.ts @@ -0,0 +1,19 @@ +import {RandomJson} from '@jsonjoy.com/util/lib/json-random'; +import {toBuf} from '@jsonjoy.com/util/lib/buffers/toBuf'; +import {assertPatch} from './util'; +import * as bin from '../bin'; + +const str = () => + Math.random() > 0.7 ? RandomJson.genString(Math.ceil(Math.random() * 200)) : Math.random().toString(36).slice(2); +const iterations = 100; + +test('fuzzing diff()', () => { + for (let i = 0; i < iterations; i++) { + const src = toBuf(str()); + const dst = toBuf(str()); + const patch = bin.diff(src, dst); + assertPatch(bin.toStr(src), bin.toStr(dst), patch); + expect(bin.src(patch)).toEqual(src); + expect(bin.dst(patch)).toEqual(dst); + } +}); diff --git a/src/util/diff/__tests__/bin.spec.ts b/src/util/diff/__tests__/bin.spec.ts new file mode 100644 index 0000000000..03d1f3a017 --- /dev/null +++ b/src/util/diff/__tests__/bin.spec.ts @@ -0,0 +1,69 @@ +import {b} from '@jsonjoy.com/util/lib/buffers/b'; +import {toStr, toBin, diff, src, dst} from '../bin'; +import {PATCH_OP_TYPE} from '../str'; + +describe('toHex()', () => { + test('can convert buffer to string', () => { + const buffer = b(1, 2, 3, 4, 5); + const hex = toStr(buffer); + expect(hex).toBe('\x01\x02\x03\x04\x05'); + }); + + test('can convert buffer to string', () => { + const buffer = b(0, 127, 255); + const hex = toStr(buffer); + 
expect(hex).toBe('\x00\x7f\xff'); + }); +}); + +describe('fromHex()', () => { + test('can convert buffer to string', () => { + const buffer = toBin('\x01\x02\x03\x04\x05'); + expect(buffer).toEqual(b(1, 2, 3, 4, 5)); + }); + + test('can convert buffer to string', () => { + const buffer = toBin('\x00\x7f\xff'); + expect(buffer).toEqual(b(0, 127, 255)); + }); +}); + +describe('diff()', () => { + test('returns a single equality tuple, when buffers are identical', () => { + const patch = diff(b(1, 2, 3), b(1, 2, 3)); + expect(patch).toEqual([[PATCH_OP_TYPE.EQL, toStr(b(1, 2, 3))]]); + expect(src(patch)).toEqual(b(1, 2, 3)); + expect(dst(patch)).toEqual(b(1, 2, 3)); + }); + + test('single character insert at the beginning', () => { + const patch1 = diff(b(1, 2, 3), b(0, 1, 2, 3)); + expect(patch1).toEqual([ + [PATCH_OP_TYPE.INS, toStr(b(0))], + [PATCH_OP_TYPE.EQL, toStr(b(1, 2, 3))], + ]); + expect(src(patch1)).toEqual(b(1, 2, 3)); + expect(dst(patch1)).toEqual(b(0, 1, 2, 3)); + }); + + test('single character insert at the end', () => { + const patch1 = diff(b(1, 2, 3), b(1, 2, 3, 4)); + expect(patch1).toEqual([ + [PATCH_OP_TYPE.EQL, toStr(b(1, 2, 3))], + [PATCH_OP_TYPE.INS, toStr(b(4))], + ]); + expect(src(patch1)).toEqual(b(1, 2, 3)); + expect(dst(patch1)).toEqual(b(1, 2, 3, 4)); + }); + + test('can delete char', () => { + const patch1 = diff(b(1, 2, 3), b(2, 3, 4)); + expect(patch1).toEqual([ + [PATCH_OP_TYPE.DEL, toStr(b(1))], + [PATCH_OP_TYPE.EQL, toStr(b(2, 3))], + [PATCH_OP_TYPE.INS, toStr(b(4))], + ]); + expect(src(patch1)).toEqual(b(1, 2, 3)); + expect(dst(patch1)).toEqual(b(2, 3, 4)); + }); +}); diff --git a/src/util/diff/__tests__/line-fuzzer.spec.ts b/src/util/diff/__tests__/line-fuzzer.spec.ts new file mode 100644 index 0000000000..5d7c552113 --- /dev/null +++ b/src/util/diff/__tests__/line-fuzzer.spec.ts @@ -0,0 +1,30 @@ +import {RandomJson} from '@jsonjoy.com/util/lib/json-random'; +import {assertDiff} from './line'; + +const iterations = 1000; +const 
minElements = 2; +const maxElements = 6; + +test('produces valid patch', () => { + for (let i = 0; i < iterations; i++) { + const elements = minElements + Math.ceil(Math.random() * (maxElements - minElements)); + const src: string[] = []; + const dst: string[] = []; + for (let i = 0; i < elements; i++) { + const json = RandomJson.generate({nodeCount: 5}); + if (Math.random() > 0.5) { + src.push(JSON.stringify(json)); + } + if (Math.random() > 0.5) { + dst.push(JSON.stringify(json)); + } + } + try { + assertDiff(src, dst); + } catch (error) { + console.log('SRC', src); + console.log('DST', dst); + throw error; + } + } +}); diff --git a/src/util/diff/__tests__/line.spec.ts b/src/util/diff/__tests__/line.spec.ts new file mode 100644 index 0000000000..3572e9e585 --- /dev/null +++ b/src/util/diff/__tests__/line.spec.ts @@ -0,0 +1,506 @@ +import * as line from '../line'; +import {assertDiff} from './line'; + +describe('diff', () => { + test('delete all lines', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst: string[] = []; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [-1, 0, -1, [[-1, '{"id": "xxx-xxxxxxx", "name": "Hello, world"}']]], + [-1, 1, -1, [[-1, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [-1, 2, -1, [[-1, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [-1, 3, -1, [[-1, '{"id": "abc", "name": "Merry Jane"}']]], + ]); + }); + + test('delete all but first line', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst = ['{"id": "xxx-xxxxxxx", "name": "Hello, world"}']; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [0, 0, 0, [[0, '{"id": 
"xxx-xxxxxxx", "name": "Hello, world"}']]], + [-1, 1, 0, [[-1, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [-1, 2, 0, [[-1, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [-1, 3, 0, [[-1, '{"id": "abc", "name": "Merry Jane"}']]], + ]); + }); + + test('delete all but middle lines line', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst = ['{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [-1, 0, -1, [[-1, '{"id": "xxx-xxxxxxx", "name": "Hello, world"}']]], + [0, 1, 0, [[0, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [0, 2, 1, [[0, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [-1, 3, 1, [[-1, '{"id": "abc", "name": "Merry Jane"}']]], + ]); + }); + + test('delete all but the last line', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst = ['{"id": "abc", "name": "Merry Jane"}']; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [-1, 0, -1, [[-1, '{"id": "xxx-xxxxxxx", "name": "Hello, world"}']]], + [-1, 1, -1, [[-1, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [-1, 2, -1, [[-1, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [0, 3, 0, [[0, '{"id": "abc", "name": "Merry Jane"}']]], + ]); + }); + + test('normalize line beginnings (delete two middle ones)', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst = ['{"id": 
"xxx-xxxxxxx", "name": "Hello, world"}', '{"id": "abc", "name": "Merry Jane"}']; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [0, 0, 0, [[0, '{"id": "xxx-xxxxxxx", "name": "Hello, world"}']]], + [-1, 1, 0, [[-1, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [-1, 2, 0, [[-1, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [0, 3, 1, [[0, '{"id": "abc", "name": "Merry Jane"}']]], + ]); + }); + + test('normalize line endings', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "hello world!"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst = ['{"id": "xxx-xxxxxxx", "name": "Hello, world"}', '{"id": "abc", "name": "Merry Jane!"}']; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [ + 2, + 0, + 0, + [ + [0, '{"id": "xxx-xxxxxxx", "name": "'], + [-1, 'h'], + [1, 'H'], + [0, 'ello'], + [1, ','], + [0, ' world'], + [-1, '!'], + [0, '"}'], + ], + ], + [-1, 1, 0, [[-1, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [-1, 2, 0, [[-1, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [ + 2, + 3, + 1, + [ + [0, '{"id": "abc", "name": "Merry Jane'], + [1, '!'], + [0, '"}'], + ], + ], + ]); + }); + + test('move first line to the end', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst = [ + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + ]; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [-1, 0, -1, [[-1, '{"id": "xxx-xxxxxxx", "name": "Hello, world"}']]], + [0, 1, 0, [[0, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [0, 2, 1, [[0, '{"id": 
"lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [0, 3, 2, [[0, '{"id": "abc", "name": "Merry Jane"}']]], + [1, 3, 3, [[1, '{"id": "xxx-xxxxxxx", "name": "Hello, world"}']]], + ]); + }); + + test('move second line to the end', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + ]; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [0, 0, 0, [[0, '{"id": "xxx-xxxxxxx", "name": "Hello, world"}']]], + [-1, 1, 0, [[-1, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [0, 2, 1, [[0, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [0, 3, 2, [[0, '{"id": "abc", "name": "Merry Jane"}']]], + [1, 3, 3, [[1, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + ]); + }); + + test('swap third and fourth lines', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "abc", "name": "Merry Jane"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + ]; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [0, 0, 0, [[0, '{"id": "xxx-xxxxxxx", "name": "Hello, world"}']]], + [0, 1, 1, [[0, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [1, 1, 2, [[1, '{"id": "abc", "name": "Merry Jane"}']]], + [0, 2, 3, [[0, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [-1, 3, 3, [[-1, '{"id": "abc", "name": "Merry Jane"}']]], + ]); + }); + + test('move last line 
to the beginning', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst = [ + '{"id": "abc", "name": "Merry Jane"}', + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + ]; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [1, -1, 0, [[1, '{"id": "abc", "name": "Merry Jane"}']]], + [0, 0, 1, [[0, '{"id": "xxx-xxxxxxx", "name": "Hello, world"}']]], + [0, 1, 2, [[0, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [0, 2, 3, [[0, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [-1, 3, 3, [[-1, '{"id": "abc", "name": "Merry Jane"}']]], + ]); + }); + + test('move second to last line to the beginning', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst = [ + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "xxx-xxxxxxx", "name": "Hello, world"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [1, -1, 0, [[1, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [0, 0, 1, [[0, '{"id": "xxx-xxxxxxx", "name": "Hello, world"}']]], + [0, 1, 2, [[0, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [-1, 2, 2, [[-1, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [0, 3, 3, [[0, '{"id": "abc", "name": "Merry Jane"}']]], + ]); + }); + + test('swap first and second lines', () => { + const src = [ + '{"id": "xxx-xxxxxxx", "name": "Hello, world!!!!!!!!!!!!!!!!!!!!!!!!!"}', + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": 
"lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const dst = [ + '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}', + '{"id": "xxx-xxxxxxx", "name": "Hello, world!!!!!!!!!!!!!!!!!!!!!!!!!"}', + '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}', + '{"id": "abc", "name": "Merry Jane"}', + ]; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [1, -1, 0, [[1, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [0, 0, 1, [[0, '{"id": "xxx-xxxxxxx", "name": "Hello, world!!!!!!!!!!!!!!!!!!!!!!!!!"}']]], + [-1, 1, 1, [[-1, '{"id": "xxx-yyyyyyy", "name": "Joe Doe"}']]], + [0, 2, 2, [[0, '{"id": "lkasdjflkasjdf", "name": "Winston Churchill"}']]], + [0, 3, 3, [[0, '{"id": "abc", "name": "Merry Jane"}']]], + ]); + }); + + test('fuze two elements into one', () => { + const src = [ + '{"asdfasdfasdf": 2398239234, "aaaa": "aaaaaaa"}', + '{"bbbb": "bbbbbbbbbbbbbbb", "cccc": "ccccccccccccccccc"}', + '{"this": "is a test", "number": 1234567890}', + ]; + const dst = ['{"aaaa": "aaaaaaa", "bbbb": "bbbbbbbbbbbbbbb"}', '{"this": "is a test", "number": 1234567890}']; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [ + -1, + 0, + -1, + [ + [0, '{"a'], + [-1, 'sdfasdfasdf": 2398239234, "a'], + [0, 'aaa": "aaaaaaa"'], + [-1, '}'], + ], + ], + [ + 2, + 1, + 0, + [ + [-1, '{'], + [1, ', '], + [0, '"bbbb": "bbbbbbbbbbbbbbb'], + [-1, '", "cccc": "ccccccccccccccccc'], + [0, '"}'], + ], + ], + [0, 2, 1, [[0, '{"this": "is a test", "number": 1234567890}']]], + ]); + }); + + test('split two elements into one', () => { + const src = ['{"aaaa": "aaaaaaa", "bbbb": "bbbbbbbbbbbbbbb"}', '{"this": "is a test", "number": 1234567890}']; + const dst = [ + '{"asdfasdfasdf": 2398239234, "aaaa": "aaaaaaa"}', + '{"bbbb": "bbbbbbbbbbbbbbb", "cccc": "ccccccccccccccccc"}', + '{"this": "is a test", "number": 1234567890}', + ]; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [ + 1, + -1, + 0, + [ + [0, '{"a'], + [1, 
'sdfasdfasdf": 2398239234, "a'], + [0, 'aaa": "aaaaaaa"'], + [-1, ', '], + [1, '}'], + ], + ], + [ + 2, + 0, + 1, + [ + [1, '{'], + [0, '"bbbb": "bbbbbbbbbbbbbbb'], + [1, '", "cccc": "ccccccccccccccccc'], + [0, '"}'], + ], + ], + [0, 1, 2, [[0, '{"this": "is a test", "number": 1234567890}']]], + ]); + }); + + test('various examples', () => { + assertDiff(['0', '1', '3', 'x', 'y', '4', '5'], ['1', '2', '3', '4', 'a', 'b', 'c', '5']); + assertDiff(['a', 'x'], ['b', 'c', 'd']); + assertDiff([], []); + assertDiff(['1'], []); + assertDiff([], ['1']); + assertDiff(['1'], ['1']); + assertDiff(['1', '2'], ['1', '2']); + assertDiff(['1', '2'], ['1', '3', '2']); + assertDiff(['1', '3', '2'], ['1', '2']); + assertDiff(['1', '2', '3', '4', '5', '6', '7'], ['0', '1', '2', '5', 'x', 'y', 'z', 'a', 'b', '7', '8']); + assertDiff([], ['1']); + assertDiff([], []); + assertDiff(['1'], ['1']); + assertDiff(['1', '1'], ['1', '1']); + assertDiff(['1', '1', '2'], ['1', '1', '2']); + assertDiff(['1', '1', '2'], ['1', '1']); + assertDiff(['1', '2', '3'], ['1', '3']); + assertDiff(['1', '2', '3'], ['2', '3']); + assertDiff(['b', 'a'], ['7', '3', 'd', '7', '9', '9', '9']); + assertDiff(['1'], []); + assertDiff(['1', '{}'], []); + assertDiff(['1', '2', '3', '4', '5', '6'], ['3']); + assertDiff(['1', '2', '3'], ['2', '3']); + assertDiff(['1', '2', '3'], ['1', '3']); + assertDiff(['1', '2', '3'], ['1', '2']); + assertDiff(['1', '2', '3', '4'], ['3', '4']); + assertDiff(['1', '2'], ['1']); + assertDiff(['1', '2'], ['2']); + assertDiff(['1', '2', '3', '3', '5', '{a:4}', '5', '"6"'], ['1', '2', '3', '5', '{a:4}', '5', '"6"', '6']); + assertDiff(['0', '1'], ['xyz']); + + assertDiff(['[]'], ['[1]']); + assertDiff(['1', '[]'], ['1', '[1]']); + assertDiff(['1', '2', '3'], ['1', '[2]', '3']); + assertDiff(['1', '[1,2,3,4]', '3'], ['1', '[1,3,455]', '3']); + assertDiff(['1', '[1,2,3,4]', '3'], ['1', '[1,3,455]', '[3]']); + assertDiff(['1', '[1,2,3,4]', '3'], ['1', '[1,2,3,5]', '3']); + assertDiff(['1', 
'[1,2,3,4]', '3'], ['1', '[1,4,3,5]', '3']); + assertDiff(['[2]'], ['1', '2', '3']); + }); + + test('fuzzer - 1', () => { + const src = [ + '{"KW*V":"Wj6/Y1mgmm6n","uP1`NNND":{")zR8r|^KR":{}},"YYyO7.+>#.6AQ?U":"1%EA(q+S!}*","b\\nyc*o.":487228790.90332836}', + '{"CO:_":238498277.2025599,"Gu4":{"pv`6^#.%9ka1*":true},"(x@cpBcAWb!_\\"{":963865518.3697702,"/Pda+3}:s(/sG{":"fj`({"}', + '{".yk_":201,"KV1C":"yq#Af","b+Cö.EOa":["DDDDDDDDDDDDDDDD"],"%":[]}', + ]; + const dst = [ + '{"Vv.FuN3P}K4*>;":false,".7gC":701259576.4875442,"3r;yV6<;$2i)+Fl":"TS7A1-WLm|U\'Exo","&G/$Ikre-aE`MsL":158207813.24797496,"i|":1927223283245736}', + ]; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [-1, 0, -1, expect.any(Array)], + [2, 1, 0, expect.any(Array)], + [-1, 2, 0, [[-1, '{".yk_":201,"KV1C":"yq#Af","b+Cö.EOa":["DDDDDDDDDDDDDDDD"],"%":[]}']]], + ]); + }); + + test('fuzzer - 2 (simplified)', () => { + const src = [ + '{asdfasdfasdf}', + '{12341234123412341234}', + '{zzzzzzzzzzzzzzzzzz}', + '{12341234123412341234}', + '{00000000000000000000}', + '{12341234123412341234}', + ]; + const dst = ['{asdfasdfasdf}', '{zzzzzzzzzzzzzzzzzz}', '{00000000000000000000}']; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [0, 0, 0, expect.any(Array)], + [-1, 1, 0, expect.any(Array)], + [0, 2, 1, expect.any(Array)], + [-1, 3, 1, expect.any(Array)], + [0, 4, 2, expect.any(Array)], + [-1, 5, 2, expect.any(Array)], + ]); + }); + + test('fuzzer - 2', () => { + const src = [ + '{"qED5","Zoypj-Ock^\'":714499113.6419818,"j::O\\"ON.^iud#":{}}', + '{"{\\\\^]wa":[",M/u= |Nu=,2J"],"\\\\D6;;h-,O\\\\-|":181373753.3018791,"[n6[!Z)4":"6H:p-N(uM","sK\\\\8C":[]}', + ]; + const dst = [ + '{"qED5","Zoypj-Ock^\'":714499113.6419818,"j::O\\"ON.^iud#":{}}', + ]; + const patch = line.diff(src, dst); + expect(patch).toEqual([ + [0, 0, 0, expect.any(Array)], + [-1, 1, 0, expect.any(Array)], + [0, 2, 1, expect.any(Array)], + [-1, 3, 1, expect.any(Array)], + [0, 4, 2, expect.any(Array)], + [-1, 5, 2, 
expect.any(Array)], + ]); + }); + + test('fuzzer - 3', () => { + const src = [ + '{aaaaaaaaaaa}', + '{bbbbbbbbbbb}', + '{"75":259538477846144,"dadqM`0I":322795818.54331195,"<":"f*ßlwäm&=_y@w\\n","53aghXOyD%lC2":373122194.60806453,"\\\\9=M!\\"\\\\Tl-":"r.VdPY`mOQ"}', + '{11111111111111111111}', + ]; + const dst = [ + '{"\\\\ 3[9}0dz+FaW\\"M":"rX?","P.Ed-s-VgiQDuNk":"18","}56zyy3FnC":[" [x[0], x[1], x[2]]); + expect(patch).toEqual([ + [-1, 0, -1], + [2, 1, 0], + [0, 2, 1], + [2, 3, 2], + ]); + }); + + test('fuzzer - 4', () => { + const src = [ + '{"fE#vTih,M!q+TTR":-8702114011119315,"`F\\"M9":true,"]9+FC9f{48NnX":{"+\\\\]IQ7":"a;br-^_m"},"s&":"%n18QdrUewc8Nh8<"}', + '{"<\\"R}d\\"HY65":[53195032.194879085,710289417.4711887],"WH]":"qqqqqqqqqq","W&0fQhOd8":96664625.24402197}', + '{"!2{:XVc3":[814507837.3286607,"A+m+}=p$Y&T"],"?[Tks9wg,pRLz.G":[[]]}', + '{"X^бbAq,":247853730.363063,"+ Mkjq_":-7253373307869407,"`J\\"[^)W KVFk":{"I&a?\\\\\\"1q\\\\":{"66666666666666":">}v1I7y48`JJIG5{"}}}', + ]; + const dst = [ + '{"fE#vTih,M!q+TTR":-8702114011119315,"`F\\"M9":true,"]9+FC9f{48NnX":{"+\\\\]IQ7":"a;br-^_m"},"s&":"%n18QdrUewc8Nh8<"}', + '{"!2{:XVc3":[814507837.3286607,"A+m+}=p$Y&T"],"?[Tks9wg,pRLz.G":[[]]}', + `{"}'-":["o=^\\\\tXk@4",false],"*nF(tbVE=L\\"LiA":-17541,"5a,?p8=]TBLT_x^":916988130.3227228}`, + `{"+.i5D's>W4#EJ%7B":">IYF9h","IeK?Dg{/3>hq7\\\\B[":64967,"KI,cnб!Ty%":2913242861126036,"rv9O@j":false,"dj":"N>"}`, + ]; + const patch = line.diff(src, dst).map((x) => [x[0], x[1], x[2]]); + expect(patch).toEqual([ + [0, 0, 0], + [-1, 1, 0], + [0, 2, 1], + [1, 2, 2], + [2, 3, 3], + ]); + }); + + test('fuzzer - 5', () => { + const src = [ + '{"1111":[true,true],"111111111111111":-34785,"YRb#H`%Q`9yQ;":"S@>/8#"}', + '{"$?":145566270.31451553,"&;\\\\V":729010872.7196132,"B4Xm[[X4":"WLFBc>*popRot]Y",") 8a%d@":811080332.6947087,"LnRab_vKhgz":"%"}', + ]; + const dst = [ + 
/**
 * Test helper: computes `line.diff(src, dst)` and checks that replaying the
 * resulting line patch yields exactly `dst`.
 *
 * Replay rules, per operation type:
 * - `EQL` keeps the referenced `src` line;
 * - `INS` and `MIX` take the referenced `dst` line;
 * - `DEL` contributes nothing.
 *
 * An empty patch is treated as "no changes", i.e. the replay result is `src`.
 *
 * @param src Source lines.
 * @param dst Destination lines.
 */
export const assertDiff = (src: string[], dst: string[]) => {
  const ops = line.diff(src, dst);
  const rebuilt: string[] = [];
  // `line.diff()` returns an empty patch when nothing changed.
  if (!ops.length) rebuilt.push(...src);
  for (const [kind, srcPos, dstPos] of ops) {
    switch (kind) {
      case line.LINE_PATCH_OP_TYPE.EQL:
        rebuilt.push(src[srcPos]);
        break;
      case line.LINE_PATCH_OP_TYPE.INS:
      case line.LINE_PATCH_OP_TYPE.MIX:
        rebuilt.push(dst[dstPos]);
        break;
      case line.LINE_PATCH_OP_TYPE.DEL:
        // Deleted lines do not appear in the destination.
        break;
    }
  }
  expect(rebuilt).toEqual(dst);
};
RandomJson.genString(Math.ceil(Math.random() * 200)) : Math.random().toString(36).slice(2); +const iterations = 100; + +test('fuzzing diff()', () => { + for (let i = 0; i < iterations; i++) { + const src = str(); + const dst = str(); + const patch = diff(src, dst); + assertPatch(src, dst, patch); + } +}); + +test('fuzzing diffEdit()', () => { + for (let i = 0; i < iterations; i++) { + const src = str(); + const dst = str(); + const patch = diffEdit(src, dst, Math.floor(Math.random() * src.length)); + assertPatch(src, dst, patch); + } +}); + +test('fuzzing fast-diff', () => { + for (let i = 0; i < iterations; i++) { + const src = str(); + const dst = str(); + const patch = fastDiff(src, dst); + assertPatch(src, dst, patch); + } +}); diff --git a/src/util/diff/__tests__/str.spec.ts b/src/util/diff/__tests__/str.spec.ts new file mode 100644 index 0000000000..3895055d65 --- /dev/null +++ b/src/util/diff/__tests__/str.spec.ts @@ -0,0 +1,223 @@ +import {PATCH_OP_TYPE, type Patch, diff, diffEdit} from '../str'; +import {assertPatch} from './util'; + +describe('diff()', () => { + test('returns a single equality tuple, when strings are identical', () => { + const patch = diffEdit('hello', 'hello', 1); + expect(patch).toEqual([[PATCH_OP_TYPE.EQL, 'hello']]); + assertPatch('hello', 'hello', patch); + }); + + test('single character insert at the beginning', () => { + const patch1 = diff('hello', '_hello'); + const patch2 = diffEdit('hello', '_hello', 1); + const patch3 = diffEdit('hello', '_hello', 4); + expect(patch1).toEqual([ + [PATCH_OP_TYPE.INS, '_'], + [PATCH_OP_TYPE.EQL, 'hello'], + ]); + expect(patch2).toEqual([ + [PATCH_OP_TYPE.INS, '_'], + [PATCH_OP_TYPE.EQL, 'hello'], + ]); + expect(patch3).toEqual([ + [PATCH_OP_TYPE.INS, '_'], + [PATCH_OP_TYPE.EQL, 'hello'], + ]); + assertPatch('hello', '_hello', patch1); + assertPatch('hello', '_hello', patch2); + assertPatch('hello', '_hello', patch3); + }); + + test('single character insert at the end', () => { + const patch1 = 
diff('hello', 'hello!'); + const patch2 = diffEdit('hello', 'hello!', 6); + const patch3 = diffEdit('hello', 'hello!', 2); + expect(patch1).toEqual([ + [PATCH_OP_TYPE.EQL, 'hello'], + [PATCH_OP_TYPE.INS, '!'], + ]); + expect(patch2).toEqual([ + [PATCH_OP_TYPE.EQL, 'hello'], + [PATCH_OP_TYPE.INS, '!'], + ]); + expect(patch3).toEqual([ + [PATCH_OP_TYPE.EQL, 'hello'], + [PATCH_OP_TYPE.INS, '!'], + ]); + assertPatch('hello', 'hello!', patch1); + assertPatch('hello', 'hello!', patch2); + assertPatch('hello', 'hello!', patch3); + }); + + test('single character removal at the beginning', () => { + const patch = diff('hello', 'ello'); + expect(patch).toEqual([ + [PATCH_OP_TYPE.DEL, 'h'], + [PATCH_OP_TYPE.EQL, 'ello'], + ]); + assertPatch('hello', 'ello', patch); + }); + + test('single character removal at the end', () => { + const patch1 = diff('hello', 'hell'); + const patch2 = diffEdit('hello', 'hell', 4); + expect(patch1).toEqual([ + [PATCH_OP_TYPE.EQL, 'hell'], + [PATCH_OP_TYPE.DEL, 'o'], + ]); + expect(patch2).toEqual([ + [PATCH_OP_TYPE.EQL, 'hell'], + [PATCH_OP_TYPE.DEL, 'o'], + ]); + assertPatch('hello', 'hell', patch1); + assertPatch('hello', 'hell', patch2); + }); + + test('single character replacement at the beginning', () => { + const patch1 = diff('hello', 'Hello'); + const patch2 = diffEdit('hello', 'Hello', 1); + expect(patch1).toEqual([ + [PATCH_OP_TYPE.DEL, 'h'], + [PATCH_OP_TYPE.INS, 'H'], + [PATCH_OP_TYPE.EQL, 'ello'], + ]); + expect(patch2).toEqual([ + [PATCH_OP_TYPE.DEL, 'h'], + [PATCH_OP_TYPE.INS, 'H'], + [PATCH_OP_TYPE.EQL, 'ello'], + ]); + assertPatch('hello', 'Hello', patch1); + assertPatch('hello', 'Hello', patch2); + }); + + test('single character replacement at the end', () => { + const patch = diff('hello', 'hellO'); + expect(patch).toEqual([ + [PATCH_OP_TYPE.EQL, 'hell'], + [PATCH_OP_TYPE.DEL, 'o'], + [PATCH_OP_TYPE.INS, 'O'], + ]); + assertPatch('hello', 'hellO', patch); + }); + + test('two inserts', () => { + const src = '0123456789'; + const 
dst = '012__3456xx789'; + const patch = diff(src, dst); + assertPatch(src, dst, patch); + }); + + test('two deletes', () => { + const src = '0123456789'; + const dst = '0134589'; + const patch = diff(src, dst); + assertPatch(src, dst, patch); + }); + + test('two inserts and two deletes', () => { + const src = '0123456789'; + const dst = '01_245-678'; + assertPatch(src, dst); + }); + + test('emoji', () => { + assertPatch('a🙃b', 'ab'); + assertPatch('a🙃b', 'a🙃'); + assertPatch('a🙃b', '🙃b'); + assertPatch('a🙃b', 'aasasdfdf👋b'); + assertPatch('a🙃b', 'a👋b'); + }); + + test('same strings', () => { + assertPatch('', ''); + assertPatch('1', '1'); + assertPatch('12', '12'); + assertPatch('asdf asdf asdf', 'asdf asdf asdf'); + assertPatch('a🙃b', 'a🙃b'); + }); + + test('delete everything', () => { + assertPatch('1', ''); + assertPatch('12', ''); + assertPatch('123', ''); + assertPatch('asdf asdf asdf asdf asdf', ''); + assertPatch('a🙃b', ''); + }); + + test('insert into empty string', () => { + assertPatch('', '1'); + assertPatch('', '12'); + assertPatch('', '123'); + assertPatch('', '1234'); + assertPatch('', 'asdf asdf asdf asdf asdf asdf asdf asdf asdf'); + assertPatch('', 'a🙃b'); + }); + + test('common prefix', () => { + assertPatch('abc', 'xyz'); + assertPatch('1234abcdef', '1234xyz'); + assertPatch('1234', '1234xyz'); + assertPatch('1234_', '1234xyz'); + }); + + test('common suffix', () => { + assertPatch('abcdef1234', 'xyz1234'); + assertPatch('1234abcdef', 'xyz1234'); + assertPatch('1234', 'xyz1234'); + assertPatch('_1234', 'xyz1234'); + }); + + test('common overlap', () => { + assertPatch('ab', 'bc'); + assertPatch('abc', 'abcd'); + assertPatch('ab', 'abcd'); + assertPatch('xab', 'abcd'); + assertPatch('xabc', 'abcd'); + assertPatch('xyabc', 'abcd_'); + assertPatch('12345xxx', 'xxabcd'); + }); +}); + +describe('diffEdit()', () => { + const assertDiffEdit = (prefix: string, edit: string, suffix: string) => { + const src1 = prefix + suffix; + const dst1 = prefix + edit 
+ suffix; + const cursor1 = prefix.length + edit.length; + const patch1 = diffEdit(src1, dst1, cursor1); + assertPatch(src1, dst1, patch1); + const patch1Expected: Patch = []; + if (prefix) patch1Expected.push([PATCH_OP_TYPE.EQL, prefix]); + if (edit) patch1Expected.push([PATCH_OP_TYPE.INS, edit]); + if (suffix) patch1Expected.push([PATCH_OP_TYPE.EQL, suffix]); + expect(patch1).toEqual(patch1Expected); + const src2 = prefix + edit + suffix; + const dst2 = prefix + suffix; + const cursor2 = prefix.length; + const patch2 = diffEdit(src2, dst2, cursor2); + assertPatch(src2, dst2, patch2); + const patch2Expected: Patch = []; + if (prefix) patch2Expected.push([PATCH_OP_TYPE.EQL, prefix]); + if (edit) patch2Expected.push([PATCH_OP_TYPE.DEL, edit]); + if (suffix) patch2Expected.push([PATCH_OP_TYPE.EQL, suffix]); + expect(patch2).toEqual(patch2Expected); + }; + + test('can handle various inserts', () => { + assertDiffEdit('', 'a', ''); + assertDiffEdit('a', 'b', ''); + assertDiffEdit('ab', 'c', ''); + assertDiffEdit('abc', 'd', ''); + assertDiffEdit('abcd', 'efg', ''); + assertDiffEdit('abcd', '_', 'efg'); + assertDiffEdit('abcd', '__', 'efg'); + assertDiffEdit('abcd', '___', 'efg'); + assertDiffEdit('', '_', 'var'); + assertDiffEdit('', '_', '_var'); + assertDiffEdit('a', 'b', 'c'); + assertDiffEdit('Hello', ' world', ''); + assertDiffEdit('Hello world', '!', ''); + assertDiffEdit('aaa', 'bbb', 'ccc'); + assertDiffEdit('1', '2', '3'); + }); +}); diff --git a/src/util/diff/__tests__/util.ts b/src/util/diff/__tests__/util.ts new file mode 100644 index 0000000000..aa0019ecd0 --- /dev/null +++ b/src/util/diff/__tests__/util.ts @@ -0,0 +1,37 @@ +import * as diff from '../str'; + +export const assertPatch = (src: string, dst: string, patch: diff.Patch = diff.diff(src, dst)) => { + const src1 = diff.src(patch); + const dst1 = diff.dst(patch); + let dst2 = src; + diff.apply( + patch, + dst2.length, + (pos, str) => { + dst2 = dst2.slice(0, pos) + str + dst2.slice(pos); + }, + 
/**
 * Encodes bytes as a "binary string": the result has one UTF-16 code unit per
 * input byte, and the char code of each unit equals the byte value.
 *
 * @param buf Bytes to encode.
 * @returns String with the same length as `buf`.
 */
export const toStr = (buf: Uint8Array): string => {
  let txt = '';
  for (const octet of buf) txt += String.fromCharCode(octet);
  return txt;
};

/**
 * Decodes a binary string produced by {@link toStr} back into bytes, one byte
 * per UTF-16 code unit.
 *
 * @param hex Binary string. Despite the parameter name this is not a
 *     hexadecimal encoding; each char code is written as-is into the
 *     `Uint8Array`, so only its low 8 bits are kept.
 * @returns Byte array with the same length as `hex`.
 */
export const toBin = (hex: string): Uint8Array => new Uint8Array(hex.length).map((_, i) => hex.charCodeAt(i));

/**
 * Diffs two byte arrays by converting both to binary strings and delegating
 * to `str.diff()`.
 *
 * @param src Source bytes.
 * @param dst Destination bytes.
 * @returns Character-level patch over the binary-string form of the inputs.
 */
export const diff = (src: Uint8Array, dst: Uint8Array): str.Patch => str.diff(toStr(src), toStr(dst));

/**
 * Replays a patch through `str.apply()`, converting every inserted text
 * fragment back to bytes before it is handed to `onInsert`. `onDelete` is
 * forwarded unchanged.
 *
 * @param patch Patch as produced by {@link diff}.
 * @param srcLen Length of the source, forwarded to `str.apply()`.
 * @param onInsert Receives the position and the inserted bytes.
 * @param onDelete Receives the position and the length of a deletion.
 */
export const apply = (
  patch: str.Patch,
  srcLen: number,
  onInsert: (pos: number, str: Uint8Array) => void,
  onDelete: (pos: number, len: number) => void,
) => {
  const insertBytes = (pos: number, txt: string) => onInsert(pos, toBin(txt));
  return str.apply(patch, srcLen, insertBytes, onDelete);
};

/** Returns the result of `str.src(patch)` converted to bytes. */
export const src = (patch: str.Patch): Uint8Array => {
  const txt = str.src(patch);
  return toBin(txt);
};

/** Returns the result of `str.dst(patch)` converted to bytes. */
export const dst = (patch: str.Patch): Uint8Array => {
  const txt = str.dst(patch);
  return toBin(txt);
};
/**
 * Aggregate character-by-character patch into a line-by-line patch.
 *
 * Works in two phases:
 *
 * 1. Split: every operation's text is cut at `'\n'` and the pieces are
 *    grouped into per-line patches. The newline character stays at the end of
 *    the piece that closes a line. Adjacent pieces of the same type within a
 *    line are merged, and empty pieces are dropped.
 * 2. Normalize: for each line, try to move a shared prefix (line start) or a
 *    shared suffix (line end) between this line and later lines, so that
 *    edits line up with line boundaries.
 *
 * All operation tuples in the result are created here; the tuples of the
 * input `patch` are only read.
 *
 * @param patch Character-level patch
 * @returns Line-level patch
 */
export const agg = (patch: str.Patch): str.Patch[] => {
  // console.log(patch);
  const lines: str.Patch[] = [];
  const length = patch.length;
  // The line currently being assembled.
  let line: str.Patch = [];
  // Appends text to the current line, merging it into the previous operation
  // when the types match. Empty text is ignored.
  const push = (type: str.PATCH_OP_TYPE, str: string) => {
    if (!str.length) return;
    const length = line.length;
    if (length) {
      const lastOp = line[length - 1];
      if (lastOp[0] === type) {
        lastOp[1] += str;
        return;
      }
    }
    line.push([type, str]);
  };
  // console.log("PATCH", patch);
  LINES: for (let i = 0; i < length; i++) {
    const op = patch[i];
    const type = op[0];
    const str = op[1];
    const index = str.indexOf('\n');
    if (index < 0) {
      // No newline: the whole operation belongs to the current line.
      push(type, str);
      continue LINES;
    } else {
      // Text up to and including the first newline closes the current line.
      push(type, str.slice(0, index + 1));
      if (line.length) lines.push(line);
      line = [];
    }
    let prevIndex = index;
    const strLen = str.length;
    // Every further newline-terminated segment of this operation is a complete
    // single-operation line; the remainder after the last newline (if any)
    // starts the next line.
    LINE: while (prevIndex < strLen) {
      const nextIndex = str.indexOf('\n', prevIndex + 1);
      if (nextIndex < 0) {
        push(type, str.slice(prevIndex + 1));
        break LINE;
      }
      lines.push([[type, str.slice(prevIndex + 1, nextIndex + 1)]]);
      prevIndex = nextIndex;
    }
  }
  if (line.length) lines.push(line);
  // console.log("LINES", lines);
  {
    const length = lines.length;
    for (let i = 0; i < length; i++) {
      const line = lines[i];
      let lineLength = line.length;
      // Line-start normalization. Applies when the line is an EQL prefix
      // followed only by operations of one edit type (all DEL or all INS).
      NORMALIZE_LINE_START: {
        if (lineLength < 2) break NORMALIZE_LINE_START;
        const firstOp = line[0];
        const secondOp = line[1];
        const secondOpType = secondOp[0];
        if (
          firstOp[0] === str.PATCH_OP_TYPE.EQL &&
          (secondOpType === str.PATCH_OP_TYPE.DEL || secondOpType === str.PATCH_OP_TYPE.INS)
        ) {
          // Every operation after the first must have the same edit type.
          for (let j = 2; j < lineLength; j++) if (line[j][0] !== secondOpType) break NORMALIZE_LINE_START;
          for (let j = i + 1; j < length; j++) {
            const targetLine = lines[j];
            const targetLineLength = targetLine.length;
            const pfx = firstOp[1];
            let targetLineFirstOp: str.PatchOperation;
            let targetLineSecondOp: str.PatchOperation;
            if (
              targetLine.length > 1 &&
              (targetLineFirstOp = targetLine[0])[0] === secondOpType &&
              (targetLineSecondOp = targetLine[1])[0] === str.PATCH_OP_TYPE.EQL &&
              pfx === targetLineFirstOp[1]
            ) {
              // The later line starts with an edit whose text equals this
              // line's EQL prefix, followed by an EQL. Swap the roles: the
              // prefix becomes part of this line's edit, and the same text
              // becomes part of the later line's EQL.
              // NOTE(review): there is no `break` after a successful shift, so
              // the scan continues over later lines with the same
              // `firstOp`/`secondOp` — confirm this is intended.
              line.splice(0, 1);
              secondOp[1] = pfx + secondOp[1];
              targetLineSecondOp[1] = pfx + targetLineSecondOp[1];
              targetLine.splice(0, 1);
            } else {
              // Only lines made up entirely of the same edit type may be
              // skipped over; anything else ends the search.
              for (let k = 0; k < targetLineLength; k++)
                if (targetLine[k][0] !== secondOpType) break NORMALIZE_LINE_START;
            }
          }
        }
      }
      // The start normalization may have removed the first operation.
      lineLength = line.length;
      // Line-end normalization. Applies when the line ends with a DEL.
      NORMALIZE_LINE_END: {
        if (lineLength < 2) break NORMALIZE_LINE_END;
        const lastOp = line[line.length - 1];
        const lastOpStr = lastOp[1];
        const secondLastOp = line[line.length - 2];
        if (lastOp[0] === str.PATCH_OP_TYPE.DEL) {
          // if (lastOp[0] === PATCH_OP_TYPE.DELETE && secondLastOp[0] === PATCH_OP_TYPE.EQUAL) {
          for (let j = i + 1; j < length; j++) {
            const targetLine = lines[j];
            const targetLineLength = targetLine.length;
            if (targetLineLength <= 1) {
              // Single-operation lines are skipped over only if they are DEL.
              if (targetLine[0][0] !== str.PATCH_OP_TYPE.DEL) break NORMALIZE_LINE_END;
            } else {
              // The later line must be DEL operations followed by one final EQL.
              const targetLineLastOp = targetLine[targetLine.length - 1];
              if (targetLineLastOp[0] !== str.PATCH_OP_TYPE.EQL) break NORMALIZE_LINE_END;
              for (let k = 0; k < targetLine.length - 1; k++)
                if (targetLine[k][0] !== str.PATCH_OP_TYPE.DEL) break NORMALIZE_LINE_END;
              // `keepStr` is the later line's kept tail, with a trailing
              // newline added if it has none. It must be a suffix of this
              // line's trailing DEL text.
              let keepStr = targetLineLastOp[1];
              const keepStrEndsWithNl = keepStr.endsWith('\n');
              if (!keepStrEndsWithNl) keepStr += '\n';
              if (keepStr.length > lastOpStr.length) break NORMALIZE_LINE_END;
              if (!lastOpStr.endsWith(keepStr)) break NORMALIZE_LINE_END;
              const index = lastOpStr.length - keepStr.length;
              // NOTE(review): the length guard above already exits when
              // `keepStr` is longer than `lastOpStr`, so `index < 0` looks
              // unreachable — confirm and consider removing this branch.
              if (index < 0) {
                (lastOp[0] as str.PATCH_OP_TYPE) = str.PATCH_OP_TYPE.EQL;
                if (secondLastOp[0] === str.PATCH_OP_TYPE.EQL) {
                  secondLastOp[1] += lastOpStr;
                  line.splice(lineLength - 1, 1);
                }
              } else if (index === 0) {
                // The whole trailing DEL equals `keepStr`: turn it into EQL,
                // merging with a preceding EQL when there is one.
                line.splice(lineLength - 1, 1);
                if (secondLastOp[0] === str.PATCH_OP_TYPE.EQL) {
                  secondLastOp[1] += keepStr;
                } else {
                  line.push([str.PATCH_OP_TYPE.EQL, keepStr]);
                }
              } else {
                // Only the tail of the trailing DEL equals `keepStr`: keep the
                // head as DEL and append the tail as EQL.
                lastOp[1] = lastOpStr.slice(0, index);
                line.push([str.PATCH_OP_TYPE.EQL, keepStr]);
              }
              // In the later line the kept tail becomes deleted instead:
              // either merged into the preceding DEL or re-typed in place.
              const targetLineSecondLastOp = targetLine[targetLine.length - 2];
              if (targetLineSecondLastOp[0] === str.PATCH_OP_TYPE.DEL) {
                targetLineSecondLastOp[1] += keepStrEndsWithNl ? keepStr : keepStr.slice(0, -1);
                targetLine.splice(targetLineLength - 1, 1);
              } else {
                (targetLineLastOp[0] as str.PATCH_OP_TYPE) = str.PATCH_OP_TYPE.DEL;
              }
            }
          }
        }
      }
    }
  }
  // console.log("NORMALIZED LINES", lines);
  return lines;
};
LINE_PATCH_OP_TYPE.EQL : LINE_PATCH_OP_TYPE.MIX; + srcIdx++; + dstIdx++; + } else { + lineType = LINE_PATCH_OP_TYPE.DEL; + srcIdx++; + } + } else { + lineType = LINE_PATCH_OP_TYPE.INS; + dstIdx++; + } + } else { + const op = line[0]; + const type = op[0]; + if (lineLength === 1 && type === lastOpType && type === str.PATCH_OP_TYPE.EQL) { + srcIdx++; + dstIdx++; + } else if (lastOpType === str.PATCH_OP_TYPE.EQL) { + lineType = LINE_PATCH_OP_TYPE.MIX; + srcIdx++; + dstIdx++; + } else if (lastOpType === str.PATCH_OP_TYPE.INS) { + lineType = LINE_PATCH_OP_TYPE.INS; + dstIdx++; + } else if (lastOpType === str.PATCH_OP_TYPE.DEL) { + lineType = LINE_PATCH_OP_TYPE.DEL; + srcIdx++; + } + } + patch.push([lineType, srcIdx, dstIdx, line]); + } + return patch; +}; diff --git a/src/util/diff/str.ts b/src/util/diff/str.ts new file mode 100644 index 0000000000..de0b2499eb --- /dev/null +++ b/src/util/diff/str.ts @@ -0,0 +1,567 @@ +export const enum PATCH_OP_TYPE { + DEL = -1, + EQL = 0, + INS = 1, +} + +export type Patch = PatchOperation[]; +export type PatchOperation = PatchOperationDelete | PatchOperationEqual | PatchOperationInsert; +export type PatchOperationDelete = [type: PATCH_OP_TYPE.DEL, txt: string]; +export type PatchOperationEqual = [type: PATCH_OP_TYPE.EQL, txt: string]; +export type PatchOperationInsert = [type: PATCH_OP_TYPE.INS, txt: string]; + +const startsWithPairEnd = (str: string) => { + const code = str.charCodeAt(0); + return code >= 0xdc00 && code <= 0xdfff; +}; + +const endsWithPairStart = (str: string): boolean => { + const code = str.charCodeAt(str.length - 1); + return code >= 0xd800 && code <= 0xdbff; +}; + +/** + * Reorder and merge like edit sections. Merge equalities. + * Any edit section can move as long as it doesn't cross an equality. + * + * @param diff Array of diff tuples. 
+ * @param fixUnicode Whether to normalize to a unicode-correct diff + */ +const cleanupMerge = (diff: Patch, fixUnicode: boolean) => { + diff.push([PATCH_OP_TYPE.EQL, '']); + let pointer = 0; + let delCnt = 0; + let insCnt = 0; + let delTxt = ''; + let insTxt = ''; + let commonLength: number = 0; + while (pointer < diff.length) { + if (pointer < diff.length - 1 && !diff[pointer][1]) { + diff.splice(pointer, 1); + continue; + } + const d1 = diff[pointer]; + switch (d1[0]) { + case PATCH_OP_TYPE.INS: + insCnt++; + pointer++; + insTxt += d1[1]; + break; + case PATCH_OP_TYPE.DEL: + delCnt++; + pointer++; + delTxt += d1[1]; + break; + case PATCH_OP_TYPE.EQL: { + let prevEq = pointer - insCnt - delCnt - 1; + if (fixUnicode) { + // prevent splitting of unicode surrogate pairs. When `fixUnicode` is true, + // we assume that the old and new text in the diff are complete and correct + // unicode-encoded JS strings, but the tuple boundaries may fall between + // surrogate pairs. We fix this by shaving off stray surrogates from the end + // of the previous equality and the beginning of this equality. This may create + // empty equalities or a common prefix or suffix. For example, if AB and AC are + // emojis, `[[0, 'A'], [-1, 'BA'], [0, 'C']]` would turn into deleting 'ABAC' and + // inserting 'AC', and then the common suffix 'AC' will be eliminated. in this + // particular case, both equalities go away, we absorb any previous inequalities, + // and we keep scanning for the next equality before rewriting the tuples. 
+ const d = diff[prevEq]; + if (prevEq >= 0) { + let str = d[1]; + if (endsWithPairStart(str)) { + const stray = str.slice(-1); + d[1] = str = str.slice(0, -1); + delTxt = stray + delTxt; + insTxt = stray + insTxt; + if (!str) { + // emptied out previous equality, so delete it and include previous delete/insert + diff.splice(prevEq, 1); + pointer--; + let k = prevEq - 1; + const dk = diff[k]; + if (dk) { + const type = dk[0]; + if (type === PATCH_OP_TYPE.INS) { + insCnt++; + k--; + insTxt = dk[1] + insTxt; + } else if (type === PATCH_OP_TYPE.DEL) { + delCnt++; + k--; + delTxt = dk[1] + delTxt; + } + } + prevEq = k; + } + } + } + const d1 = diff[pointer]; + const str1 = d1[1]; + if (startsWithPairEnd(str1)) { + const stray = str1.charAt(0); + d1[1] = str1.slice(1); + delTxt += stray; + insTxt += stray; + } + } + if (pointer < diff.length - 1 && !diff[pointer][1]) { + // for empty equality not at end, wait for next equality + diff.splice(pointer, 1); + break; + } + const hasDelTxt = delTxt.length > 0; + const hasInsTxt = insTxt.length > 0; + if (hasDelTxt || hasInsTxt) { + // note that diff_commonPrefix and diff_commonSuffix are unicode-aware + if (hasDelTxt && hasInsTxt) { + // Factor out any common prefixes. + commonLength = pfx(insTxt, delTxt); + if (commonLength !== 0) { + if (prevEq >= 0) { + diff[prevEq][1] += insTxt.slice(0, commonLength); + } else { + diff.splice(0, 0, [PATCH_OP_TYPE.EQL, insTxt.slice(0, commonLength)]); + pointer++; + } + insTxt = insTxt.slice(commonLength); + delTxt = delTxt.slice(commonLength); + } + // Factor out any common suffixes. + commonLength = sfx(insTxt, delTxt); + if (commonLength !== 0) { + diff[pointer][1] = insTxt.slice(insTxt.length - commonLength) + diff[pointer][1]; + insTxt = insTxt.slice(0, insTxt.length - commonLength); + delTxt = delTxt.slice(0, delTxt.length - commonLength); + } + } + // Delete the offending records and add the merged ones. 
+ const n = insCnt + delCnt; + const delTxtLen = delTxt.length; + const insTxtLen = insTxt.length; + if (delTxtLen === 0 && insTxtLen === 0) { + diff.splice(pointer - n, n); + pointer = pointer - n; + } else if (delTxtLen === 0) { + diff.splice(pointer - n, n, [PATCH_OP_TYPE.INS, insTxt]); + pointer = pointer - n + 1; + } else if (insTxtLen === 0) { + diff.splice(pointer - n, n, [PATCH_OP_TYPE.DEL, delTxt]); + pointer = pointer - n + 1; + } else { + diff.splice(pointer - n, n, [PATCH_OP_TYPE.DEL, delTxt], [PATCH_OP_TYPE.INS, insTxt]); + pointer = pointer - n + 2; + } + } + const d0 = diff[pointer - 1]; + if (pointer !== 0 && d0[0] === PATCH_OP_TYPE.EQL) { + // Merge this equality with the previous one. + d0[1] += diff[pointer][1]; + diff.splice(pointer, 1); + } else pointer++; + insCnt = 0; + delCnt = 0; + delTxt = ''; + insTxt = ''; + break; + } + } + } + if (diff[diff.length - 1][1] === '') diff.pop(); // Remove the dummy entry at the end. + + // Second pass: look for single edits surrounded on both sides by equalities + // which can be shifted sideways to eliminate an equality. + // e.g: ABAC -> ABAC + let changes = false; + pointer = 1; + // Intentionally ignore the first and last element (don't need checking). + while (pointer < diff.length - 1) { + const d0 = diff[pointer - 1]; + const d2 = diff[pointer + 1]; + if (d0[0] === PATCH_OP_TYPE.EQL && d2[0] === PATCH_OP_TYPE.EQL) { + // This is a single edit surrounded by equalities. + const str0 = d0[1]; + const d1 = diff[pointer]; + const str1 = d1[1]; + const str2 = d2[1]; + if (str1.slice(str1.length - str0.length) === str0) { + // Shift the edit over the previous equality. + diff[pointer][1] = str0 + str1.slice(0, str1.length - str0.length); + d2[1] = str0 + str2; + diff.splice(pointer - 1, 1); + changes = true; + } else if (str1.slice(0, str2.length) === str2) { + // Shift the edit over the next equality. 
+ d0[1] += d2[1]; + d1[1] = str1.slice(str2.length) + str2; + diff.splice(pointer + 1, 1); + changes = true; + } + } + pointer++; + } + // If shifts were made, the diff needs reordering and another shift sweep. + if (changes) cleanupMerge(diff, fixUnicode); +}; + +/** + * Given the location of the 'middle snake', split the diff in two parts + * and recurse. + * + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @param x Index of split point in text1. + * @param y Index of split point in text2. + * @return Array of diff tuples. + */ +const bisectSplit = (text1: string, text2: string, x: number, y: number): Patch => { + const diffsA = diff_(text1.slice(0, x), text2.slice(0, y), false); + const diffsB = diff_(text1.slice(x), text2.slice(y), false); + return diffsA.concat(diffsB); +}; + +/** + * Find the 'middle snake' of a diff, split the problem in two + * and return the recursively constructed diff. + * + * This is a port of `diff-patch-match` implementation to TypeScript. + * + * @see http://www.xmailserver.org/diff2.pdf EUGENE W. MYERS 1986 paper: An + * O(ND) Difference Algorithm and Its Variations. + * + * @param text1 Old string to be diffed. + * @param text2 New string to be diffed. + * @return A {@link Patch} - an array of patch operations. + */ +const bisect = (text1: string, text2: string): Patch => { + const text1Length = text1.length; + const text2Length = text2.length; + const maxD = Math.ceil((text1Length + text2Length) / 2); + const vOffset = maxD; + const vLength = 2 * maxD; + const v1 = new Array(vLength); + const v2 = new Array(vLength); + for (let x = 0; x < vLength; x++) { + v1[x] = -1; + v2[x] = -1; + } + v1[vOffset + 1] = 0; + v2[vOffset + 1] = 0; + const delta = text1Length - text2Length; + // If the total number of characters is odd, then the front path will collide + // with the reverse path. + const front = delta % 2 !== 0; + // Offsets for start and end of k loop. 
+ // Prevents mapping of space beyond the grid. + let k1start = 0; + let k1end = 0; + let k2start = 0; + let k2end = 0; + for (let d = 0; d < maxD; d++) { + for (let k1 = -d + k1start; k1 <= d - k1end; k1 += 2) { + const k1_offset = vOffset + k1; + let x1: number = 0; + const v10 = v1[k1_offset - 1]; + const v11 = v1[k1_offset + 1]; + if (k1 === -d || (k1 !== d && v10 < v11)) x1 = v11; + else x1 = v10 + 1; + let y1 = x1 - k1; + while (x1 < text1Length && y1 < text2Length && text1.charAt(x1) === text2.charAt(y1)) { + x1++; + y1++; + } + v1[k1_offset] = x1; + if (x1 > text1Length) k1end += 2; + else if (y1 > text2Length) k1start += 2; + else if (front) { + const k2Offset = vOffset + delta - k1; + const v2Offset = v2[k2Offset]; + if (k2Offset >= 0 && k2Offset < vLength && v2Offset !== -1) { + if (x1 >= text1Length - v2Offset) return bisectSplit(text1, text2, x1, y1); + } + } + } + // Walk the reverse path one step. + for (let k2 = -d + k2start; k2 <= d - k2end; k2 += 2) { + const k2_offset = vOffset + k2; + let x2 = + k2 === -d || (k2 !== d && v2[k2_offset - 1] < v2[k2_offset + 1]) ? v2[k2_offset + 1] : v2[k2_offset - 1] + 1; + let y2 = x2 - k2; + while ( + x2 < text1Length && + y2 < text2Length && + text1.charAt(text1Length - x2 - 1) === text2.charAt(text2Length - y2 - 1) + ) { + x2++; + y2++; + } + v2[k2_offset] = x2; + if (x2 > text1Length) k2end += 2; + else if (y2 > text2Length) k2start += 2; + else if (!front) { + const k1_offset = vOffset + delta - k2; + const x1 = v1[k1_offset]; + if (k1_offset >= 0 && k1_offset < vLength && x1 !== -1) { + const y1 = vOffset + x1 - k1_offset; + x2 = text1Length - x2; + if (x1 >= x2) return bisectSplit(text1, text2, x1, y1); + } + } + } + } + return [ + [PATCH_OP_TYPE.DEL, text1], + [PATCH_OP_TYPE.INS, text2], + ]; +}; + +/** + * Find the differences between two texts. Assumes that the texts do not + * have any common prefix or suffix. + * + * @param src Old string to be diffed. + * @param dst New string to be diffed. 
+ * @return A {@link Patch} - an array of patch operations. + */ +const diffNoCommonAffix = (src: string, dst: string): Patch => { + if (!src) return [[PATCH_OP_TYPE.INS, dst]]; + if (!dst) return [[PATCH_OP_TYPE.DEL, src]]; + const text1Length = src.length; + const text2Length = dst.length; + const long = text1Length > text2Length ? src : dst; + const short = text1Length > text2Length ? dst : src; + const shortTextLength = short.length; + const indexOfContainedShort = long.indexOf(short); + if (indexOfContainedShort >= 0) { + const start = long.slice(0, indexOfContainedShort); + const end = long.slice(indexOfContainedShort + shortTextLength); + return text1Length > text2Length + ? [ + [PATCH_OP_TYPE.DEL, start], + [PATCH_OP_TYPE.EQL, short], + [PATCH_OP_TYPE.DEL, end], + ] + : [ + [PATCH_OP_TYPE.INS, start], + [PATCH_OP_TYPE.EQL, short], + [PATCH_OP_TYPE.INS, end], + ]; + } + if (shortTextLength === 1) + return [ + [PATCH_OP_TYPE.DEL, src], + [PATCH_OP_TYPE.INS, dst], + ]; + return bisect(src, dst); +}; + +/** + * Determine the common prefix of two strings. + * + * @param txt1 First string. + * @param txt2 Second string. + * @return The number of characters common to the start of each string. + */ +export const pfx = (txt1: string, txt2: string) => { + if (!txt1 || !txt2 || txt1.charAt(0) !== txt2.charAt(0)) return 0; + let min = 0; + let max = Math.min(txt1.length, txt2.length); + let mid = max; + let start = 0; + while (min < mid) { + if (txt1.slice(start, mid) === txt2.slice(start, mid)) { + min = mid; + start = min; + } else max = mid; + mid = Math.floor((max - min) / 2 + min); + } + const code = txt1.charCodeAt(mid - 1); + const isSurrogatePairStart = code >= 0xd800 && code <= 0xdbff; + if (isSurrogatePairStart) mid--; + return mid; +}; + +/** + * Determine the common suffix of two strings. + * + * @param txt1 First string. + * @param txt2 Second string. + * @return The number of characters common to the end of each string. 
+ */ +export const sfx = (txt1: string, txt2: string): number => { + if (!txt1 || !txt2 || txt1.slice(-1) !== txt2.slice(-1)) return 0; + let min = 0; + let max = Math.min(txt1.length, txt2.length); + let mid = max; + let end = 0; + while (min < mid) { + if (txt1.slice(txt1.length - mid, txt1.length - end) === txt2.slice(txt2.length - mid, txt2.length - end)) { + min = mid; + end = min; + } else max = mid; + mid = Math.floor((max - min) / 2 + min); + } + const code = txt1.charCodeAt(txt1.length - mid); + const isSurrogatePairEnd = code >= 0xd800 && code <= 0xdbff; + if (isSurrogatePairEnd) mid--; + return mid; +}; + +/** + * Find the differences between two texts. Simplifies the problem by stripping + * any common prefix or suffix off the texts before diffing. + * + * @param src Old string to be diffed. + * @param dst New string to be diffed. + * @param cleanup Whether to apply semantic cleanup before returning. + * @return A {@link Patch} - an array of patch operations. + */ +const diff_ = (src: string, dst: string, fixUnicode: boolean): Patch => { + if (src === dst) return src ? [[PATCH_OP_TYPE.EQL, src]] : []; + + // Trim off common prefix (speedup). + const prefixLength = pfx(src, dst); + const prefix = src.slice(0, prefixLength); + src = src.slice(prefixLength); + dst = dst.slice(prefixLength); + + // Trim off common suffix (speedup). + const suffixLength = sfx(src, dst); + const suffix = src.slice(src.length - suffixLength); + src = src.slice(0, src.length - suffixLength); + dst = dst.slice(0, dst.length - suffixLength); + + // Compute the diff on the middle block. + const diff: Patch = diffNoCommonAffix(src, dst); + if (prefix) diff.unshift([PATCH_OP_TYPE.EQL, prefix]); + if (suffix) diff.push([PATCH_OP_TYPE.EQL, suffix]); + cleanupMerge(diff, fixUnicode); + return diff; +}; + +/** + * Find the differences between two texts. + * + * @param src Old string to be diffed. + * @param dst New string to be diffed. 
+ * @return A {@link Patch} - an array of patch operations. + */ +export const diff = (src: string, dst: string): Patch => diff_(src, dst, true); + +/** + * Considers simple insertion and deletion cases around the caret position in + * the destination string. If the fast patch cannot be constructed, it falls + * back to the default full implementation. + * + * Cases considered: + * + * 1. Insertion of a single or multiple characters right before the caret. + * 2. Deletion of one or more characters right before the caret. + * + * @param src Old string to be diffed. + * @param dst New string to be diffed. + * @param caret The position of the caret in the new string. Set to -1 to + * ignore the caret position. + * @return A {@link Patch} - an array of patch operations. + */ +export const diffEdit = (src: string, dst: string, caret: number) => { + edit: { + if (caret < 0) break edit; + const srcLen = src.length; + const dstLen = dst.length; + if (srcLen === dstLen) break edit; + const dstSfx = dst.slice(caret); + const sfxLen = dstSfx.length; + if (sfxLen > srcLen) break edit; + const srcSfx = src.slice(srcLen - sfxLen); + if (srcSfx !== dstSfx) break edit; + const isInsert = dstLen > srcLen; + if (isInsert) { + const pfxLen = srcLen - sfxLen; + const srcPfx = src.slice(0, pfxLen); + const dstPfx = dst.slice(0, pfxLen); + if (srcPfx !== dstPfx) break edit; + const insert = dst.slice(pfxLen, caret); + const patch: Patch = []; + if (srcPfx) patch.push([PATCH_OP_TYPE.EQL, srcPfx]); + if (insert) patch.push([PATCH_OP_TYPE.INS, insert]); + if (dstSfx) patch.push([PATCH_OP_TYPE.EQL, dstSfx]); + return patch; + } else { + const pfxLen = dstLen - sfxLen; + const dstPfx = dst.slice(0, pfxLen); + const srcPfx = src.slice(0, pfxLen); + if (srcPfx !== dstPfx) break edit; + const del = src.slice(pfxLen, srcLen - sfxLen); + const patch: Patch = []; + if (srcPfx) patch.push([PATCH_OP_TYPE.EQL, srcPfx]); + if (del) patch.push([PATCH_OP_TYPE.DEL, del]); + if (dstSfx) 
patch.push([PATCH_OP_TYPE.EQL, dstSfx]); + return patch; + } + } + return diff(src, dst); +}; + +export const src = (patch: Patch): string => { + let txt = ''; + const length = patch.length; + for (let i = 0; i < length; i++) { + const op = patch[i]; + if (op[0] !== PATCH_OP_TYPE.INS) txt += op[1]; + } + return txt; +}; + +export const dst = (patch: Patch): string => { + let txt = ''; + const length = patch.length; + for (let i = 0; i < length; i++) { + const op = patch[i]; + if (op[0] !== PATCH_OP_TYPE.DEL) txt += op[1]; + } + return txt; +}; + +const invertOp = (op: PatchOperation): PatchOperation => { + const type = op[0]; + return type === PATCH_OP_TYPE.EQL + ? op + : type === PATCH_OP_TYPE.INS + ? [PATCH_OP_TYPE.DEL, op[1]] + : [PATCH_OP_TYPE.INS, op[1]]; +}; + +/** + * Inverts patch such that it can be applied to `dst` to get `src` (instead of + * `src` to get `dst`). + * + * @param patch The patch to invert. + * @returns Inverted patch. + */ +export const invert = (patch: Patch): Patch => patch.map(invertOp); + +/** + * @param patch The patch to apply. + * @param srcLen The length of the source string. + * @param onInsert Callback for insert operations. + * @param onDelete Callback for delete operations. 
+ */ +export const apply = ( + patch: Patch, + srcLen: number, + onInsert: (pos: number, str: string) => void, + onDelete: (pos: number, len: number, str: string) => void, +) => { + const length = patch.length; + let pos = srcLen; + for (let i = length - 1; i >= 0; i--) { + const [type, str] = patch[i]; + if (type === PATCH_OP_TYPE.EQL) pos -= str.length; + else if (type === PATCH_OP_TYPE.INS) onInsert(pos, str); + else { + const len = str.length; + pos -= len; + onDelete(pos, len, str); + } + } +}; diff --git a/src/util/strCnt.ts b/src/util/strCnt.ts new file mode 100644 index 0000000000..2c3db6965d --- /dev/null +++ b/src/util/strCnt.ts @@ -0,0 +1,11 @@ +export const strCnt = (needle: string, haystack: string, offset: number = 0): number => { + let cnt = 0; + const needleLen = needle.length; + if (needleLen === 0) return 0; + while (true) { + const index = haystack.indexOf(needle, offset); + if (index < 0) return cnt; + cnt++; + offset = index + needleLen; + } +}; diff --git a/yarn.lock b/yarn.lock index 12ae85d662..d19a8cf8ed 100644 --- a/yarn.lock +++ b/yarn.lock @@ -678,6 +678,11 @@ resolved "https://registry.yarnpkg.com/@jsonjoy.com/util/-/util-1.5.0.tgz#6008e35b9d9d8ee27bc4bfaa70c8cbf33a537b4c" integrity sha512-ojoNsrIuPI9g6o8UxhraZQSyF2ByJanAY4cTFbc8Mf2AXEF4aQRGY1dJxyJpuyav8r9FGflEt/Ff3u5Nt6YMPA== +"@jsonjoy.com/util@^1.6.0": + version "1.6.0" + resolved "https://registry.yarnpkg.com/@jsonjoy.com/util/-/util-1.6.0.tgz#23991b2fe12cb3a006573d9dc97c768d3ed2c9f1" + integrity sha512-sw/RMbehRhN68WRtcKCpQOPfnH6lLP4GJfqzi3iYej8tnzpZUDr6UkZYJjcjjC0FWEJOJbyM3PTIwxucUmDG2A== + "@leichtgewicht/ip-codec@^2.0.1": version "2.0.5" resolved "https://registry.yarnpkg.com/@leichtgewicht/ip-codec/-/ip-codec-2.0.5.tgz#4fc56c15c580b9adb7dc3c333a134e540b44bfb1" @@ -2165,7 +2170,6 @@ dunder-proto@^1.0.1: "editing-traces@https://github.com/streamich/editing-traces#6494020428530a6e382378b98d1d7e31334e2d7b": version "0.0.0" - uid "6494020428530a6e382378b98d1d7e31334e2d7b" resolved 
"https://github.com/streamich/editing-traces#6494020428530a6e382378b98d1d7e31334e2d7b" ee-first@1.1.1: @@ -3514,7 +3518,6 @@ jsesc@^3.0.2: "json-crdt-traces@https://github.com/streamich/json-crdt-traces#ec825401dc05cbb74b9e0b3c4d6527399f54d54d": version "0.0.1" - uid ec825401dc05cbb74b9e0b3c4d6527399f54d54d resolved "https://github.com/streamich/json-crdt-traces#ec825401dc05cbb74b9e0b3c4d6527399f54d54d" json-logic-js@^2.0.2: