Skip to content

Commit fc622d5

Browse files
authored
Merge pull request #133 from mbarbin/vendor-merge3-from-gazagnaire
Vendor gazagnaire/ocaml-merge3 for the Myers diff computation
2 parents bf67aa4 + f85da04 commit fc622d5

19 files changed

Lines changed: 334 additions & 255 deletions

File tree

.vscode/settings.json

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
"**/crs-config.json": "jsonc"
66
},
77
"cSpell.words": [
8+
"Algorithmica",
89
"alist",
910
"autofmt",
1011
"Barbin",
@@ -42,6 +43,7 @@
4243
"Fpath",
4344
"fprintf",
4445
"Fsegment",
46+
"gazagnaire",
4547
"getcwd",
4648
"gpatch",
4749
"groupi",
@@ -57,6 +59,7 @@
5759
"lsplit",
5860
"lstrip",
5961
"mapi",
62+
"monopampam",
6063
"Mathieu",
6164
"mbarbin",
6265
"MELPA",
@@ -99,6 +102,7 @@
99102
"Streeters",
100103
"stringable",
101104
"stringp",
105+
"tangled",
102106
"textf",
103107
"textutils",
104108
"Uchar",

NOTICE.md

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,23 @@ This project has a test dependency to the opam packages [printbox](https://opam.
7575

7676
A copy of the license file for the printbox project is located under `third-party-licenses/`.
7777

78-
## Windtrap.Myers
78+
## Gazagnaire ocaml-merge3 (Myers diff)
7979

80-
`Myers` is vendored from the [windtrap](https://github.com/invariant-hq/windtrap) test framework by Thibaut Mattio (released under `ISC`) plus minor modifications documented in the file.
80+
Myers is vendored from
81+
[ocaml-merge3](https://tangled.org/gazagnaire.org/ocaml-merge3) by Thomas Gazagnaire (released under `ISC`). Only the pure
82+
diff computation is vendored; the parts unused by this project are not included.
83+
The exact provenance and list of changes are documented at the top of
84+
`src/myers/merge3.ml` and in `src/myers/vendor.json`.
85+
86+
A copy of the license file for ocaml-merge3 is located under
87+
`third-party-license/gazagnaire/ocaml-merge3/LICENSE`.
88+
89+
## Windtrap (unified-diff renderer)
90+
91+
The unified-diff renderer in `src/myers/myers.ml` is vendored from
92+
[windtrap](https://github.com/invariant-hq/windtrap) by Invariant Systems
93+
(released under `ISC`). The exact provenance and list of changes are documented
94+
at the top of `src/myers/myers.ml` and in `src/myers/vendor.json`.
95+
96+
A copy of the license file for windtrap is located under
97+
`third-party-license/invariant-hq/windtrap/LICENSE`.

crs-tests.opam

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ depends: [
3737
"volgo-git-unix" {>= "0.0.21"}
3838
"volgo-hg-unix" {>= "0.0.21"}
3939
"volgo-vcs" {>= "0.0.21"}
40-
"windtrap" {with-test & >= "0.1.0"}
4140
"yojson" {>= "2.2.2"}
4241
"odoc" {with-doc}
4342
]

dune-project

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -131,15 +131,6 @@
131131
(>= 0.0.21))
132132
(volgo-vcs
133133
(>= 0.0.21))
134-
(windtrap
135-
;; For the scope of the project the [:with-test] annotation below does not
136-
;; match our conventions, as this is test-only package. But without it,
137-
;; [opam-dune-lint] complains (it is only used by a dune [test] stanza atm.)
138-
;; We wish to continue relying on a clean exit code for [opam-dune-lint] at
139-
;; this time, so we added the annotation.
140-
(and
141-
:with-test
142-
(>= 0.1.0)))
143134
(yojson
144135
(>= 2.2.2))))
145136

src/myers/COPYING.HEADER

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,3 @@
1-
crs-myers - Vendoring windtrap.myers with minor changes
1+
crs-myers - Myers diff computation and unified-diff printing
22
Copyright (C) 2026 Mathieu Barbin <mathieu.barbin@gmail.com>
33
SPDX-License-Identifier: ISC
4-
5-
The code was vendored from [https://github.com/invariant-hq/windtrap].
6-
7-
Copyright (c) 2026 Invariant Systems. All rights reserved.
8-
SPDX-License-Identifier: ISC

src/myers/dune

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
(library
22
(name crs_myers)
33
(package crs-tests)
4+
(private_modules merge3)
45
(flags :standard -w +a-4-40-41-42-44-45-48-66 -warn-error +a)
56
(instrumentation
67
(backend bisect_ppx))

src/myers/merge3.ml

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,146 @@
1+
(**************************************************************************)
2+
(* crs-myers - Myers diff computation and unified-diff printing *)
3+
(* Copyright (C) 2026 Mathieu Barbin <mathieu.barbin@gmail.com> *)
4+
(* SPDX-License-Identifier: ISC *)
5+
(**************************************************************************)
6+
7+
(* Copyright (c) 2024-2026 Thomas Gazagnaire <thomas@gazagnaire.org> *)
8+
(* SPDX-License-Identifier: ISC *)
9+
10+
(* Notice: This file was vendored from gazagnaire/ocaml-merge3 (the [Merge3]
11+
module, [lib/merge3.ml]) as documented in [vendor.json] and the project's
12+
root [NOTICE.md].
13+
14+
List of changes:
15+
16+
- Applied local project ocamlformat (janestreet profile).
17+
- Removed the parts unused by this project.
18+
19+
- Replace use of globally-visible [Stdlib.Exit] exception by a custom one.
20+
An [eq] that itself raised [Exit] would be silently swallowed and yield
21+
a wrong diff. *)
22+
23+
(** {1 Myers' O(ND) Diff Algorithm}
24+
25+
E. W. Myers, "An O(ND) Difference Algorithm and Its Variations",
26+
Algorithmica 1(2), 1986, pp. 251–266.
27+
28+
The algorithm finds the shortest edit script (SES) between two sequences. It
29+
works by computing the furthest-reaching D-paths for increasing edit
30+
distances D = 0, 1, 2, ... The key insight is that diagonal k = x - y
31+
represents a state where x characters from [a] and y from [b] have been
32+
consumed, and only even/odd diagonals are reachable at each step.
33+
34+
Time: O(ND) where N = |a| + |b| and D = edit distance. Space: O(D²) for the
35+
trace (one V-array per step). *)
36+
37+
type 'a edit =
38+
| Keep of 'a
39+
| Delete of 'a
40+
| Insert of 'a
41+
42+
(** Raised by [myers_forward] to leave its two nested loops as soon as the end
    of both sequences has been reached; it is caught again inside
    [myers_forward]. A dedicated exception is used instead of [Stdlib.Exit] so
    that an [eq] raising [Exit] is not mistaken for normal termination. *)
exception Myers_done

(** [myers_forward ~eq ~off a b ~max_d] computes the furthest-reaching D-paths
    between [a] and [b], comparing elements with [eq].

    [off] is the index of diagonal 0 inside the working array V (diagonal [k]
    lives at [off + k]), and [max_d] is the largest edit distance explored. V
    has [2*max_d+1] cells, so [off] and [max_d] must be chosen such that every
    index in [off - max_d .. off + max_d] is valid ([diff] passes
    [off = max_d = n + m]).

    Records snapshots of the active V range [-d..d] (size 2d+1) at each step
    instead of the full V array (size 2*max_d+1). This is the standard Myers
    space optimisation: at step d only diagonals -d..d are reachable, so the
    rest of V is unused. The trace becomes O(D²) instead of O(D*N), which is a
    substantial win when D ≪ N (typical for incremental edits).

    Returns [(D, trace)] where [trace.(d)] is an array of length [2*d+1] indexed
    by [k+d] (so trace.(d).(0) holds V[-d], trace.(d).(2*d) holds V[d]). Entries
    of [trace] beyond index [D] are left as [[||]]. *)
let myers_forward ~eq ~off a b ~max_d =
  let n = Array.length a
  and m = Array.length b in
  let vlen = (2 * max_d) + 1 in
  let v = Array.make vlen 0 in
  (* Canonical Myers initialisation V[1] = 0: step 0 starts from x = 0. *)
  v.(off + 1) <- 0;
  let trace = Array.make (max_d + 1) [||] in
  let final_d = ref 0 in
  (try
     for d = 0 to max_d do
       (* Snapshot only the active range used at step d (diagonals -d..d). The
          snapshot is taken before V is updated, so it holds the state reached
          after step d-1. *)
       trace.(d) <- Array.sub v (off - d) ((2 * d) + 1);
       for k0 = 0 to d do
         (* Only every other diagonal is reachable at step d. *)
         let k = -d + (2 * k0) in
         (* Start from the neighbouring diagonal that reaches furthest: moving
            down from k+1 (an insertion) keeps x, moving right from k-1 (a
            deletion) advances x by one. *)
         let x0 =
           if k = -d || (k <> d && v.(off + k - 1) < v.(off + k + 1))
           then v.(off + k + 1)
           else v.(off + k - 1) + 1
         in
         let x = ref x0
         and y = ref (x0 - k) in
         (* Follow the snake: consume equal elements along the diagonal. *)
         while !x < n && !y < m && eq a.(!x) b.(!y) do
           incr x;
           incr y
         done;
         v.(off + k) <- !x;
         if !x >= n && !y >= m
         then (
           final_d := d;
           raise_notrace Myers_done)
       done
     done
   with
   | Myers_done -> ());
  !final_d, trace
;;
91+
92+
(** [backtrack_step ~vv ~dd ~x ~y a b edits] undoes step [dd] of the forward
    pass, starting from position [(x, y)].

    It prepends to [edits] the single non-diagonal edit of that step followed
    by the [Keep] operations of its snake, and returns the [(x, y)] position
    the path was at before the step.

    [vv] is the snapshot at step [dd]: an array of length [2*dd+1] where
    [vv.(k+dd)] holds the V value for diagonal [k]. *)
let backtrack_step ~vv ~dd ~x ~y a b edits =
  let diag = x - y in
  let furthest j = vv.(j + dd) in
  let push e = edits := e :: !edits in
  (* Replay the choice made by the forward pass on this diagonal. The snapshot
     was taken at the start of step [dd], so it still holds the V values that
     choice was based on. The boundary tests on [-dd] and [dd] keep the
     neighbour lookups inside the snapshot. *)
  let came_from_above =
    diag = -dd || (diag <> dd && furthest (diag - 1) < furthest (diag + 1))
  in
  let prev_diag = if came_from_above then diag + 1 else diag - 1 in
  let prev_x = furthest prev_diag in
  (* An insertion keeps x; a deletion advances it by one before the snake. *)
  let snake_start = if came_from_above then prev_x else prev_x + 1 in
  (* The list is built back to front: the snake's elements are pushed from the
     last one to the first, then the edit that precedes the snake. *)
  for i = x - 1 downto snake_start do
    push (Keep a.(i))
  done;
  push
    (if came_from_above
     then Insert b.(snake_start - diag - 1)
     else Delete a.(snake_start - 1));
  prev_x, prev_x - prev_diag
;;
119+
120+
(** [diff ~eq a b] returns an edit script turning [a] into [b], with [eq] as
    the equality on elements.

    An empty side is handled directly (all [Insert] or all [Delete]).
    Otherwise the forward pass is run with [max_d = n + m], the recorded trace
    is walked backwards from the end of both sequences, and the common prefix
    left over once step 0 is reached is emitted as [Keep] operations. *)
let diff ~eq (a : 'a array) (b : 'a array) : 'a edit list =
  match Array.length a, Array.length b with
  | 0, 0 -> []
  | 0, _ -> Array.fold_right (fun line acc -> Insert line :: acc) b []
  | _, 0 -> Array.fold_right (fun line acc -> Delete line :: acc) a []
  | n, m ->
    let max_d = n + m in
    let distance, trace = myers_forward ~eq ~off:max_d a b ~max_d in
    let edits = ref [] in
    (* Undo steps [distance], [distance - 1], ..., 1 in turn; the result is the
       x coordinate at which step 0's snake ends. *)
    let rec unwind dd (x, y) =
      if dd = 0
      then x
      else unwind (dd - 1) (backtrack_step ~vv:trace.(dd) ~dd ~x ~y a b edits)
    in
    let prefix_len = unwind distance (n, m) in
    (* Step 0 is a pure snake: the leading elements shared by [a] and [b]. *)
    for i = prefix_len - 1 downto 0 do
      edits := Keep a.(i) :: !edits
    done;
    !edits
;;

src/myers/merge3.mli

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,31 @@
1+
(*_*************************************************************************)
2+
(*_ crs-myers - Myers diff computation and unified-diff printing *)
3+
(*_ Copyright (C) 2026 Mathieu Barbin <mathieu.barbin@gmail.com> *)
4+
(*_ SPDX-License-Identifier: ISC *)
5+
(*_*************************************************************************)
6+
7+
(*_ Copyright (c) 2024-2026 Thomas Gazagnaire <thomas@gazagnaire.org> *)
8+
(*_ SPDX-License-Identifier: ISC *)
9+
10+
(** Myers' O(ND) shortest-edit-script, vendored from gazagnaire/ocaml-merge3.
11+
12+
Only the pure diff computation is vendored; see [merge3.ml] and the root
13+
[NOTICE.md] for the list of (non-algorithmic) parts removed. *)
14+
15+
(** An edit operation in the shortest edit script. *)
16+
type 'a edit =
17+
| Keep of 'a (** Line present in both sequences. *)
18+
| Delete of 'a (** Line present in old, absent in new. *)
19+
| Insert of 'a (** Line absent in old, present in new. *)
20+
21+
(** [diff ~eq a b] computes the shortest edit script from [a] to [b] using
22+
Myers' O(ND) algorithm. [eq] is the equality predicate.
23+
24+
The result is a list of edits that transforms [a] into [b]:
25+
- [Keep x]: line [x] is present in both
26+
- [Delete x]: line [x] from [a] is removed
27+
- [Insert x]: line [x] from [b] is added
28+
29+
Time: O(ND) where N = |a| + |b| and D = edit distance. Space: O(D²) for the
30+
trace. *)
31+
val diff : eq:('a -> 'a -> bool) -> 'a array -> 'a array -> 'a edit list

0 commit comments

Comments
 (0)