|
| 1 | +(**************************************************************************) |
| 2 | +(* crs-myers - Myers diff computation and unified-diff printing *) |
| 3 | +(* Copyright (C) 2026 Mathieu Barbin <mathieu.barbin@gmail.com> *) |
| 4 | +(* SPDX-License-Identifier: ISC *) |
| 5 | +(**************************************************************************) |
| 6 | + |
| 7 | +(* Copyright (c) 2024-2026 Thomas Gazagnaire <thomas@gazagnaire.org> *) |
| 8 | +(* SPDX-License-Identifier: ISC *) |
| 9 | + |
| 10 | +(* Notice: This file was vendored from gazagnaire/ocaml-merge3 (the [Merge3] |
| 11 | + module, [lib/merge3.ml]) as documented in [vendor.json] and the project's |
| 12 | + root [NOTICE.md]. |
| 13 | +
|
| 14 | + List of changes: |
| 15 | +
|
| 16 | + - Applied local project ocamlformat (janestreet profile). |
| 17 | + - Removed the parts unused by this project. |
| 18 | +
|
| 19 | + - Replace use of globally-visible [Stdlib.Exit] exception by a custom one. |
| 20 | + An [eq] that itself raised [Exit] would be silently swallowed and yield |
| 21 | + a wrong diff. *) |
| 22 | + |
| 23 | +(** {1 Myers' O(ND) Diff Algorithm} |
| 24 | +
|
| 25 | + E. W. Myers, "An O(ND) Difference Algorithm and Its Variations", |
| 26 | + Algorithmica 1(2), 1986, pp. 251–266. |
| 27 | +
|
| 28 | + The algorithm finds the shortest edit script (SES) between two sequences. It |
| 29 | + works by computing the furthest-reaching D-paths for increasing edit |
| 30 | + distances D = 0, 1, 2, ... The key insight is that diagonal k = x - y |
| 31 | + represents a state where x characters from [a] and y from [b] have been |
| 32 | + consumed, and only even/odd diagonals are reachable at each step. |
| 33 | +
|
| 34 | + Time: O(ND) where N = |a| + |b| and D = edit distance. Space: O(D²) for the |
| 35 | + trace (one V-array per step). *) |
| 36 | + |
| 37 | +type 'a edit = |
| 38 | + | Keep of 'a |
| 39 | + | Delete of 'a |
| 40 | + | Insert of 'a |
| 41 | + |
| 42 | +(** Compute the furthest-reaching D-paths. |
| 43 | +
|
| 44 | + Records snapshots of the active V range [-d..d] (size 2d+1) at each step |
| 45 | + instead of the full V array (size 2*max_d+1). This is the standard Myers |
| 46 | + space optimisation: at step d only diagonals -d..d are reachable, so the |
| 47 | + rest of V is unused. The trace becomes O(D²) instead of O(D*N), which is a |
| 48 | + substantial win when D ≪ N (typical for incremental edits). |
| 49 | +
|
| 50 | + Returns [(D, trace)] where [trace.(d)] is an array of length [2*d+1] indexed |
| 51 | + by [k+d] (so trace.(d).(0) holds V[-d], trace.(d).(2*d) holds V[d]). *) |
| 52 | + |
| 53 | +exception Myers_done |
| 54 | + |
| 55 | +let myers_forward ~eq ~off a b ~max_d = |
| 56 | + let n = Array.length a |
| 57 | + and m = Array.length b in |
| 58 | + let vlen = (2 * max_d) + 1 in |
| 59 | + let v = Array.make vlen 0 in |
| 60 | + v.(off + 1) <- 0; |
| 61 | + let trace = Array.make (max_d + 1) [||] in |
| 62 | + let final_d = ref 0 in |
| 63 | + (try |
| 64 | + for d = 0 to max_d do |
| 65 | + (* Snapshot only the active range used at step d (diagonals -d..d). *) |
| 66 | + trace.(d) <- Array.sub v (off - d) ((2 * d) + 1); |
| 67 | + for k0 = 0 to d do |
| 68 | + let k = -d + (2 * k0) in |
| 69 | + let x0 = |
| 70 | + if k = -d || (k <> d && v.(off + k - 1) < v.(off + k + 1)) |
| 71 | + then v.(off + k + 1) |
| 72 | + else v.(off + k - 1) + 1 |
| 73 | + in |
| 74 | + let x = ref x0 |
| 75 | + and y = ref (x0 - k) in |
| 76 | + while !x < n && !y < m && eq a.(!x) b.(!y) do |
| 77 | + incr x; |
| 78 | + incr y |
| 79 | + done; |
| 80 | + v.(off + k) <- !x; |
| 81 | + if !x >= n && !y >= m |
| 82 | + then ( |
| 83 | + final_d := d; |
| 84 | + raise_notrace Myers_done) |
| 85 | + done |
| 86 | + done |
| 87 | + with |
| 88 | + | Myers_done -> ()); |
| 89 | + !final_d, trace |
| 90 | +;; |
| 91 | + |
| 92 | +(** Backtrack one step in the Myers trace, emitting the snake's [Keep] |
| 93 | + operations and the single non-diagonal edit. Returns the previous [(x, y)] |
| 94 | + position. |
| 95 | +
|
| 96 | + [vv] is the snapshot at step [dd]: an array of length [2*dd+1] where |
| 97 | + [vv.(k+dd)] holds the V value for diagonal [k]. *) |
| 98 | +let backtrack_step ~vv ~dd ~x ~y a b edits = |
| 99 | + let k = x - y in |
| 100 | + (* The previous snapshot only has diagonals -(dd-1)..(dd-1), but we read |
| 101 | + V[k-1] and V[k+1] from the current step's snapshot — those are guaranteed |
| 102 | + to be in range because k ∈ [-dd, dd] and k±1 ∈ [-(dd+1), dd+1], but |
| 103 | + critically when we make the choice we look at V[k-1] and V[k+1] from |
| 104 | + the SAME snapshot (saved at the start of step dd, which is the V state |
| 105 | + after step dd-1), so they're both in [-(dd-1), dd-1] ⊆ [-dd, dd]. *) |
| 106 | + let v_at i = vv.(i + dd) in |
| 107 | + let is_insert = k = -dd || (k <> dd && v_at (k - 1) < v_at (k + 1)) in |
| 108 | + let snake_x = if is_insert then v_at (k + 1) else v_at (k - 1) + 1 in |
| 109 | + for i = x - 1 downto snake_x do |
| 110 | + edits := Keep a.(i) :: !edits |
| 111 | + done; |
| 112 | + if is_insert |
| 113 | + then edits := Insert b.(snake_x - k - 1) :: !edits |
| 114 | + else edits := Delete a.(snake_x - 1) :: !edits; |
| 115 | + let prev_k = if is_insert then k + 1 else k - 1 in |
| 116 | + let prev_x = v_at prev_k in |
| 117 | + prev_x, prev_x - prev_k |
| 118 | +;; |
| 119 | + |
| 120 | +let diff ~eq (a : 'a array) (b : 'a array) : 'a edit list = |
| 121 | + let n = Array.length a |
| 122 | + and m = Array.length b in |
| 123 | + if n = 0 && m = 0 |
| 124 | + then [] |
| 125 | + else if n = 0 |
| 126 | + then Array.to_list b |> List.map (fun x -> Insert x) |
| 127 | + else if m = 0 |
| 128 | + then Array.to_list a |> List.map (fun x -> Delete x) |
| 129 | + else ( |
| 130 | + let max_d = n + m in |
| 131 | + let off = max_d in |
| 132 | + let d, trace = myers_forward ~eq ~off a b ~max_d in |
| 133 | + let edits = ref [] in |
| 134 | + let x = ref n |
| 135 | + and y = ref m in |
| 136 | + for step = 0 to d - 1 do |
| 137 | + let dd = d - step in |
| 138 | + let nx, ny = backtrack_step ~vv:trace.(dd) ~dd ~x:!x ~y:!y a b edits in |
| 139 | + x := nx; |
| 140 | + y := ny |
| 141 | + done; |
| 142 | + for i = !x - 1 downto 0 do |
| 143 | + edits := Keep a.(i) :: !edits |
| 144 | + done; |
| 145 | + !edits) |
| 146 | +;; |
0 commit comments