Skip to content

Commit

Permalink
Add simple Hunt–McIlroy LCS algorithm (#10)
Browse files Browse the repository at this point in the history
  • Loading branch information
mitsuhiko committed Feb 14, 2021
1 parent df78bdc commit bee5d88
Show file tree
Hide file tree
Showing 7 changed files with 267 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ All notable changes to similar are documented here.
## 1.2.0

* Make the unicode feature optional for inline diffing.
* Added Hunt–McIlroy LCS algorithm.

## 1.1.0

Expand Down
181 changes: 181 additions & 0 deletions src/algorithms/lcs.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,181 @@
//! Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
//!
//! * time: `O(NM)` (the full LCS table is filled once)
//! * space: `O(NM)`
use std::ops::{Index, Range};

use crate::algorithms::DiffHook;

/// Hunt–McIlroy / Hunt–Szymanski LCS diff algorithm.
///
/// Diff `old`, between indices `old_range` and `new` between indices `new_range`.
///
/// The common prefix and suffix of the two ranges are reported as `equal`
/// runs; only the differing middle part is run through the LCS table.  All
/// indices handed to the hook are absolute positions in `old` and `new`.
///
/// `d.finish()` is called on every successful return path, including the
/// shortcuts taken when one of the ranges is empty.
///
/// # Errors
///
/// Propagates the first error returned by any of the hook's callbacks.
pub fn diff<Old, New, D>(
    d: &mut D,
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> Result<(), D::Error>
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    D: DiffHook,
    New::Output: PartialEq<Old::Output>,
{
    // Shortcuts: with one side empty the whole other side is a single
    // delete/insert.  A hook may defer emitting operations until `finish`,
    // so it has to be called here just like at the end of the regular path.
    if new_range.is_empty() {
        d.delete(old_range.start, old_range.len(), new_range.start)?;
        return d.finish();
    } else if old_range.is_empty() {
        d.insert(old_range.start, new_range.start, new_range.len())?;
        return d.finish();
    }

    // Length of the run of equal elements at the start of both ranges.
    let prefix_len = old_range
        .clone()
        .zip(new_range.clone())
        .take_while(|&(old_pos, new_pos)| new[new_pos] == old[old_pos])
        .count();
    // Length of the run of equal elements at the end of both ranges.  The
    // `take` keeps the suffix from overlapping the prefix already counted.
    let suffix_len = old_range
        .clone()
        .rev()
        .zip(new_range.clone().rev())
        .take(old_range.len().min(new_range.len()) - prefix_len)
        .take_while(|&(old_pos, new_pos)| new[new_pos] == old[old_pos])
        .count();

    // The ranges handed to `make_table` are absolute positions of the trimmed
    // middle part in `old`/`new` (not offsets relative to the range start),
    // matching the coordinates used everywhere else in this function.
    let table = make_table(
        old,
        (old_range.start + prefix_len)..(old_range.end - suffix_len),
        new,
        (new_range.start + prefix_len)..(new_range.end - suffix_len),
    );
    // `old_idx` / `new_idx` are offsets into the trimmed middle part.
    let mut old_idx = 0;
    let mut new_idx = 0;
    let new_len = new_range.len() - prefix_len - suffix_len;
    let old_len = old_range.len() - prefix_len - suffix_len;

    if prefix_len > 0 {
        d.equal(old_range.start, new_range.start, prefix_len)?;
    }

    // Walk the table from the top-left corner, emitting one operation per step.
    while new_idx < new_len && old_idx < old_len {
        let old_orig_idx = old_range.start + prefix_len + old_idx;
        let new_orig_idx = new_range.start + prefix_len + new_idx;

        if new[new_orig_idx] == old[old_orig_idx] {
            d.equal(old_orig_idx, new_orig_idx, 1)?;
            old_idx += 1;
            new_idx += 1;
        } else if table[new_idx][old_idx + 1] >= table[new_idx + 1][old_idx] {
            // Skipping the old element keeps at least as long a common
            // subsequence as skipping the new one: delete it.
            d.delete(old_orig_idx, 1, new_orig_idx)?;
            old_idx += 1;
        } else {
            d.insert(old_orig_idx, new_orig_idx, 1)?;
            new_idx += 1;
        }
    }

    // Whatever is left on the old side after the walk has no counterpart.
    if old_idx < old_len {
        d.delete(
            old_range.start + prefix_len + old_idx,
            old_len - old_idx,
            new_range.start + prefix_len + new_idx,
        )?;
        old_idx = old_len;
    }

    // Likewise, whatever is left on the new side is a pure insertion.
    if new_idx < new_len {
        d.insert(
            old_range.start + prefix_len + old_idx,
            new_range.start + prefix_len + new_idx,
            new_len - new_idx,
        )?;
    }

    if suffix_len > 0 {
        d.equal(
            old_range.start + old_len + prefix_len,
            new_range.start + new_len + prefix_len,
            suffix_len,
        )?;
    }

    d.finish()
}

/// Convenience wrapper around [`diff`] that compares two slices in full.
///
/// Equivalent to calling [`diff`] with the ranges `0..old.len()` and
/// `0..new.len()`.
pub fn diff_slices<D, T>(d: &mut D, old: &[T], new: &[T]) -> Result<(), D::Error>
where
    D: DiffHook,
    T: PartialEq,
{
    let whole_old = 0..old.len();
    let whole_new = 0..new.len();
    diff(d, old, whole_old, new, whole_new)
}

/// Builds the LCS length table for `old[old_range]` versus `new[new_range]`.
///
/// The result has `new_range.len() + 1` rows and `old_range.len() + 1`
/// columns.  `table[i][j]` holds the length of the longest common
/// subsequence of the elements of `new` at `new_range.start + i..new_range.end`
/// and the elements of `old` at `old_range.start + j..old_range.end`.  The
/// last row and the last column are therefore all zero.
///
/// Both ranges are absolute index ranges into their sequences; the range
/// starts are honoured when elements are compared.
fn make_table<Old, New>(
    old: &Old,
    old_range: Range<usize>,
    new: &New,
    new_range: Range<usize>,
) -> Vec<Vec<u32>>
where
    Old: Index<usize> + ?Sized,
    New: Index<usize> + ?Sized,
    New::Output: PartialEq<Old::Output>,
{
    let old_len = old_range.len();
    let new_len = new_range.len();
    // Zero-initialised, so the sentinel row/column need no explicit setup.
    let mut table: Vec<Vec<u32>> = vec![vec![0; old_len + 1]; new_len + 1];

    // Fill from the bottom-right corner: every cell only depends on cells
    // below it and/or to its right.
    for i in (0..new_len).rev() {
        for j in (0..old_len).rev() {
            table[i][j] = if new[new_range.start + i] == old[old_range.start + j] {
                table[i + 1][j + 1] + 1
            } else {
                table[i + 1][j].max(table[i][j + 1])
            };
        }
    }

    table
}

/// Checks the LCS table against a small hand-computed example: the only
/// common element is `2`, found at `old[0]` and `new[2]`.
#[test]
fn test_table() {
    let old: &[i32] = &[2, 3];
    let new: &[i32] = &[0, 1, 2];
    // One row per element of `new` plus a trailing all-zero sentinel row.
    let expected: Vec<Vec<u32>> = vec![
        vec![1, 0, 0],
        vec![1, 0, 0],
        vec![1, 0, 0],
        vec![0, 0, 0],
    ];
    let table = make_table(old, 0..2, new, 0..3);
    assert_eq!(table, expected);
}

// Snapshot test: the inputs differ only at index 3 (`3` vs `9`).  The hook is
// a `Capture` wrapped in `Replace`, and the captured operations are compared
// against the stored `insta` snapshot.
#[test]
fn test_diff() {
let a: &[usize] = &[0, 1, 2, 3, 4];
let b: &[usize] = &[0, 1, 2, 9, 4];

let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
// Unwrap the `Replace` adapter to get at the captured operations.
insta::assert_debug_snapshot!(d.into_inner().ops());
}

// Snapshot test: the inputs share the prefix `0, 1, 2`, differ in the next two
// elements, share `4, 4` after that and differ again in the last element.
// The hook is a `Capture` wrapped in `Replace`; the captured operations are
// compared against the stored `insta` snapshot.
#[test]
fn test_contiguous() {
let a: &[usize] = &[0, 1, 2, 3, 4, 4, 4, 5];
let b: &[usize] = &[0, 1, 2, 8, 9, 4, 4, 7];

let mut d = crate::algorithms::Replace::new(crate::algorithms::Capture::new());
diff_slices(&mut d, a, b).unwrap();
// Unwrap the `Replace` adapter to get at the captured operations.
insta::assert_debug_snapshot!(d.into_inner().ops());
}

// Snapshot test using a plain `Capture` hook (no `Replace` wrapper), so the
// raw sequence of operations emitted by the algorithm is what gets compared
// against the stored `insta` snapshot.
#[test]
fn test_pat() {
let a: &[usize] = &[0, 1, 3, 4, 5];
let b: &[usize] = &[0, 1, 4, 5, 8, 9];

let mut d = crate::algorithms::Capture::new();
diff_slices(&mut d, a, b).unwrap();
insta::assert_debug_snapshot!(d.ops());
}
2 changes: 2 additions & 0 deletions src/algorithms/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ pub use replace::Replace;
#[doc(no_inline)]
pub use crate::Algorithm;

pub mod lcs;
pub mod myers;
pub mod patience;

Expand All @@ -71,6 +72,7 @@ where
match alg {
Algorithm::Myers => myers::diff(d, old, old_range, new, new_range),
Algorithm::Patience => patience::diff(d, old, old_range, new, new_range),
Algorithm::Lcs => lcs::diff(d, old, old_range, new, new_range),
}
}

Expand Down
28 changes: 28 additions & 0 deletions src/algorithms/snapshots/similar__algorithms__lcs__contiguous.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
---
source: src/algorithms/lcs.rs
expression: d.into_inner().ops()
---
[
Equal {
old_index: 0,
new_index: 0,
len: 3,
},
Replace {
old_index: 3,
old_len: 2,
new_index: 3,
new_len: 2,
},
Equal {
old_index: 5,
new_index: 5,
len: 2,
},
Replace {
old_index: 7,
old_len: 1,
new_index: 7,
new_len: 1,
},
]
22 changes: 22 additions & 0 deletions src/algorithms/snapshots/similar__algorithms__lcs__diff.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
---
source: src/algorithms/lcs.rs
expression: d.into_inner().ops()
---
[
Equal {
old_index: 0,
new_index: 0,
len: 3,
},
Replace {
old_index: 3,
old_len: 1,
new_index: 3,
new_len: 1,
},
Equal {
old_index: 4,
new_index: 4,
len: 1,
},
]
31 changes: 31 additions & 0 deletions src/algorithms/snapshots/similar__algorithms__lcs__pat.snap
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
---
source: src/algorithms/lcs.rs
expression: d.ops()
---
[
Equal {
old_index: 0,
new_index: 0,
len: 2,
},
Delete {
old_index: 2,
old_len: 1,
new_index: 2,
},
Equal {
old_index: 3,
new_index: 2,
len: 1,
},
Equal {
old_index: 4,
new_index: 3,
len: 1,
},
Insert {
old_index: 5,
new_index: 4,
new_len: 2,
},
]
2 changes: 2 additions & 0 deletions src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,8 @@ pub enum Algorithm {
Myers,
/// Picks the patience algorithm from [`crate::algorithms::patience`]
Patience,
/// Picks the LCS algorithm from [`crate::algorithms::lcs`]
Lcs,
}

impl Default for Algorithm {
Expand Down

0 comments on commit bee5d88

Please sign in to comment.