8 changes: 4 additions & 4 deletions README.md
@@ -17,7 +17,7 @@ comparing the same file to multiple different files.
 `imara-diff` provides two diff algorithms:

 * The linear-space variant of the well known [Myers algorithm](http://www.xmailserver.org/diff2.pdf)
-* The **Histogram** algorithm which variant of the patience diff algorithm.
+* The **Histogram** algorithm which is a variant of the patience diff algorithm.

 Myers algorithm has been enhanced with preprocessing and multiple heuristics to ensure fast runtime in pathological
 cases to avoid quadratic time complexity and closely matches the behavior of gnu-diff and git.
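
For context on the two algorithms named in this hunk: the choice between them is made through the crate's `Algorithm` enum when calling `diff`. The following is a minimal sketch assuming the documented `InternedInput`, `Algorithm` and `UnifiedDiffBuilder` items of the 0.1-era public API; it is illustrative only and not part of this PR's diff.

```rust
use imara_diff::intern::InternedInput;
use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};

fn main() {
    let before = "fn foo() {\n    bar();\n}\n";
    let after = "fn foo() {\n    bar();\n    baz();\n}\n";

    // Intern both inputs: for &str this tokenizes by line and maps equal
    // lines to the same integer token, so the algorithms only compare ids.
    let input = InternedInput::new(before, after);

    // Algorithm::Histogram or Algorithm::Myers picks which of the two
    // algorithms described above runs.
    let unified = diff(Algorithm::Histogram, &input, UnifiedDiffBuilder::new(&input));
    print!("{unified}");
}
```

Swapping `Algorithm::Histogram` for `Algorithm::Myers` selects the linear-space Myers variant instead.
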
@@ -109,9 +109,9 @@ The sourcecode of the helix editor.

 ## Stability Policy

-`imara-diff` uses [Semantic Versioning (SemVar)](https://semver.org/).
-All non-breaking changes to the public rust API will cause a minor `SemVar` bump.
-All breaking changes to to the public rust API will cause a major `SemVar` bump.
+`imara-diff` uses [Semantic Versioning (SemVer)](https://semver.org/).
+All non-breaking changes to the public rust API will cause a minor `SemVer` bump.
+All breaking changes to to the public rust API will cause a major `SemVer` bump.
 Changes in the produced diffs are also considered breaking changes if the produced diff was valid.
 If the produced diff was invalid the change will be considered a bugfix.

18 changes: 9 additions & 9 deletions src/histogram.rs
@@ -12,7 +12,7 @@ mod list_pool;
 const MAX_CHAIN_LEN: u32 = 63;

 struct Histogram {
-    token_occurances: Vec<ListHandle>,
+    token_occurrences: Vec<ListHandle>,
     pool: ListPool,
 }

@@ -32,7 +32,7 @@ pub fn diff<S: Sink>(
 impl Histogram {
     fn new(num_buckets: u32) -> Histogram {
         Histogram {
-            token_occurances: vec![ListHandle::default(); num_buckets as usize],
+            token_occurrences: vec![ListHandle::default(); num_buckets as usize],
             pool: ListPool::new(2 * num_buckets),
         }
     }
@@ -41,17 +41,17 @@ impl Histogram {
         self.pool.clear();
     }

-    fn token_occurances(&self, token: Token) -> &[u32] {
-        self.token_occurances[token.0 as usize].as_slice(&self.pool)
+    fn token_occurrences(&self, token: Token) -> &[u32] {
+        self.token_occurrences[token.0 as usize].as_slice(&self.pool)
     }

-    fn num_token_occurances(&self, token: Token) -> u32 {
-        self.token_occurances[token.0 as usize].len(&self.pool)
+    fn num_token_occurrences(&self, token: Token) -> u32 {
+        self.token_occurrences[token.0 as usize].len(&self.pool)
     }

     fn populate(&mut self, file: &[Token]) {
         for (i, &token) in file.iter().enumerate() {
-            self.token_occurances[token.0 as usize].push(i as u32, &mut self.pool);
+            self.token_occurrences[token.0 as usize].push(i as u32, &mut self.pool);
         }
     }

@@ -109,9 +109,9 @@ impl Histogram {
                 after_off += after_end;
             }
             None => {
-                // we are diffing two extremly large repetitive file
+                // we are diffing two extremely large repetitive files
                 // this is a worst case for histogram diff with O(N^2) performance
-                // fallback to myers to maintain linear time complxity
+                // fallback to myers to maintain linear time complexity
                 myers::diff(
                     before,
                     after,
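
The comment corrected in the last hunk refers to the guard against the Histogram algorithm's quadratic worst case on highly repetitive inputs. Below is a rough, self-contained sketch of that criterion; the token representation, the helper name and the counting strategy are made up for illustration and are not the crate's actual internals.

```rust
/// Hypothetical sketch: tokens are assumed to already be interned as dense u32 ids
/// smaller than `num_token_ids`.
const MAX_CHAIN_LEN: u32 = 63;

/// Returns true when even the rarest token shared by both sides occurs more than
/// MAX_CHAIN_LEN times on the `before` side, i.e. when a histogram diff would find
/// no usable anchor and falling back to Myers is preferable.
fn should_fall_back_to_myers(before: &[u32], after: &[u32], num_token_ids: usize) -> bool {
    // Count how often each token id occurs on the `before` side.
    let mut counts = vec![0u32; num_token_ids];
    for &token in before {
        counts[token as usize] += 1;
    }
    // Among tokens that also appear on the `after` side, find the rarest one.
    after
        .iter()
        .map(|&token| counts[token as usize])
        .filter(|&count| count != 0)
        .min()
        .map_or(false, |rarest| rarest > MAX_CHAIN_LEN)
}
```

In the code shown in the hunk, that is the situation where the LCS search reports failure (no common token with at most `MAX_CHAIN_LEN` occurrences) and the `None` arm falls back to `myers::diff`.
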
34 changes: 17 additions & 17 deletions src/histogram/lcs.rs
@@ -8,7 +8,7 @@ pub(super) fn find_lcs(
 ) -> Option<Lcs> {
     let mut search = LcsSearch {
         lcs: Lcs::default(),
-        min_occurances: MAX_CHAIN_LEN + 1,
+        min_occurrences: MAX_CHAIN_LEN + 1,
         found_cs: false,
     };
     search.run(before, after, histogram);
@@ -28,17 +28,17 @@ pub struct Lcs {

 pub struct LcsSearch {
     lcs: Lcs,
-    min_occurances: u32,
+    min_occurrences: u32,
     found_cs: bool,
 }

 impl LcsSearch {
     fn run(&mut self, before: &[Token], after: &[Token], histogram: &mut Histogram) {
         let mut pos = 0;
         while let Some(&token) = after.get(pos as usize) {
-            if histogram.num_token_occurances(token) != 0 {
+            if histogram.num_token_occurrences(token) != 0 {
                 self.found_cs = true;
-                if histogram.num_token_occurances(token) <= self.min_occurances {
+                if histogram.num_token_occurrences(token) <= self.min_occurrences {
                     pos = self.update_lcs(pos, token, histogram, before, after);
                     continue;
                 }
@@ -51,7 +51,7 @@ impl LcsSearch {
     }

     fn success(&mut self) -> bool {
-        !self.found_cs || self.min_occurances <= MAX_CHAIN_LEN
+        !self.found_cs || self.min_occurrences <= MAX_CHAIN_LEN
     }

     fn update_lcs(
@@ -63,11 +63,11 @@
         after: &[Token],
     ) -> u32 {
         let mut next_token_idx2 = after_pos + 1;
-        let mut occurances_iter = histogram.token_occurances(token).iter().copied();
-        let mut token_idx1 = occurances_iter.next().unwrap();
+        let mut occurrences_iter = histogram.token_occurrences(token).iter().copied();
+        let mut token_idx1 = occurrences_iter.next().unwrap();

-        'occurances_iter: loop {
-            let mut occurances = histogram.num_token_occurances(token);
+        'occurrences_iter: loop {
+            let mut occurrences = histogram.num_token_occurrences(token);
             let mut start1 = token_idx1;
             let mut start2 = after_pos;
             loop {
@@ -79,8 +79,8 @@
                 if matches!((token1, token2), (Some(token1), Some(token2)) if token1 == token2) {
                     start1 -= 1;
                     start2 -= 1;
-                    let new_occurances = histogram.num_token_occurances(before[start1 as usize]);
-                    occurances = occurances.min(new_occurances);
+                    let new_occurrences = histogram.num_token_occurrences(before[start1 as usize]);
+                    occurrences = occurrences.min(new_occurrences);
                 } else {
                     break;
                 }
@@ -93,8 +93,8 @@
                 let token1 = before.get(end1 as usize);
                 let token2 = after.get(end2 as usize);
                 if matches!((token1, token2), (Some(token1), Some(token2)) if token1 == token2) {
-                    let new_occurances = histogram.num_token_occurances(before[end1 as usize]);
-                    occurances = occurances.min(new_occurances);
+                    let new_occurrences = histogram.num_token_occurrences(before[end1 as usize]);
+                    occurrences = occurrences.min(new_occurrences);
                     end1 += 1;
                     end2 += 1;
                 } else {
@@ -108,8 +108,8 @@

             let len = end2 - start2;
             debug_assert_eq!(len, end1 - start1);
-            if self.lcs.len < len || self.min_occurances > occurances {
-                self.min_occurances = occurances;
+            if self.lcs.len < len || self.min_occurrences > occurrences {
+                self.min_occurrences = occurrences;
                 self.lcs = Lcs {
                     before_start: start1,
                     after_start: start2,
@@ -118,13 +118,13 @@
             }

             loop {
-                if let Some(next_token_idx) = occurances_iter.next() {
+                if let Some(next_token_idx) = occurrences_iter.next() {
                     if next_token_idx > end2 {
                         token_idx1 = next_token_idx;
                         break;
                     }
                 } else {
-                    break 'occurances_iter;
+                    break 'occurrences_iter;
                 }
             }
         }
4 changes: 2 additions & 2 deletions src/histogram/list_pool.rs
@@ -72,8 +72,8 @@ impl Default for ListHandle {
     }
 }

-const MAX_SIZE_CLAS: SizeClass = sclass_for_length(super::MAX_CHAIN_LEN - 1);
-const NUM_SIZE_CLASS: usize = MAX_SIZE_CLAS as usize + 1;
+const MAX_SIZE_CLASS: SizeClass = sclass_for_length(super::MAX_CHAIN_LEN - 1);
+const NUM_SIZE_CLASS: usize = MAX_SIZE_CLASS as usize + 1;

 /// A memory pool for storing lists of `T`.
 #[derive(Clone, Debug)]
8 changes: 4 additions & 4 deletions src/intern.rs
@@ -10,7 +10,7 @@ use hashbrown::DefaultHashBuilder as RandomState;
 /// For text this is usually a line, a word or a single character.
 /// All [algorithms](crate::Algorithm) operate on interned tokens instead
 /// of using the token data directly.
-/// This allows for much better performance by amortizing the cost hashing/equality.
+/// This allows for much better performance by amortizing the cost of hashing/equality.
 ///
 /// While you can intern tokens yourself it is strongly recommended to use [`InternedInput`] module.
 #[derive(PartialEq, Eq, Hash, Clone, Copy, Debug)]
@@ -42,7 +42,7 @@ pub trait TokenSource {
 /// For text this is usually a line, a word or a single character.
 /// All [algorithms](crate::Algorithm) operate on interned tokens instead
 /// of using the token data directly.
-/// This allows for much better performance by amortizing the cost hashing/equality.
+/// This allows for much better performance by amortizing the cost of hashing/equality.
 ///
 /// While you can intern tokens yourself it is strongly recommended to use [`InternedInput`] module.
 #[derive(Default)]
@@ -74,7 +74,7 @@ impl<T: Eq + Hash> InternedInput<T> {
         res
     }

-    /// replaces `self.before` wtih the iterned Tokens yielded by `input`
+    /// replaces `self.before` with the interned Tokens yielded by `input`
     /// Note that this does not erase any tokens from the interner and might therefore be considered
     /// a memory leak. If this function is called often over a long_running process
     /// consider clearing the interner with [`clear`](crate::intern::Interner::clear).
@@ -84,7 +84,7 @@
             .extend(input.map(|token| self.interner.intern(token)));
     }

-    /// replaces `self.before` wtih the iterned Tokens yielded by `input`
+    /// replaces `self.before` with the interned Tokens yielded by `input`
     /// Note that this does not erase any tokens from the interner and might therefore be considered
     /// a memory leak. If this function is called often over a long_running process
     /// consider clearing the interner with [`clear`](crate::intern::Interner::clear) or
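
The doc comments touched here belong to `InternedInput`'s update methods. A small usage sketch follows, assuming the public `before`/`after`/`interner` fields and an `update_before` that accepts any iterator of tokens, as the surrounding doc comments describe; illustrative only, not part of this PR's diff.

```rust
use imara_diff::intern::InternedInput;

fn main() {
    // &str is a TokenSource that tokenizes by line, so the interner hands out
    // one integer Token per distinct line of either side.
    let mut input: InternedInput<&str> = InternedInput::new("a\nb\nc\n", "a\nx\nc\n");
    println!("before: {} tokens, after: {} tokens", input.before.len(), input.after.len());

    // Replace only the `before` side, re-using the same interner. As the doc
    // comment notes, tokens interned for the old content are not freed, so a
    // long-running process may want to clear the interner between unrelated diffs.
    input.update_before("a\nb\nb\nc\n".lines());
    println!("before now has {} tokens", input.before.len());
}
```
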
8 changes: 4 additions & 4 deletions src/lib.rs
@@ -10,8 +10,8 @@

 //! Imara-diff provides two diff algorithms:
 //!
-//! * The linear-space variant of the well known [**myer** algorithm](http://www.xmailserver.org/diff2.pdf)
-//! * The **Histogram** algorithm which variant of the patience diff algorithm.
+//! * The linear-space variant of the well known [**Myers** algorithm](http://www.xmailserver.org/diff2.pdf)
+//! * The **Histogram** algorithm which is a variant of the patience diff algorithm.
 //!
 //! Myers algorithm has been enhanced with preprocessing and multiple heuristics to ensure fast runtime in pathological
 //! cases to avoid quadratic time complexity and closely matches the behaviour of gnu-diff and git.
@@ -22,7 +22,7 @@
 //! For example while comparing multiple different linux kernel it performs up to 30 times better than the `similar` crate:
 #![cfg_attr(doc, doc=concat!("<img width=\"600\" class=\"figure\" src=\"data:image/svg+xml;base64,", include_str!("../plots/linux_comparison.svg.base64"), "\"></img>"))]
 //!
-//! # Api Overview
+//! # API Overview
 //!
 //! Imara-diff provides the [`UnifiedDiffBuilder`](crate::UnifiedDiffBuilder) for building
 //! a human-readable diff similar to the output of `git diff` or `diff -u`.
@@ -127,7 +127,7 @@
 //!
 //! For `&str` and `&[u8]` imara-diff will compute a line diff by default.
 //! To perform diffs of different tokenizations and collections you can implement the [`TokenSource`](crate::intern::TokenSource) trait.
-//! For example the imara-diff provides an alternative tokenziser for line-diffs that includes the line terminator in the line:
+//! For example the imara-diff provides an alternative tokenizer for line-diffs that includes the line terminator in the line:
 //!
 //! ```
 //! use imara_diff::intern::InternedInput;
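
The doc line fixed above mentions the alternative line tokenizer that keeps the line terminator. A short sketch of how it plugs into the interning step, assuming the crate's `sources::lines_with_terminator` helper alongside the items used in the earlier example; illustrative only, not part of this PR's diff.

```rust
use imara_diff::intern::InternedInput;
use imara_diff::sources::lines_with_terminator;
use imara_diff::{diff, Algorithm, UnifiedDiffBuilder};

fn main() {
    let before = "foo\nbar\n";
    let after = "foo\nbaz\r\n";

    // Keep the terminator as part of each line token, so lines that differ
    // only in their line ending intern to different tokens.
    let input = InternedInput::new(lines_with_terminator(before), lines_with_terminator(after));
    let unified = diff(Algorithm::Histogram, &input, UnifiedDiffBuilder::new(&input));
    print!("{unified}");
}
```
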