Skip to content

Commit

Permalink
feat: lower bounds for Levenshtein edit distance (#6118)
Browse files Browse the repository at this point in the history
Co-authored-by: Scott Morrison <scott.morrison@gmail.com>
Co-authored-by: Eric Wieser <wieser.eric@gmail.com>
Co-authored-by: Thomas Browning <tb65536@uw.edu>
Co-authored-by: Chris Hughes <chrishughes24@gmail.com>
Co-authored-by: Oliver Nash <github@olivernash.org>
Co-authored-by: sgouezel <sebastien.gouezel@univ-rennes1.fr>
Co-authored-by: Alex J Best <alex.j.best@gmail.com>
Co-authored-by: Xavier Roblot <46200072+xroblot@users.noreply.github.com>
Co-authored-by: michaellee94 <michael_lee1@brown.edu>
  • Loading branch information
10 people committed Aug 29, 2023
1 parent b9d6ccb commit d7b5d38
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 2 deletions.
1 change: 1 addition & 0 deletions Mathlib.lean
Expand Up @@ -1498,6 +1498,7 @@ import Mathlib.Data.List.Dedup
import Mathlib.Data.List.Defs
import Mathlib.Data.List.Destutter
import Mathlib.Data.List.Duplicate
import Mathlib.Data.List.EditDistance.Bounds
import Mathlib.Data.List.EditDistance.Defs
import Mathlib.Data.List.FinRange
import Mathlib.Data.List.Forall2
Expand Down
98 changes: 98 additions & 0 deletions Mathlib/Data/List/EditDistance/Bounds.lean
@@ -0,0 +1,98 @@
/-
Copyright (c) 2023 Kim Liesinger. All rights reserved.
Released under Apache 2.0 license as described in the file LICENSE.
Authors: Kim Liesinger
-/
import Mathlib.Data.List.Infix
import Mathlib.Data.List.MinMax
import Mathlib.Data.List.EditDistance.Defs

/-!
# Lower bounds for Levenshtein distances
We show that there is some suffix `L'` of `L` such
that the Levenshtein distance from `L'` to `M` gives a lower bound
for the Levenshtein distance from `L` to `m :: M`.
This allows us to use the intermediate steps of a Levenshtein distance calculation
to produce lower bounds on the final result.
-/

set_option autoImplicit true

variable {C : Levenshtein.Cost α β δ} [CanonicallyLinearOrderedAddMonoid δ]

theorem suffixLevenshtein_minimum_le_levenshtein_cons (xs : List α) (y ys) :
(suffixLevenshtein C xs ys).1.minimum ≤ levenshtein C xs (y :: ys) := by
induction xs with
| nil =>
simp only [suffixLevenshtein_nil', levenshtein_nil_cons,
List.minimum_singleton, WithTop.coe_le_coe]
exact le_add_of_nonneg_left (by simp)
| cons x xs ih =>
suffices :
(suffixLevenshtein C (x :: xs) ys).1.minimum ≤ (C.delete x + levenshtein C xs (y :: ys)) ∧
(suffixLevenshtein C (x :: xs) ys).1.minimum ≤ (C.insert y + levenshtein C (x :: xs) ys) ∧
(suffixLevenshtein C (x :: xs) ys).1.minimum ≤ (C.substitute x y + levenshtein C xs ys)
· simpa [suffixLevenshtein_eq_tails_map]
refine ⟨?_, ?_, ?_⟩
· calc
_ ≤ (suffixLevenshtein C xs ys).1.minimum := by
simp [suffixLevenshtein_cons₁_fst, List.minimum_cons]
_ ≤ ↑(levenshtein C xs (y :: ys)) := ih
_ ≤ _ := by simp
· calc
(suffixLevenshtein C (x :: xs) ys).1.minimum ≤ (levenshtein C (x :: xs) ys) := by
simp [suffixLevenshtein_cons₁_fst, List.minimum_cons]
_ ≤ _ := by simp
· calc
(suffixLevenshtein C (x :: xs) ys).1.minimum ≤ (levenshtein C xs ys) := by
simp only [suffixLevenshtein_cons₁_fst, List.minimum_cons]
apply min_le_of_right_le
cases xs
· simp [suffixLevenshtein_nil']
· simp [suffixLevenshtein_cons₁, List.minimum_cons]
_ ≤ _ := by simp

theorem le_suffixLevenshtein_cons_minimum (xs : List α) (y ys) :
(suffixLevenshtein C xs ys).1.minimum ≤ (suffixLevenshtein C xs (y :: ys)).1.minimum := by
apply List.le_minimum_of_forall_le
simp only [suffixLevenshtein_eq_tails_map]
simp only [List.mem_map, List.mem_tails, forall_exists_index, and_imp, forall_apply_eq_imp_iff₂]
intro a suff
refine (?_ : _ ≤ _).trans (suffixLevenshtein_minimum_le_levenshtein_cons _ _ _)
simp only [suffixLevenshtein_eq_tails_map]
apply List.le_minimum_of_forall_le
intro b m
replace m : ∃ a_1, a_1 <:+ a ∧ levenshtein C a_1 ys = b
· simpa using m
obtain ⟨a', suff', rfl⟩ := m
apply List.minimum_le_of_mem'
simp only [List.mem_map, List.mem_tails]
suffices : ∃ a, a <:+ xs ∧ levenshtein C a ys = levenshtein C a' ys
· simpa
exact ⟨a', suff'.trans suff, rfl⟩

theorem le_suffixLevenshtein_append_minimum (xs : List α) (ys₁ ys₂) :
(suffixLevenshtein C xs ys₂).1.minimum ≤ (suffixLevenshtein C xs (ys₁ ++ ys₂)).1.minimum := by
induction ys₁ with
| nil => exact le_refl _
| cons y ys₁ ih => exact ih.trans (le_suffixLevenshtein_cons_minimum _ _ _)

theorem suffixLevenshtein_minimum_le_levenshtein_append (xs ys₁ ys₂) :
(suffixLevenshtein C xs ys₂).1.minimum ≤ levenshtein C xs (ys₁ ++ ys₂) := by
cases ys₁ with
| nil => exact List.minimum_le_of_mem' (List.get_mem _ _ _)
| cons y ys₁ =>
exact (le_suffixLevenshtein_append_minimum _ _ _).trans
(suffixLevenshtein_minimum_le_levenshtein_cons _ _ _)

theorem le_levenshtein_cons (xs : List α) (y ys) :
∃ xs', xs' <:+ xs ∧ levenshtein C xs' ys ≤ levenshtein C xs (y :: ys) := by
simpa [suffixLevenshtein_eq_tails_map, List.minimum_le_coe_iff] using
suffixLevenshtein_minimum_le_levenshtein_cons (δ := δ) xs y ys

theorem le_levenshtein_append (xs : List α) (ys₁ ys₂) :
∃ xs', xs' <:+ xs ∧ levenshtein C xs' ys₂ ≤ levenshtein C xs (ys₁ ++ ys₂) := by
simpa [suffixLevenshtein_eq_tails_map, List.minimum_le_coe_iff] using
suffixLevenshtein_minimum_le_levenshtein_append (δ := δ) xs ys₁ ys₂
14 changes: 12 additions & 2 deletions Mathlib/Data/List/MinMax.lean
Expand Up @@ -362,9 +362,9 @@ theorem le_maximum_of_mem' (ha : a ∈ l) : (a : WithBot α) ≤ maximum l :=
le_of_not_lt <| not_lt_maximum_of_mem' ha
#align list.le_maximum_of_mem' List.le_maximum_of_mem'

theorem le_minimum_of_mem' (ha : a ∈ l) : minimum l ≤ (a : WithTop α) :=
theorem minimum_le_of_mem' (ha : a ∈ l) : minimum l ≤ (a : WithTop α) :=
@le_maximum_of_mem' αᵒᵈ _ _ _ ha
#align list.le_minimum_of_mem' List.le_minimum_of_mem'
#align list.le_minimum_of_mem' List.minimum_le_of_mem'

theorem minimum_concat (a : α) (l : List α) : minimum (l ++ [a]) = min (minimum l) a :=
@maximum_concat αᵒᵈ _ _ _
Expand All @@ -379,6 +379,16 @@ theorem minimum_cons (a : α) (l : List α) : minimum (a :: l) = min ↑a (minim
@maximum_cons αᵒᵈ _ _ _
#align list.minimum_cons List.minimum_cons

theorem maximum_le_of_forall_le {b : WithBot α} (h : ∀ a ∈ l, a ≤ b) : l.maximum ≤ b := by
induction l with
| nil => simp
| cons a l ih =>
simp only [maximum_cons, ge_iff_le, max_le_iff, WithBot.coe_le_coe]
exact ⟨h a (by simp), ih fun a w => h a (mem_cons.mpr (Or.inr w))⟩

theorem le_minimum_of_forall_le {b : WithTop α} (h : ∀ a ∈ l, b ≤ a) : b ≤ l.minimum :=
maximum_le_of_forall_le (α:= αᵒᵈ) h

theorem maximum_eq_coe_iff : maximum l = m ↔ m ∈ l ∧ ∀ a ∈ l, a ≤ m := by
rw [maximum, ← WithBot.some_eq_coe, argmax_eq_some_iff]
simp only [id_eq, and_congr_right_iff, and_iff_left_iff_imp]
Expand Down

0 comments on commit d7b5d38

Please sign in to comment.