From 0489657cc5fd791ce1bf051f8046356285c05360 Mon Sep 17 00:00:00 2001 From: tjf801 <47402907+tjf801@users.noreply.github.com> Date: Mon, 6 May 2024 14:45:30 -0400 Subject: [PATCH 1/6] Add lemmas necessary for isPrefixOf proofs - `String.cons_append` - `String.Pos.empty_valid` - `String.Pos.Valid.cons_addChar` - `String.Pos.Valid.cons_zero_or_ge_head` Update Lemmas.lean Update Lemmas.lean --- Std/Data/String/Lemmas.lean | 80 ++++++++++++++++++++++++++++++++++++- 1 file changed, 79 insertions(+), 1 deletion(-) diff --git a/Std/Data/String/Lemmas.lean b/Std/Data/String/Lemmas.lean index 28f9c76ad4..33a2d2659b 100644 --- a/Std/Data/String/Lemmas.lean +++ b/Std/Data/String/Lemmas.lean @@ -60,6 +60,9 @@ instance : Std.BEqOrd String := .compareOfLessAndEq String.lt_irrefl @[simp] theorem data_append (s t : String) : (s ++ t).1 = s.1 ++ t.1 := rfl +@[simp] theorem cons_append (c : Char) (cs : List Char) (t : String) : + ⟨c :: cs⟩ ++ t = ⟨c :: (cs ++ t.1)⟩ := rfl + attribute [simp] toList -- prefer `String.data` over `String.toList` in lemmas theorem lt_iff (s t : String) : s < t ↔ s.1 < t.1 := .rfl @@ -193,8 +196,31 @@ theorem Valid.mk (cs cs' : List Char) : Valid ⟨cs ++ cs'⟩ ⟨utf8Len cs⟩ : theorem Valid.le_endPos : ∀ {s p}, Valid s p → p ≤ endPos s | ⟨_⟩, ⟨_⟩, ⟨cs, cs', rfl, rfl⟩ => by simp [Nat.le_add_right] +@[simp] theorem empty_valid (p : Pos) : Pos.Valid "" p ↔ p = 0 := + ⟨fun hl => Pos.ext <| Nat.le_zero.mp <| hl.le_endPos, fun hr => hr ▸ Pos.valid_zero⟩ + +theorem Valid.cons_addChar (h : Pos.Valid ⟨c :: cs⟩ (p + c)) : Pos.Valid ⟨cs⟩ p := by + let ⟨cs₁, cs₂, h', p_eq_len_c₁⟩ := h + match (List.append_eq_cons.mp h') with + | Or.inl ⟨_, _⟩ => + simp_all [Nat.add_eq_zero_iff] + exact absurd p_eq_len_c₁.right.symm (Nat.ne_of_lt <| String.csize_pos c) + | Or.inr ⟨cs₁_tail, _, _⟩ => + simp_all + exact (Pos.ext <| Nat.add_right_cancel p_eq_len_c₁).symm ▸ Pos.Valid.mk cs₁_tail cs₂ + +theorem Valid.cons_zero_or_ge_head (h : Pos.Valid ⟨c :: cs⟩ p) : p = 0 ∨ p ≥ ⟨csize c⟩ := by + let ⟨cs₁, cs₂, h', p_eq_len_c₁⟩ := h + match (List.append_eq_cons.mp h') with + | Or.inl ⟨cs₁_nil, _⟩ => + exact Or.inl <| Pos.ext (utf8Len_nil ▸ cs₁_nil ▸ p_eq_len_c₁ : p.byteIdx = 0) + | Or.inr ⟨_, cs₁_eq, _⟩ => + rw [cs₁_eq, utf8Len, utf8ByteSize.go, Nat.add_comm] at p_eq_len_c₁ + exact Or.inr (Nat.le.intro <| p_eq_len_c₁.symm) + end Pos + theorem endPos_eq_zero : ∀ (s : String), endPos s = 0 ↔ s = "" | ⟨_⟩ => Pos.ext_iff.trans <| utf8Len_eq_zero.trans ext_iff.symm @@ -250,6 +276,27 @@ theorem get_cons_addChar (c : Char) (cs : List Char) (i : Pos) : get ⟨c :: cs⟩ (i + c) = get ⟨cs⟩ i := by simp [get, utf8GetAux, Pos.zero_ne_addChar, utf8GetAux_addChar_right_cancel] +@[simp] theorem get_cons_zero : (String.mk (c :: cs)).get 0 = c := rfl + +theorem get_append_of_valid {s t} {p : Pos} (h : p.Valid s) (h' : p ≠ endPos s) : + (s ++ t).get p = s.get p := by + match s, t, p with + | ⟨[]⟩, _, _ => simp_all; contradiction; + | ⟨_ :: _⟩, _, ⟨0⟩ => simp_all + | ⟨a :: l⟩, _, p@⟨k+1⟩ => next p_eq_succ => + have p_ne_zero : p ≠ 0 := by rw [ne_eq, Pos.ext_iff]; simp [p_eq_succ] + have ⟨n, csize_a⟩ : ∃ n, p = ⟨n + csize a⟩ := by + apply Exists.intro (p.1 - csize a) + rw [Nat.sub_add_cancel] + simp [← p_eq_succ] at h + exact Or.resolve_left (h.cons_zero_or_ge_head) p_ne_zero + rw [endPos, ne_eq, Pos.ext_iff] at h' + simp_all [-p_eq_succ, (by rw [p_eq_succ] : k + 1 = p.byteIdx), Nat.add_right_cancel_iff] + rw [← Pos.ext_iff] at h' + rw [← Pos.addChar_eq, get_cons_addChar, get_cons_addChar] + rw [← Pos.addChar_eq] at h + exact get_append_of_valid (Pos.Valid.cons_addChar h) h' + theorem utf8GetAux?_of_valid (cs cs' : List Char) {i p : Nat} (hp : i + utf8Len cs = p) : utf8GetAux? (cs ++ cs') ⟨i⟩ ⟨p⟩ = cs'.head? := by match cs, cs' with @@ -779,7 +826,38 @@ theorem mapAux_of_valid (f : Char → Char) : ∀ l r, mapAux f ⟨utf8Len l⟩ theorem map_eq (f : Char → Char) (s) : map f s = ⟨s.1.map f⟩ := by simpa using mapAux_of_valid f [] s.1 --- TODO: substrEq +/-! ### substrEq -/ + +@[simp] theorem substrEq_loop_rfl (s : String) (off stp : Pos) : substrEq.loop s s off off stp := by + rw [substrEq.loop] + simp only [beq_self_eq_true, Bool.true_and, dite_eq_ite, ite_eq_right_iff] + intro h + have : stp.byteIdx - (off.byteIdx + csize (get s off)) < stp.byteIdx - off.byteIdx := by + rw [Nat.sub_add_eq] + exact Nat.sub_lt (Nat.sub_pos_of_lt h) (csize_pos _) + exact substrEq_loop_rfl s _ _ +termination_by stp.1 - off.1 + +theorem substrEq_rfl (s : String) (off : Pos) (n : Nat) (h : off.byteIdx + n ≤ s.endPos.byteIdx) : + substrEq s off s off n := by + simp [substrEq, substrEq_loop_rfl] + exact h + +theorem substrEq_loop_self_append {s t : String} {off stp : Pos} + (h_off : off.Valid s) (h_stp : stp.Valid s) : substrEq.loop s (s ++ t) off off stp := by + rw [substrEq.loop] + simp only [dite_eq_ite, ite_eq_right_iff, Bool.and_eq_true, beq_iff_eq] + intro offb_lt_stpb + have off_lt_end := (Nat.lt_of_lt_of_le offb_lt_stpb h_stp.le_endPos) + have off_ne_end := (fun x => Nat.ne_of_lt off_lt_end <| Pos.ext_iff.mp x) + rw [get_append_of_valid h_off off_ne_end] + apply And.intro + · rfl + · have : stp.byteIdx - (off.byteIdx + csize (get s off)) < stp.byteIdx - off.byteIdx := by + simpa [Nat.sub_add_eq] using Nat.sub_lt (Nat.sub_pos_of_lt offb_lt_stpb) (csize_pos _) + exact substrEq_loop_self_append (valid_next h_off off_lt_end) h_stp +termination_by stp.1 - off.1 + -- TODO: isPrefixOf -- TODO: replace From b4afe21ba23ffd8172d6eb6a837fa10be2c44b9c Mon Sep 17 00:00:00 2001 From: tjf801 <47402907+tjf801@users.noreply.github.com> Date: Mon, 6 May 2024 14:45:58 -0400 Subject: [PATCH 2/6] Add isPrefixOf proofs --- Std/Data/String/Lemmas.lean | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/Std/Data/String/Lemmas.lean b/Std/Data/String/Lemmas.lean index 33a2d2659b..6c37f13d2f 100644 --- a/Std/Data/String/Lemmas.lean +++ b/Std/Data/String/Lemmas.lean @@ -858,7 +858,34 @@ theorem substrEq_loop_self_append {s t : String} {off stp : Pos} exact substrEq_loop_self_append (valid_next h_off off_lt_end) h_stp termination_by stp.1 - off.1 --- TODO: isPrefixOf +/-! ### isPrefixOf -/ + +@[nolint unusedHavesSuffices, simp] -- false positive from unfolding substrEq.loop +theorem empty_isPrefixOf (s : String) : "".isPrefixOf s := by + simp [isPrefixOf, endPos, utf8ByteSize, substrEq, substrEq.loop] + +@[simp] theorem isPrefixOf_empty_iff_empty (s : String) : s.isPrefixOf "" ↔ s = "" := by + apply Iff.intro <;> intro h + · simp [isPrefixOf, substrEq, endPos, utf8ByteSize] at h + exact ext h.left + · simp [empty_isPrefixOf, h] + +@[simp] theorem isPrefixOf_self (s : String) : s.isPrefixOf s := by + simp [isPrefixOf, substrEq, substrEq_loop_rfl] + +@[simp] theorem isPrefixOf_push_self (s : String) (c : Char) : s.isPrefixOf (s.push c) := by + simp [isPrefixOf, substrEq, push] + apply And.intro + · rw [endPos] + exact Nat.le_add_right _ _ + · have mk_data_append : mk (s.data ++ [c]) = s ++ ⟨[c]⟩ := rfl + rw [mk_data_append, substrEq_loop_self_append Pos.valid_zero Pos.valid_endPos] + +@[simp] theorem isPrefixOf_self_append (s t : String) : s.isPrefixOf (s ++ t) := by + simp [isPrefixOf, substrEq, substrEq_loop_self_append] + simp only [endPos, utf8ByteSize, utf8ByteSize.go_eq, append, utf8Len_append] + exact Nat.le_add_right _ _ + -- TODO: replace @[nolint unusedHavesSuffices] -- false positive from unfolding String.takeWhileAux From b0ac67fbca6fca9934ba4f6d66a8dc8c960d0b86 Mon Sep 17 00:00:00 2001 From: tjf801 <47402907+tjf801@users.noreply.github.com> Date: Sun, 12 May 2024 23:09:03 -0400 Subject: [PATCH 3/6] add info about `unusedHavesSuffices` lint Co-authored-by: Kim Morrison --- Std/Data/String/Lemmas.lean | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Std/Data/String/Lemmas.lean b/Std/Data/String/Lemmas.lean index 6c37f13d2f..e1b461e965 100644 --- a/Std/Data/String/Lemmas.lean +++ b/Std/Data/String/Lemmas.lean @@ -860,7 +860,9 @@ termination_by stp.1 - off.1 /-! ### isPrefixOf -/ -@[nolint unusedHavesSuffices, simp] -- false positive from unfolding substrEq.loop +-- unusedHavesSuffices lint false positive from unfolding substrEq.loop +-- This will be fixed by https://github.com/leanprover/lean4/pull/4143 +@[nolint unusedHavesSuffices, simp] theorem empty_isPrefixOf (s : String) : "".isPrefixOf s := by simp [isPrefixOf, endPos, utf8ByteSize, substrEq, substrEq.loop] From 9c1bf76bd0fcc1abd4e5d002ae140140f8e52654 Mon Sep 17 00:00:00 2001 From: tjf801 <47402907+tjf801@users.noreply.github.com> Date: Sun, 12 May 2024 23:11:17 -0400 Subject: [PATCH 4/6] remove unnecessary `data_append` theorem --- Std/Data/String/Lemmas.lean | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/Std/Data/String/Lemmas.lean b/Std/Data/String/Lemmas.lean index e1b461e965..e5fe60e90e 100644 --- a/Std/Data/String/Lemmas.lean +++ b/Std/Data/String/Lemmas.lean @@ -58,8 +58,6 @@ instance : Std.BEqOrd String := .compareOfLessAndEq String.lt_irrefl @[simp] theorem data_push (s : String) (c : Char) : (s.push c).1 = s.1 ++ [c] := rfl -@[simp] theorem data_append (s t : String) : (s ++ t).1 = s.1 ++ t.1 := rfl - @[simp] theorem cons_append (c : Char) (cs : List Char) (t : String) : ⟨c :: cs⟩ ++ t = ⟨c :: (cs ++ t.1)⟩ := rfl @@ -862,7 +860,7 @@ termination_by stp.1 - off.1 -- unusedHavesSuffices lint false positive from unfolding substrEq.loop -- This will be fixed by https://github.com/leanprover/lean4/pull/4143 -@[nolint unusedHavesSuffices, simp] +@[nolint unusedHavesSuffices, simp] theorem empty_isPrefixOf (s : String) : "".isPrefixOf s := by simp [isPrefixOf, endPos, utf8ByteSize, substrEq, substrEq.loop] From cc2174d2e787736e50656b17325e7c36a799736e Mon Sep 17 00:00:00 2001 From: tjf801 <47402907+tjf801@users.noreply.github.com> Date: Mon, 13 May 2024 14:28:14 -0400 Subject: [PATCH 5/6] Revert "remove unnecessary `data_append` theorem" This reverts commit 9c1bf76bd0fcc1abd4e5d002ae140140f8e52654. --- Batteries/Data/String/Lemmas.lean | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/Batteries/Data/String/Lemmas.lean b/Batteries/Data/String/Lemmas.lean index e7d943cd46..05547acd60 100644 --- a/Batteries/Data/String/Lemmas.lean +++ b/Batteries/Data/String/Lemmas.lean @@ -58,6 +58,8 @@ instance : Batteries.BEqOrd String := .compareOfLessAndEq String.lt_irrefl @[simp] theorem data_push (s : String) (c : Char) : (s.push c).1 = s.1 ++ [c] := rfl +@[simp] theorem data_append (s t : String) : (s ++ t).1 = s.1 ++ t.1 := rfl + @[simp] theorem cons_append (c : Char) (cs : List Char) (t : String) : ⟨c :: cs⟩ ++ t = ⟨c :: (cs ++ t.1)⟩ := rfl @@ -860,7 +862,7 @@ termination_by stp.1 - off.1 -- unusedHavesSuffices lint false positive from unfolding substrEq.loop -- This will be fixed by https://github.com/leanprover/lean4/pull/4143 -@[nolint unusedHavesSuffices, simp] +@[nolint unusedHavesSuffices, simp] theorem empty_isPrefixOf (s : String) : "".isPrefixOf s := by simp [isPrefixOf, endPos, utf8ByteSize, substrEq, substrEq.loop] From c7d6250d1688d5ba0629ff12f553f0315b5d0e42 Mon Sep 17 00:00:00 2001 From: tjf801 <47402907+tjf801@users.noreply.github.com> Date: Mon, 13 May 2024 14:31:02 -0400 Subject: [PATCH 6/6] make `cons_append` not `@[simp]` --- Batteries/Data/String/Lemmas.lean | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Batteries/Data/String/Lemmas.lean b/Batteries/Data/String/Lemmas.lean index 05547acd60..afec354fd5 100644 --- a/Batteries/Data/String/Lemmas.lean +++ b/Batteries/Data/String/Lemmas.lean @@ -60,7 +60,7 @@ instance : Batteries.BEqOrd String := .compareOfLessAndEq String.lt_irrefl @[simp] theorem data_append (s t : String) : (s ++ t).1 = s.1 ++ t.1 := rfl -@[simp] theorem cons_append (c : Char) (cs : List Char) (t : String) : +theorem cons_append (c : Char) (cs : List Char) (t : String) : ⟨c :: cs⟩ ++ t = ⟨c :: (cs ++ t.1)⟩ := rfl attribute [simp] toList -- prefer `String.data` over `String.toList` in lemmas @@ -282,7 +282,7 @@ theorem get_append_of_valid {s t} {p : Pos} (h : p.Valid s) (h' : p ≠ endPos s (s ++ t).get p = s.get p := by match s, t, p with | ⟨[]⟩, _, _ => simp_all; contradiction; - | ⟨_ :: _⟩, _, ⟨0⟩ => simp_all + | ⟨_ :: _⟩, _, ⟨0⟩ => simp_all [cons_append] | ⟨a :: l⟩, _, p@⟨k+1⟩ => next p_eq_succ => have p_ne_zero : p ≠ 0 := by rw [ne_eq, Pos.ext_iff]; simp [p_eq_succ] have ⟨n, csize_a⟩ : ∃ n, p = ⟨n + csize a⟩ := by @@ -293,7 +293,7 @@ theorem get_append_of_valid {s t} {p : Pos} (h : p.Valid s) (h' : p ≠ endPos s rw [endPos, ne_eq, Pos.ext_iff] at h' simp_all [-p_eq_succ, (by rw [p_eq_succ] : k + 1 = p.byteIdx), Nat.add_right_cancel_iff] rw [← Pos.ext_iff] at h' - rw [← Pos.addChar_eq, get_cons_addChar, get_cons_addChar] + rw [← Pos.addChar_eq, get_cons_addChar, cons_append, get_cons_addChar] rw [← Pos.addChar_eq] at h exact get_append_of_valid (Pos.Valid.cons_addChar h) h' @@ -862,7 +862,7 @@ termination_by stp.1 - off.1 -- unusedHavesSuffices lint false positive from unfolding substrEq.loop -- This will be fixed by https://github.com/leanprover/lean4/pull/4143 -@[nolint unusedHavesSuffices, simp] +@[nolint unusedHavesSuffices, simp] theorem empty_isPrefixOf (s : String) : "".isPrefixOf s := by simp [isPrefixOf, endPos, utf8ByteSize, substrEq, substrEq.loop]