Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Add some theorems for String.substrEq and String.isPrefixOf #782

Closed
wants to merge 8 commits into from
111 changes: 109 additions & 2 deletions Batteries/Data/String/Lemmas.lean
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,9 @@ instance : Batteries.BEqOrd String := .compareOfLessAndEq String.lt_irrefl

@[simp] theorem data_append (s t : String) : (s ++ t).1 = s.1 ++ t.1 := rfl
tjf801 marked this conversation as resolved.
Show resolved Hide resolved

theorem cons_append (c : Char) (cs : List Char) (t : String) :
⟨c :: cs⟩ ++ t = ⟨c :: (cs ++ t.1)⟩ := rfl

attribute [simp] toList -- prefer `String.data` over `String.toList` in lemmas

theorem lt_iff (s t : String) : s < t ↔ s.1 < t.1 := .rfl
Expand Down Expand Up @@ -193,8 +196,31 @@ theorem Valid.mk (cs cs' : List Char) : Valid ⟨cs ++ cs'⟩ ⟨utf8Len cs⟩ :
theorem Valid.le_endPos : ∀ {s p}, Valid s p → p ≤ endPos s
| ⟨_⟩, ⟨_⟩, ⟨cs, cs', rfl, rfl⟩ => by simp [Nat.le_add_right]

@[simp] theorem empty_valid (p : Pos) : Pos.Valid "" p ↔ p = 0 :=
⟨fun hl => Pos.ext <| Nat.le_zero.mp <| hl.le_endPos, fun hr => hr ▸ Pos.valid_zero⟩

theorem Valid.cons_addChar (h : Pos.Valid ⟨c :: cs⟩ (p + c)) : Pos.Valid ⟨cs⟩ p := by
let ⟨cs₁, cs₂, h', p_eq_len_c₁⟩ := h
match (List.append_eq_cons.mp h') with
| Or.inl ⟨_, _⟩ =>
simp_all [Nat.add_eq_zero_iff]
exact absurd p_eq_len_c₁.right.symm (Nat.ne_of_lt <| String.csize_pos c)
| Or.inr ⟨cs₁_tail, _, _⟩ =>
simp_all
exact (Pos.ext <| Nat.add_right_cancel p_eq_len_c₁).symm ▸ Pos.Valid.mk cs₁_tail cs₂

theorem Valid.cons_zero_or_ge_head (h : Pos.Valid ⟨c :: cs⟩ p) : p = 0 ∨ p ≥ ⟨csize c⟩ := by
let ⟨cs₁, cs₂, h', p_eq_len_c₁⟩ := h
match (List.append_eq_cons.mp h') with
| Or.inl ⟨cs₁_nil, _⟩ =>
exact Or.inl <| Pos.ext (utf8Len_nil ▸ cs₁_nil ▸ p_eq_len_c₁ : p.byteIdx = 0)
| Or.inr ⟨_, cs₁_eq, _⟩ =>
rw [cs₁_eq, utf8Len, utf8ByteSize.go, Nat.add_comm] at p_eq_len_c₁
exact Or.inr (Nat.le.intro <| p_eq_len_c₁.symm)

end Pos


theorem endPos_eq_zero : ∀ (s : String), endPos s = 0 ↔ s = ""
| ⟨_⟩ => Pos.ext_iff.trans <| utf8Len_eq_zero.trans ext_iff.symm

Expand Down Expand Up @@ -250,6 +276,27 @@ theorem get_cons_addChar (c : Char) (cs : List Char) (i : Pos) :
get ⟨c :: cs⟩ (i + c) = get ⟨cs⟩ i := by
simp [get, utf8GetAux, Pos.zero_ne_addChar, utf8GetAux_addChar_right_cancel]

@[simp] theorem get_cons_zero : (String.mk (c :: cs)).get 0 = c := rfl

theorem get_append_of_valid {s t} {p : Pos} (h : p.Valid s) (h' : p ≠ endPos s) :
(s ++ t).get p = s.get p := by
match s, t, p with
| ⟨[]⟩, _, _ => simp_all; contradiction;
| ⟨_ :: _⟩, _, ⟨0⟩ => simp_all [cons_append]
| ⟨a :: l⟩, _, p@⟨k+1⟩ => next p_eq_succ =>
have p_ne_zero : p ≠ 0 := by rw [ne_eq, Pos.ext_iff]; simp [p_eq_succ]
have ⟨n, csize_a⟩ : ∃ n, p = ⟨n + csize a⟩ := by
apply Exists.intro (p.1 - csize a)
rw [Nat.sub_add_cancel]
simp [← p_eq_succ] at h
exact Or.resolve_left (h.cons_zero_or_ge_head) p_ne_zero
rw [endPos, ne_eq, Pos.ext_iff] at h'
simp_all [-p_eq_succ, (by rw [p_eq_succ] : k + 1 = p.byteIdx), Nat.add_right_cancel_iff]
rw [← Pos.ext_iff] at h'
rw [← Pos.addChar_eq, get_cons_addChar, cons_append, get_cons_addChar]
rw [← Pos.addChar_eq] at h
exact get_append_of_valid (Pos.Valid.cons_addChar h) h'

theorem utf8GetAux?_of_valid (cs cs' : List Char) {i p : Nat} (hp : i + utf8Len cs = p) :
utf8GetAux? (cs ++ cs') ⟨i⟩ ⟨p⟩ = cs'.head? := by
match cs, cs' with
Expand Down Expand Up @@ -779,8 +826,68 @@ theorem mapAux_of_valid (f : Char → Char) : ∀ l r, mapAux f ⟨utf8Len l⟩
theorem map_eq (f : Char → Char) (s) : map f s = ⟨s.1.map f⟩ := by
simpa using mapAux_of_valid f [] s.1

-- TODO: substrEq
-- TODO: isPrefixOf
Comment on lines -782 to -783
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't remove these lines! These are two of the ten unproved theorems specifying string operations, which I'll prove in the future.

/-! ### substrEq -/

@[simp] theorem substrEq_loop_rfl (s : String) (off stp : Pos) : substrEq.loop s s off off stp := by
rw [substrEq.loop]
simp only [beq_self_eq_true, Bool.true_and, dite_eq_ite, ite_eq_right_iff]
intro h
have : stp.byteIdx - (off.byteIdx + csize (get s off)) < stp.byteIdx - off.byteIdx := by
rw [Nat.sub_add_eq]
exact Nat.sub_lt (Nat.sub_pos_of_lt h) (csize_pos _)
exact substrEq_loop_rfl s _ _
termination_by stp.1 - off.1

theorem substrEq_rfl (s : String) (off : Pos) (n : Nat) (h : off.byteIdx + n ≤ s.endPos.byteIdx) :
substrEq s off s off n := by
simp [substrEq, substrEq_loop_rfl]
exact h

theorem substrEq_loop_self_append {s t : String} {off stp : Pos}
(h_off : off.Valid s) (h_stp : stp.Valid s) : substrEq.loop s (s ++ t) off off stp := by
rw [substrEq.loop]
simp only [dite_eq_ite, ite_eq_right_iff, Bool.and_eq_true, beq_iff_eq]
intro offb_lt_stpb
have off_lt_end := (Nat.lt_of_lt_of_le offb_lt_stpb h_stp.le_endPos)
have off_ne_end := (fun x => Nat.ne_of_lt off_lt_end <| Pos.ext_iff.mp x)
rw [get_append_of_valid h_off off_ne_end]
apply And.intro
· rfl
· have : stp.byteIdx - (off.byteIdx + csize (get s off)) < stp.byteIdx - off.byteIdx := by
simpa [Nat.sub_add_eq] using Nat.sub_lt (Nat.sub_pos_of_lt offb_lt_stpb) (csize_pos _)
exact substrEq_loop_self_append (valid_next h_off off_lt_end) h_stp
termination_by stp.1 - off.1

/-! ### isPrefixOf -/

-- unusedHavesSuffices lint false positive from unfolding substrEq.loop
-- This will be fixed by https://github.com/leanprover/lean4/pull/4143
@[nolint unusedHavesSuffices, simp]
theorem empty_isPrefixOf (s : String) : "".isPrefixOf s := by
simp [isPrefixOf, endPos, utf8ByteSize, substrEq, substrEq.loop]

@[simp] theorem isPrefixOf_empty_iff_empty (s : String) : s.isPrefixOf "" ↔ s = "" := by
apply Iff.intro <;> intro h
· simp [isPrefixOf, substrEq, endPos, utf8ByteSize] at h
exact ext h.left
· simp [empty_isPrefixOf, h]

@[simp] theorem isPrefixOf_self (s : String) : s.isPrefixOf s := by
simp [isPrefixOf, substrEq, substrEq_loop_rfl]

@[simp] theorem isPrefixOf_push_self (s : String) (c : Char) : s.isPrefixOf (s.push c) := by
simp [isPrefixOf, substrEq, push]
apply And.intro
· rw [endPos]
exact Nat.le_add_right _ _
· have mk_data_append : mk (s.data ++ [c]) = s ++ ⟨[c]⟩ := rfl
rw [mk_data_append, substrEq_loop_self_append Pos.valid_zero Pos.valid_endPos]

@[simp] theorem isPrefixOf_self_append (s t : String) : s.isPrefixOf (s ++ t) := by
simp [isPrefixOf, substrEq, substrEq_loop_self_append]
simp only [endPos, utf8ByteSize, utf8ByteSize.go_eq, append, utf8Len_append]
exact Nat.le_add_right _ _

-- TODO: replace

@[nolint unusedHavesSuffices] -- false positive from unfolding String.takeWhileAux
Expand Down