Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add String.splitOn_of_valid #495

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
35 changes: 35 additions & 0 deletions Std/Data/List/Basic.lean
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,18 @@ theorem replicateTR_loop_eq : ∀ n, replicateTR.loop a n acc = replicate n a ++
@[csimp] theorem dropLast_eq_dropLastTR : @dropLast = @dropLastTR := by
funext α l; simp [dropLastTR]

/-- Drop a sublist from the tail end of a list. -/
def rdropSublist [BEq α] : List α → List α → List α
| l, [] => l
| [], _ => []
| a::as, b::bs =>
if a::as == b::bs then
[]
else if as == b::bs then
[a]
else
a::rdropSublist as (b::bs)

/-- Tail recursive version of `intersperse`. -/
def intersperseTR (sep : α) : List α → List α
| [] => []
Expand Down Expand Up @@ -521,6 +533,29 @@ Split a list at every occurrence of a separator element. The separators are not
-/
@[inline] def splitOn [BEq α] (a : α) (as : List α) : List (List α) := as.splitOnP (· == a)

/--
Split a list at every occurrence of a separator sublist. The separators are not in the result.
```
[1, 1, 2, 3, 2, 4, 4].splitOnSublist [2, 3] = [[1, 1], [2, 4, 4]]
```
-/
def splitOnSublist [BEq α] (l sep : List α) : List (List α) := go l sep sep [] where
chabulhwi marked this conversation as resolved.
Show resolved Hide resolved
/-- Auxiliary for `splitOnSublist`. The first explicit argument is a list we want to split, the
second a separator sublist (`sep`), the third a part of `sep` that we'll use to split the first
argument, and the last an accumulated list of elements of the first argument (`acc`). -/
go : List α → List α → List α → List α → List (List α)
| a::as, sep@(b::bs), [], acc =>
if a == b then
(acc.reverse.rdropSublist sep)::go as sep bs [a]
else
(acc.reverse.rdropSublist sep)::go as sep sep [a]
| a::as, sep@(_::_), c::cs, acc =>
if a == c then
go as sep cs (a::acc)
else
go as sep sep (a::acc)
| l, _, _, acc => [acc.reverse++l]
chabulhwi marked this conversation as resolved.
Show resolved Hide resolved

/--
Apply a function to the nth tail of `l`. Returns the input without
using `f` if the index is larger than the length of the List.
Expand Down
35 changes: 35 additions & 0 deletions Std/Data/List/Lemmas.lean
Original file line number Diff line number Diff line change
Expand Up @@ -161,6 +161,18 @@ theorem append_eq_append_iff {a b c d : List α} :
| nil => simp; exact (or_iff_left_of_imp fun ⟨_, ⟨e, rfl⟩, h⟩ => e ▸ h.symm).symm
| cons a as ih => cases c <;> simp [eq_comm, and_assoc, ih, and_or_left]

theorem append_left_eq_self (l m : List α) : l ++ m = m ↔ l = [] := by
constructor <;> intro h
· conv at h => rhs; rw [← nil_append m]
rwa [append_cancel_right_eq] at h
· rw [h, nil_append]

theorem append_right_eq_self (l m : List α) : l ++ m = l ↔ m = [] := by
constructor <;> intro h
· conv at h => rhs; rw [← append_nil l]
rwa [append_cancel_left_eq] at h
· rw [h, append_nil]

@[simp] theorem mem_append {a : α} {s t : List α} : a ∈ s ++ t ↔ a ∈ s ∨ a ∈ t := by
induction s <;> simp_all [or_assoc]

Expand Down Expand Up @@ -720,6 +732,29 @@ are often used for theorems about `Array.pop`. -/
| _::_::_, ⟨0, _⟩ => rfl
| _::_::_, ⟨i+1, _⟩ => get_dropLast _ ⟨i, _⟩

/-! ### rdropSublist -/

@[simp] theorem rdropSublist_self [BEq α] [LawfulBEq α] (l : List α) :
l.rdropSublist l = [] := by
cases l <;> simp [rdropSublist]

@[simp] theorem rdropSublist_append [BEq α] [LawfulBEq α] (l m : List α) :
(l++m).rdropSublist m = l := by
match l, m with
| [], _ => simp [rdropSublist_self]
| a::as, [] => simp [rdropSublist]
| a::as, b::bs =>
simp [rdropSublist]
split
· next h =>
simp at h
rw [append_cons, append_left_eq_self] at h
simp at h
· split <;> rename_i h <;> simp at h ⊢
· simp [append_left_eq_self] at h
exact h.symm
· apply rdropSublist_append

/-! ### nth element -/

@[simp] theorem get_cons_succ {as : List α} {h : i + 1 < (a :: as).length} :
Expand Down
10 changes: 10 additions & 0 deletions Std/Data/Nat/Lemmas.lean
Original file line number Diff line number Diff line change
Expand Up @@ -370,6 +370,16 @@ protected theorem add_le_add_iff_left {n : Nat} : n + m ≤ n + k ↔ m ≤ k :=
protected theorem add_le_add_iff_right {n : Nat} : m + n ≤ k + n ↔ m ≤ k :=
⟨Nat.le_of_add_le_add_right, fun h => Nat.add_le_add_right h _⟩

protected theorem add_left_le_self {n m : Nat} : n + m ≤ m ↔ n = 0 := by
conv => lhs; rhs; rw [← Nat.zero_add m]
rw [Nat.add_le_add_iff_right]
apply Nat.le_zero

protected theorem add_right_le_self {n m : Nat} : n + m ≤ n ↔ m = 0 := by
conv => lhs; rhs; rw [← Nat.add_zero n]
rw [Nat.add_le_add_iff_left]
apply Nat.le_zero

protected theorem lt_of_add_lt_add_right : ∀ {n : Nat}, k + n < m + n → k < m
| 0, h => h
| _+1, h => Nat.lt_of_add_lt_add_right (Nat.lt_of_succ_lt_succ h)
Expand Down
76 changes: 75 additions & 1 deletion Std/Data/String/Lemmas.lean
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,81 @@ theorem splitAux_of_valid (p l m r acc) :
theorem split_of_valid (s p) : split s p = (List.splitOnP p s.1).map mk := by
simpa [split] using splitAux_of_valid p [] [] s.1 []

-- TODO: splitOn
/--
NOTE: DELETE THIS LATER.

A proposed alternative of `String.splitOnAux`.
-/
def splitOnAux' (s sep : String) (b : Pos) (i : Pos) (j : Pos) (r : List String) : List String :=
if sep == "" then
let r := (s.extract b s.endPos)::r
r.reverse
else if h : s.atEnd i then
let r := (s.extract b i)::r
r.reverse
else
have := Nat.sub_lt_sub_left (Nat.gt_of_not_le (mt decide_eq_true h)) (lt_next s _)
if sep.atEnd j then
if s.get i == sep.get 0 then
splitOnAux' s sep i (s.next i) (sep.next 0) (s.extract b (i - j)::r)
else
splitOnAux' s sep i (s.next i) 0 (s.extract b (i - j)::r)
else if s.get i == sep.get j then
splitOnAux' s sep b (s.next i) (sep.next j) r
else
splitOnAux' s sep b (s.next i) 0 r
termination_by _ => s.endPos.1 - i.1

/--
NOTE: DELETE THIS LATER.

A proposed alternative of `String.splitOn`.
-/
def splitOn' (s : String) (sep : String := " ") : List String :=
splitOnAux' s sep 0 0 0 []

theorem splitOnAux_of_valid (sep₁ sep₂ l m r acc) :
splitOnAux' ⟨l++m++sep₁++r⟩ ⟨sep₁++sep₂⟩ ⟨utf8Len l⟩ ⟨utf8Len (l++m++sep₁)⟩ ⟨utf8Len sep₁⟩ acc =
acc.reverse++(List.splitOnSublist.go r (sep₁++sep₂) sep₂ (m++sep₁).reverse).map mk := by
unfold splitOnAux'
simp [ext_iff]
split
· next h => simp [h.1, h.2, by simpa using extract_of_valid l (m++r) [], List.splitOnSublist.go]
· simp [Nat.add_le_add_iff_left, Nat.add_right_le_self]
split
· subst r; simpa [List.splitOnSublist.go] using extract_of_valid l (m++sep₁) []
· obtain ⟨c, r, rfl⟩ := r.exists_cons_of_ne_nil ‹_›
simp [by simpa using (⟨get_of_valid (l++m++sep₁) (c::r), next_of_valid (l++m++sep₁) c r⟩ :
_∧_), next, Pos.addChar_eq, get_of_valid]
split
· subst sep₂
simp [← Nat.add_assoc, Pos.sub_eq, by simpa using extract_of_valid l m (sep₁++c::r)] at *
obtain ⟨c₁, sep₁, rfl⟩ := sep₁.exists_cons_of_ne_nil ‹_›
simp [get, utf8GetAux]
split
· subst c₁
simpa [Nat.add_comm, Nat.add_left_comm, Nat.add_assoc, List.splitOnSublist.go] using
splitOnAux_of_valid [c] sep₁ (l++m++c::sep₁) [] r (⟨m⟩::acc)
· simpa [Nat.add_comm, Nat.add_left_comm, Nat.add_assoc, List.splitOnSublist.go, *] using
splitOnAux_of_valid [] (c₁::sep₁) (l++m++c₁::sep₁) [c] r (⟨m⟩::acc)
· obtain ⟨c₂, sep₂, rfl⟩ := sep₂.exists_cons_of_ne_nil ‹_›
cases sep₁ <;> simp at *
· split
· subst c₂
simpa [Nat.add_comm, Nat.add_left_comm, List.splitOnSublist.go] using
splitOnAux_of_valid [c] sep₂ l m r acc
· simpa [Nat.add_comm, Nat.add_left_comm, List.splitOnSublist.go, *] using
splitOnAux_of_valid [] (c₂::sep₂) l (m++[c]) r acc
· next c₁ sep₁ _ _ _ =>
split
· subst c₂
simpa [Nat.add_comm, Nat.add_left_comm, List.splitOnSublist.go] using
splitOnAux_of_valid (c₁::sep₁++[c]) sep₂ l m r acc
· simpa [Nat.add_comm, Nat.add_left_comm, List.splitOnSublist.go, *] using
splitOnAux_of_valid [] (c₁::sep₁++c₂::sep₂) l (m++c₁::sep₁++[c]) r acc

theorem splitOn_of_valid (s sep) : splitOn' s sep = (List.splitOnSublist s.1 sep.1).map mk := by
simpa [splitOn'] using splitOnAux_of_valid [] sep.1 [] [] s.1 []

@[simp] theorem toString_toSubstring (s : String) : s.toSubstring.toString = s :=
extract_zero_endPos _
Expand Down