|
| 1 | +/- |
| 2 | +Copyright (c) 2024 Jeremy Tan. All rights reserved. |
| 3 | +Released under Apache 2.0 license as described in the file LICENSE. |
| 4 | +Authors: Jeremy Tan |
| 5 | +-/ |
| 6 | +import Mathlib.Algebra.Order.Ring.Nat |
| 7 | +import Mathlib.Data.List.Nodup |
| 8 | + |
| 9 | +/-! |
| 10 | +# Dyck words |
| 11 | +
|
| 12 | +A Dyck word is a sequence consisting of an equal number `n` of symbols of two types such that |
| 13 | +for all prefixes one symbol occurs at least as many times as the other. |
| 14 | +If the symbols are `(` and `)` the latter restriction is equivalent to balanced brackets; |
| 15 | +if they are `U = (1, 1)` and `D = (1, -1)` the sequence is a lattice path from `(0, 0)` to `(0, 2n)` |
| 16 | +and the restriction requires the path to never go below the x-axis. |
| 17 | +
|
| 18 | +## Main definitions |
| 19 | +
|
| 20 | +* `DyckWord`: a list of `U`s and `D`s with as many `U`s as `D`s and with every prefix having |
| 21 | +at least as many `U`s as `D`s. |
| 22 | +* `DyckWord.semilength`: semilength (half the length) of a Dyck word. |
| 23 | +
|
| 24 | +## Implementation notes |
| 25 | +
|
| 26 | +While any two-valued type could have been used for `DyckStep`, a new enumerated type is used here |
| 27 | +to emphasise that the definition of a Dyck word does not depend on that underlying type. |
| 28 | +
|
| 29 | +## TODO |
| 30 | +
|
| 31 | +* Prove the bijection between Dyck words and rooted binary trees |
| 32 | +(https://github.com/leanprover-community/mathlib4/pull/9781). |
| 33 | +-/ |
| 34 | + |
| 35 | +open List |
| 36 | + |
| 37 | +/-- A `DyckStep` is either `U` or `D`, corresponding to `(` and `)` respectively. -/ |
| 38 | +inductive DyckStep |
| 39 | + | U : DyckStep |
| 40 | + | D : DyckStep |
| 41 | + deriving Inhabited, DecidableEq |
| 42 | + |
| 43 | +/-- Named in analogy to `Bool.dichotomy`. -/ |
| 44 | +lemma DyckStep.dichotomy (s : DyckStep) : s = U ∨ s = D := by cases s <;> tauto |
| 45 | + |
| 46 | +open DyckStep |
| 47 | + |
| 48 | +/-- A Dyck word is a list of `DyckStep`s with as many `U`s as `D`s and with every prefix having |
| 49 | +at least as many `U`s as `D`s. -/ |
| 50 | +@[ext] |
| 51 | +structure DyckWord where |
| 52 | + /-- The underlying list -/ |
| 53 | + toList : List DyckStep |
| 54 | + /-- There are as many `U`s as `D`s -/ |
| 55 | + count_U_eq_count_D : toList.count U = toList.count D |
| 56 | + /-- Each prefix has as least as many `U`s as `D`s -/ |
| 57 | + count_D_le_count_U i : (toList.take i).count D ≤ (toList.take i).count U |
| 58 | + deriving DecidableEq |
| 59 | + |
| 60 | +attribute [coe] DyckWord.toList |
| 61 | +instance : Coe DyckWord (List DyckStep) := ⟨DyckWord.toList⟩ |
| 62 | + |
| 63 | +instance : Add DyckWord where |
| 64 | + add p q := ⟨p ++ q, by |
| 65 | + simp only [count_append, p.count_U_eq_count_D, q.count_U_eq_count_D], by |
| 66 | + simp only [take_append_eq_append_take, count_append] |
| 67 | + exact fun _ ↦ add_le_add (p.count_D_le_count_U _) (q.count_D_le_count_U _)⟩ |
| 68 | + |
| 69 | +instance : Zero DyckWord := ⟨[], by simp, by simp⟩ |
| 70 | + |
| 71 | +/-- Dyck words form an additive monoid under concatenation, with the empty word as 0. -/ |
| 72 | +instance : AddMonoid DyckWord where |
| 73 | + add_zero p := by ext1; exact append_nil _ |
| 74 | + zero_add p := by ext1; rfl |
| 75 | + add_assoc p q r := by ext1; apply append_assoc |
| 76 | + nsmul := nsmulRec |
| 77 | + |
| 78 | +namespace DyckWord |
| 79 | + |
| 80 | +variable (p q : DyckWord) |
| 81 | + |
| 82 | +lemma toList_eq_nil : p.toList = [] ↔ p = 0 := by rw [DyckWord.ext_iff]; rfl |
| 83 | + |
| 84 | +/-- The first element of a nonempty Dyck word is `U`. -/ |
| 85 | +lemma head_eq_U (h : p.toList ≠ []) : p.toList.head h = U := by |
| 86 | + rcases p with - | s; · tauto |
| 87 | + rw [head_cons] |
| 88 | + by_contra f |
| 89 | + rename_i _ nonneg |
| 90 | + simpa [s.dichotomy.resolve_left f] using nonneg 1 |
| 91 | + |
| 92 | +/-- The last element of a nonempty Dyck word is `D`. -/ |
| 93 | +lemma getLast_eq_D (h : p.toList ≠ []) : p.toList.getLast h = D := by |
| 94 | + by_contra f; have s := p.count_U_eq_count_D |
| 95 | + rw [← dropLast_append_getLast h, (dichotomy _).resolve_right f] at s |
| 96 | + simp_rw [dropLast_eq_take, count_append, count_singleton', ite_true, ite_false] at s |
| 97 | + have := p.count_D_le_count_U (p.toList.length - 1); omega |
| 98 | + |
| 99 | +lemma cons_tail_dropLast_concat (h : p.toList ≠ []) : |
| 100 | + U :: p.toList.dropLast.tail ++ [D] = p := by |
| 101 | + have : p.toList.dropLast.take 1 = [p.toList.head h] := by |
| 102 | + rcases p with - | ⟨s, ⟨- | ⟨t, r⟩⟩⟩ |
| 103 | + · tauto |
| 104 | + · rename_i bal _ |
| 105 | + cases s <;> simp at bal |
| 106 | + · tauto |
| 107 | + nth_rw 2 [← p.toList.dropLast_append_getLast h, ← p.toList.dropLast.take_append_drop 1] |
| 108 | + rw [p.getLast_eq_D, drop_one, this, p.head_eq_U] |
| 109 | + rfl |
| 110 | + |
| 111 | +/-- Prefix of a Dyck word as a Dyck word, given that the count of `U`s and `D`s in it are equal. -/ |
| 112 | +def take (i : ℕ) (hi : (p.toList.take i).count U = (p.toList.take i).count D) : DyckWord where |
| 113 | + toList := p.toList.take i |
| 114 | + count_U_eq_count_D := hi |
| 115 | + count_D_le_count_U k := by rw [take_take]; exact p.count_D_le_count_U (min k i) |
| 116 | + |
| 117 | +/-- Suffix of a Dyck word as a Dyck word, given that the count of `U`s and `D`s in the prefix |
| 118 | +are equal. -/ |
| 119 | +def drop (i : ℕ) (hi : (p.toList.take i).count U = (p.toList.take i).count D) : DyckWord where |
| 120 | + toList := p.toList.drop i |
| 121 | + count_U_eq_count_D := by |
| 122 | + have := p.count_U_eq_count_D |
| 123 | + rw [← take_append_drop i p.toList, count_append, count_append] at this |
| 124 | + omega |
| 125 | + count_D_le_count_U k := by |
| 126 | + rw [show i = min i (i + k) by omega, ← take_take] at hi |
| 127 | + rw [take_drop, ← add_le_add_iff_left (((p.toList.take (i + k)).take i).count U), |
| 128 | + ← count_append, hi, ← count_append, take_append_drop] |
| 129 | + exact p.count_D_le_count_U _ |
| 130 | + |
| 131 | +/-- Nest `p` in one pair of brackets, i.e. `x` becomes `(x)`. -/ |
| 132 | +def nest : DyckWord where |
| 133 | + toList := [U] ++ p ++ [D] |
| 134 | + count_U_eq_count_D := by simp [p.count_U_eq_count_D] |
| 135 | + count_D_le_count_U i := by |
| 136 | + simp only [take_append_eq_append_take, count_append] |
| 137 | + rw [← add_rotate (count D _), ← add_rotate (count U _)] |
| 138 | + apply add_le_add _ (p.count_D_le_count_U _) |
| 139 | + rcases i.eq_zero_or_pos with hi | hi; · simp [hi] |
| 140 | + rw [take_of_length_le (show [U].length ≤ i by rwa [length_singleton]), count_singleton'] |
| 141 | + simp only [ite_true, ite_false] |
| 142 | + rw [add_comm] |
| 143 | + exact add_le_add (zero_le _) ((count_le_length _ _).trans (by simp)) |
| 144 | + |
| 145 | +/-- Denest `p`, i.e. `(x)` becomes `x`, given that `p` is strictly positive in its interior |
| 146 | +(this ensures that `x` is a Dyck word). -/ |
| 147 | +def denest (h : p.toList ≠ []) (pos : ∀ i, 0 < i → i < p.toList.length → |
| 148 | + (p.toList.take i).count D < (p.toList.take i).count U) : DyckWord where |
| 149 | + toList := p.toList.dropLast.tail |
| 150 | + count_U_eq_count_D := by |
| 151 | + have := p.count_U_eq_count_D |
| 152 | + rw [← p.cons_tail_dropLast_concat h, count_append, count_cons] at this |
| 153 | + simpa using this |
| 154 | + count_D_le_count_U i := by |
| 155 | + have l1 : p.toList.take 1 = [p.toList.head h] := by rcases p with - | - <;> tauto |
| 156 | + have l3 : p.toList.length - 1 = p.toList.length - 1 - 1 + 1 := by |
| 157 | + rcases p with - | ⟨s, ⟨- | ⟨t, r⟩⟩⟩ |
| 158 | + · tauto |
| 159 | + · rename_i bal _ |
| 160 | + cases s <;> simp at bal |
| 161 | + · tauto |
| 162 | + rw [← drop_one, take_drop, dropLast_eq_take, take_take] |
| 163 | + have ub : min (1 + i) (p.toList.length - 1) < p.toList.length := |
| 164 | + (min_le_right _ p.toList.length.pred).trans_lt (Nat.pred_lt ((length_pos.mpr h).ne')) |
| 165 | + have lb : 0 < min (1 + i) (p.toList.length - 1) := by |
| 166 | + rw [l3, add_comm, min_add_add_right]; omega |
| 167 | + have eq := pos _ lb ub |
| 168 | + set j := min (1 + i) (p.toList.length - 1) |
| 169 | + rw [← (p.toList.take j).take_append_drop 1, count_append, count_append, take_take, |
| 170 | + min_eq_left (by omega), l1, p.head_eq_U] at eq |
| 171 | + simp only [count_singleton', ite_true, ite_false] at eq |
| 172 | + omega |
| 173 | + |
| 174 | +lemma denest_nest (h : p.toList ≠ []) (pos : ∀ i, 0 < i → i < p.toList.length → |
| 175 | + (p.toList.take i).count D < (p.toList.take i).count U) : (p.denest h pos).nest = p := by |
| 176 | + simpa [DyckWord.ext_iff] using p.cons_tail_dropLast_concat h |
| 177 | + |
| 178 | +section Semilength |
| 179 | + |
| 180 | +/-- The semilength of a Dyck word is half of the number of `DyckStep`s in it, or equivalently |
| 181 | +its number of `U`s. -/ |
| 182 | +def semilength : ℕ := p.toList.count U |
| 183 | + |
| 184 | +@[simp] lemma semilength_zero : semilength 0 = 0 := rfl |
| 185 | +@[simp] lemma semilength_add : (p + q).semilength = p.semilength + q.semilength := count_append .. |
| 186 | +@[simp] lemma semilength_nest : p.nest.semilength = p.semilength + 1 := by simp [semilength, nest] |
| 187 | + |
| 188 | +lemma semilength_eq_count_D : p.semilength = p.toList.count D := by |
| 189 | + rw [← count_U_eq_count_D]; rfl |
| 190 | + |
| 191 | +@[simp] |
| 192 | +lemma two_mul_semilength_eq_length : 2 * p.semilength = p.toList.length := by |
| 193 | + nth_rw 1 [two_mul, semilength, p.count_U_eq_count_D, semilength] |
| 194 | + convert (p.toList.length_eq_countP_add_countP (· == D)).symm |
| 195 | + rw [count]; congr!; rename_i s; cases s <;> tauto |
| 196 | + |
| 197 | +end Semilength |
| 198 | + |
| 199 | +end DyckWord |
0 commit comments