Skip to content

Commit f276328

Browse files
committed
feat: bijection between Dyck words and rooted binary trees (#9781)
1 parent f1ae38a commit f276328

File tree

1 file changed

+179
-41
lines changed

1 file changed

+179
-41
lines changed

Mathlib/Combinatorics/Enumerative/DyckWord.lean

Lines changed: 179 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3,9 +3,8 @@ Copyright (c) 2024 Jeremy Tan. All rights reserved.
33
Released under Apache 2.0 license as described in the file LICENSE.
44
Authors: Jeremy Tan
55
-/
6+
import Mathlib.Combinatorics.Enumerative.Catalan
67
import Mathlib.Data.List.Indexes
7-
import Mathlib.Logic.Relation
8-
import Mathlib.Tactic.Positivity.Core
98

109
/-!
1110
# Dyck words
@@ -16,22 +15,29 @@ If the symbols are `(` and `)` the latter restriction is equivalent to balanced
1615
if they are `U = (1, 1)` and `D = (1, -1)` the sequence is a lattice path from `(0, 0)` to `(0, 2n)`
1716
and the restriction requires the path to never go below the x-axis.
1817
18+
This file defines Dyck words and constructs their bijection with rooted binary trees,
19+
one consequence being that the number of Dyck words with length `2 * n` is `catalan n`.
20+
1921
## Main definitions
2022
2123
* `DyckWord`: a list of `U`s and `D`s with as many `U`s as `D`s and with every prefix having
2224
at least as many `U`s as `D`s.
2325
* `DyckWord.semilength`: semilength (half the length) of a Dyck word.
2426
* `DyckWord.firstReturn`: for a nonempty word, the index of the `D` matching the initial `U`.
2527
28+
## Main results
29+
30+
* `DyckWord.equivTree`: equivalence between Dyck words and rooted binary trees.
31+
See the docstrings of `DyckWord.equivTreeToFun` and `DyckWord.equivTreeInvFun` for details.
32+
* `DyckWord.equivTreesOfNumNodesEq`: equivalence between Dyck words of length `2 * n` and
33+
rooted binary trees with `n` internal nodes.
34+
* `DyckWord.card_dyckWord_semilength_eq_catalan`:
35+
there are `catalan n` Dyck words of length `2 * n` or semilength `n`.
36+
2637
## Implementation notes
2738
2839
While any two-valued type could have been used for `DyckStep`, a new enumerated type is used here
2940
to emphasise that the definition of a Dyck word does not depend on that underlying type.
30-
31-
## TODO
32-
33-
* Prove the bijection between Dyck words and rooted binary trees
34-
(https://github.com/leanprover-community/mathlib4/pull/9781).
3541
-/
3642

3743
open List
@@ -166,18 +172,30 @@ def nest : DyckWord where
166172
@[simp] lemma nest_ne_zero : p.nest ≠ 0 := by simp [← toList_ne_nil, nest]
167173

168174
variable (p) in
169-
/-- Denest `p`, i.e. `(x)` becomes `x`, given that `p` is strictly positive in its interior
170-
(this ensures that `x` is a Dyck word). -/
171-
def denest (pos : ∀ i, 0 < i → i < p.toList.length →
172-
(p.toList.take i).count D < (p.toList.take i).count U) : DyckWord where
175+
/-- A property stating that `p` is nonempty and strictly positive in its interior,
176+
i.e. is of the form `(x)` with `x` a Dyck word. -/
177+
def IsNested : Prop :=
178+
p ≠ 0 ∧ ∀ ⦃i⦄, 0 < i → i < p.toList.length → (p.toList.take i).count D < (p.toList.take i).count U
179+
180+
protected lemma IsNested.nest : p.nest.IsNested := ⟨nest_ne_zero, fun i lb ub ↦ by
181+
simp_rw [nest, length_append, length_singleton] at ub ⊢
182+
rw [take_append_of_le_length (by rw [singleton_append, length_cons]; omega),
183+
take_append_eq_append_take, take_of_length_le (by rw [length_singleton]; omega),
184+
length_singleton, singleton_append, count_cons_of_ne (by simp), count_cons_self,
185+
Nat.lt_add_one_iff]
186+
exact p.count_D_le_count_U _⟩
187+
188+
variable (p) in
189+
/-- Denest `p`, i.e. `(x)` becomes `x`, given that `p.IsNested`. -/
190+
def denest (hn : p.IsNested) : DyckWord where
173191
toList := p.toList.dropLast.tail
174192
count_U_eq_count_D := by
175193
have := p.count_U_eq_count_D
176-
rw [← cons_tail_dropLast_concat h, count_append, count_cons] at this
194+
rw [← cons_tail_dropLast_concat hn.1, count_append, count_cons] at this
177195
simpa using this
178196
count_D_le_count_U i := by
179-
have h' := toList_ne_nil.mpr h
180-
have l1 : p.toList.take 1 = [p.toList.head h'] := by rcases p with - | - <;> tauto
197+
replace h := toList_ne_nil.mpr hn.1
198+
have l1 : p.toList.take 1 = [p.toList.head h] := by rcases p with - | - <;> tauto
181199
have l3 : p.toList.length - 1 = p.toList.length - 1 - 1 + 1 := by
182200
rcases p with - | ⟨s, ⟨- | ⟨t, r⟩⟩⟩
183201
· tauto
@@ -186,19 +204,23 @@ def denest (pos : ∀ i, 0 < i → i < p.toList.length →
186204
· tauto
187205
rw [← drop_one, take_drop, dropLast_eq_take, take_take]
188206
have ub : min (1 + i) (p.toList.length - 1) < p.toList.length :=
189-
(min_le_right _ p.toList.length.pred).trans_lt (Nat.pred_lt ((length_pos.mpr h').ne'))
207+
(min_le_right _ p.toList.length.pred).trans_lt (Nat.pred_lt ((length_pos.mpr h).ne'))
190208
have lb : 0 < min (1 + i) (p.toList.length - 1) := by
191209
rw [l3, add_comm, min_add_add_right]; omega
192-
have eq := pos _ lb ub
210+
have eq := hn.2 lb ub
193211
set j := min (1 + i) (p.toList.length - 1)
194212
rw [← (p.toList.take j).take_append_drop 1, count_append, count_append, take_take,
195213
min_eq_left (by omega), l1, head_eq_U] at eq
196214
simp only [count_singleton', ite_true, ite_false] at eq
197215
omega
198216

199-
lemma denest_nest (pos : ∀ i, 0 < i → i < p.toList.length →
200-
(p.toList.take i).count D < (p.toList.take i).count U) : (p.denest h pos).nest = p := by
201-
simpa [DyckWord.ext_iff] using p.cons_tail_dropLast_concat h
217+
variable (p) in
218+
lemma nest_denest (hn) : (p.denest hn).nest = p := by
219+
simpa [DyckWord.ext_iff] using p.cons_tail_dropLast_concat hn.1
220+
221+
variable (p) in
222+
lemma denest_nest : p.nest.denest .nest = p := by
223+
simp_rw [nest, denest, DyckWord.ext_iff, dropLast_concat]; rfl
202224

203225
section Semilength
204226

@@ -297,22 +319,19 @@ lemma firstReturn_nest : p.nest.firstReturn = p.toList.length + 1 := by
297319
· simp_rw [length_range, u, length_append, length_cons]
298320
exact Nat.lt_add_one _
299321

300-
lemma firstReturn_nest_add : (p.nest + q).firstReturn = p.toList.length + 1 := by simp
301-
302322
variable (p) in
303323
/-- The left part of the Dyck word decomposition,
304324
inside the `U, D` pair that `firstReturn` refers to. `insidePart 0 = 0`. -/
305325
def insidePart : DyckWord :=
306326
if h : p = 0 then 0 else
307327
(p.take (p.firstReturn + 1) (count_take_firstReturn_add_one h)).denest
308-
(by rw [← toList_ne_nil, take]; simpa using toList_ne_nil.mpr h)
309-
(fun i lb ub ↦ by
328+
by rw [← toList_ne_nil, take]; simpa using toList_ne_nil.mpr h, fun i lb ub ↦ by
310329
simp only [take, length_take, lt_min_iff] at ub ⊢
311330
replace ub := ub.1
312331
rw [take_take, min_eq_left ub.le]
313332
rw [show i = i - 1 + 1 by omega] at ub ⊢
314333
rw [Nat.add_lt_add_iff_right] at ub
315-
exact count_D_lt_count_U_of_lt_firstReturn ub)
334+
exact count_D_lt_count_U_of_lt_firstReturn ub
316335

317336
variable (p) in
318337
/-- The right part of the Dyck word decomposition,
@@ -323,10 +342,37 @@ def outsidePart : DyckWord :=
323342
@[simp] lemma insidePart_zero : insidePart 0 = 0 := by simp [insidePart]
324343
@[simp] lemma outsidePart_zero : outsidePart 0 = 0 := by simp [outsidePart]
325344

345+
include h in
346+
@[simp]
347+
lemma insidePart_add : (p + q).insidePart = p.insidePart := by
348+
simp_rw [insidePart, firstReturn_add, add_eq_zero', h, false_and, dite_false, ite_false,
349+
DyckWord.ext_iff, take]
350+
congr 3
351+
exact take_append_of_le_length (firstReturn_lt_length h)
352+
353+
include h in
354+
@[simp]
355+
lemma outsidePart_add : (p + q).outsidePart = p.outsidePart + q := by
356+
simp_rw [outsidePart, firstReturn_add, add_eq_zero', h, false_and, dite_false, ite_false,
357+
DyckWord.ext_iff, drop]
358+
exact drop_append_of_le_length (firstReturn_lt_length h)
359+
360+
@[simp]
361+
lemma insidePart_nest : p.nest.insidePart = p := by
362+
simp_rw [insidePart, nest_ne_zero, dite_false, firstReturn_nest]
363+
convert p.denest_nest; rw [DyckWord.ext_iff]; apply take_of_length_le
364+
simp_rw [nest, length_append, length_singleton]; omega
365+
366+
@[simp]
367+
lemma outsidePart_nest : p.nest.outsidePart = 0 := by
368+
simp_rw [outsidePart, nest_ne_zero, dite_false, firstReturn_nest]
369+
rw [DyckWord.ext_iff]; apply drop_of_length_le
370+
simp_rw [nest, length_append, length_singleton]; omega
371+
326372
include h in
327373
@[simp]
328374
theorem nest_insidePart_add_outsidePart : p.insidePart.nest + p.outsidePart = p := by
329-
simp_rw [insidePart, outsidePart, h, dite_false, denest_nest, DyckWord.ext_iff]
375+
simp_rw [insidePart, outsidePart, h, dite_false, nest_denest, DyckWord.ext_iff]
330376
apply take_append_drop
331377

332378
include h in
@@ -345,19 +391,6 @@ theorem semilength_outsidePart_lt : p.outsidePart.semilength < p.semilength := b
345391
have := semilength_insidePart_add_semilength_outsidePart_add_one h
346392
omega
347393

348-
theorem insidePart_nest_add : (p.nest + q).insidePart = p := by
349-
have : p.toList.length + 1 + 1 = p.nest.toList.length := by simp [nest]
350-
simp_rw [insidePart, firstReturn_nest_add, take,
351-
show (p.nest + q).toList = p.nest.toList ++ q.toList by rfl, take_append_eq_append_take,
352-
this, take_length, tsub_self, take_zero, append_nil, denest, nest, dropLast_concat]
353-
rfl
354-
355-
theorem outsidePart_nest_add : (p.nest + q).outsidePart = q := by
356-
have : p.toList.length + 1 + 1 = p.nest.toList.length := by simp [nest]
357-
simp_rw [outsidePart, add_eq_zero', nest_ne_zero, false_and, dite_false,
358-
firstReturn_nest_add, drop, show (p.nest + q).toList = p.nest.toList ++ q.toList by rfl,
359-
drop_append_eq_append_drop, this, drop_length, nil_append, tsub_self, drop_zero]
360-
361394
end FirstReturn
362395

363396
section Order
@@ -367,13 +400,16 @@ instance : Preorder DyckWord where
367400
le_refl p := Relation.ReflTransGen.refl
368401
le_trans p q r := Relation.ReflTransGen.trans
369402

370-
lemma zero_le (p : DyckWord) : 0 ≤ p := by
403+
lemma le_add_self (p q : DyckWord) : q ≤ p + q := by
371404
by_cases h : p = 0
372405
· simp [h]
373-
· have := semilength_insidePart_lt h
374-
exact (zero_le _).trans (Relation.ReflTransGen.single (Or.inl rfl))
406+
· have := semilength_outsidePart_lt h
407+
exact (le_add_self p.outsidePart q).trans
408+
(Relation.ReflTransGen.single (Or.inr (outsidePart_add h).symm))
375409
termination_by p.semilength
376410

411+
variable (p) in protected lemma zero_le : 0 ≤ p := add_zero p ▸ le_add_self p 0
412+
377413
lemma infix_of_le (h : p ≤ q) : p.toList <:+: q.toList := by
378414
induction h with
379415
| refl => exact infix_refl _
@@ -392,6 +428,19 @@ lemma infix_of_le (h : p ≤ q) : p.toList <:+: q.toList := by
392428
use [U] ++ r.insidePart ++ [D], []; rwa [append_nil]
393429
exact ih.trans (hm ▸ this)
394430

431+
lemma le_of_suffix (h : p.toList <:+ q.toList) : p ≤ q := by
432+
obtain ⟨r', h⟩ := h
433+
have hc : (q.toList.take (q.toList.length - p.toList.length)).count U =
434+
(q.toList.take (q.toList.length - p.toList.length)).count D := by
435+
have hq := q.count_U_eq_count_D
436+
rw [← h] at hq ⊢
437+
rw [count_append, count_append, p.count_U_eq_count_D, Nat.add_right_cancel_iff] at hq
438+
simp [hq]
439+
let r : DyckWord := q.take _ hc
440+
have e : r' = r := by
441+
simp_rw [r, take, ← h, length_append, add_tsub_cancel_right, take_left']
442+
rw [e] at h; replace h : r + p = q := DyckWord.ext h; rw [← h]; exact le_add_self ..
443+
395444
/-- Partial order on Dyck words: `p ≤ q` if a (possibly empty) sequence of
396445
`insidePart` and `outsidePart` operations can turn `q` into `p`. -/
397446
instance : PartialOrder DyckWord where
@@ -400,6 +449,10 @@ instance : PartialOrder DyckWord where
400449
have h₂ := infix_of_le qp
401450
exact DyckWord.ext <| h₁.eq_of_length <| h₁.length_le.antisymm h₂.length_le
402451

452+
protected lemma pos_iff_ne_zero : 0 < p ↔ p ≠ 0 := by
453+
rw [ne_comm, iff_comm, ne_iff_lt_iff_le]
454+
exact DyckWord.zero_le p
455+
403456
lemma monotone_semilength : Monotone semilength := fun p q pq ↦ by
404457
induction pq with
405458
| refl => rfl
@@ -422,6 +475,91 @@ lemma strictMono_semilength : StrictMono semilength := fun p q pq ↦ by
422475

423476
end Order
424477

478+
section Tree
479+
480+
open Tree
481+
482+
/-- Convert a Dyck word to a binary rooted tree.
483+
484+
`f(0) = nil`. For a nonzero word find the `D` that matches the initial `U`,
485+
which has index `p.firstReturn`, then let `x` be everything strictly between said `U` and `D`,
486+
and `y` be everything strictly after said `D`. `p = x.nest + y` with `x, y` (possibly empty)
487+
Dyck words. `f(p) = f(x) △ f(y)`, where △ (defined in `Mathlib.Data.Tree`) joins two subtrees
488+
to a new root node. -/
489+
private def equivTreeToFun (p : DyckWord) : Tree Unit :=
490+
if h : p = 0 then nil else
491+
have := semilength_insidePart_lt h
492+
have := semilength_outsidePart_lt h
493+
equivTreeToFun p.insidePart △ equivTreeToFun p.outsidePart
494+
termination_by p.semilength
495+
496+
/-- Convert a binary rooted tree to a Dyck word.
497+
498+
`g(nil) = 0`. A nonempty tree with left subtree `l` and right subtree `r`
499+
is sent to `g(l).nest + g(r)`. -/
500+
private def equivTreeInvFun : Tree Unit → DyckWord
501+
| Tree.nil => 0
502+
| Tree.node _ l r => (equivTreeInvFun l).nest + equivTreeInvFun r
503+
504+
@[nolint unusedHavesSuffices]
505+
private lemma equivTree_left_inv (p) : equivTreeInvFun (equivTreeToFun p) = p := by
506+
by_cases h : p = 0
507+
· simp [h, equivTreeToFun, equivTreeInvFun]
508+
· rw [equivTreeToFun]
509+
simp_rw [h, dite_false, equivTreeInvFun]
510+
have := semilength_insidePart_lt h
511+
have := semilength_outsidePart_lt h
512+
rw [equivTree_left_inv p.insidePart, equivTree_left_inv p.outsidePart]
513+
exact nest_insidePart_add_outsidePart h
514+
termination_by p.semilength
515+
516+
@[nolint unusedHavesSuffices]
517+
private lemma equivTree_right_inv : ∀ t, equivTreeToFun (equivTreeInvFun t) = t
518+
| Tree.nil => by simp [equivTreeInvFun, equivTreeToFun]
519+
| Tree.node _ _ _ => by simp [equivTreeInvFun, equivTreeToFun, equivTree_right_inv]
520+
521+
/-- Equivalence between Dyck words and rooted binary trees. -/
522+
def equivTree : DyckWord ≃ Tree Unit where
523+
toFun := equivTreeToFun
524+
invFun := equivTreeInvFun
525+
left_inv := equivTree_left_inv
526+
right_inv := equivTree_right_inv
527+
528+
@[nolint unusedHavesSuffices]
529+
lemma semilength_eq_numNodes_equivTree (p) : p.semilength = (equivTree p).numNodes := by
530+
by_cases h : p = 0
531+
· simp [h, equivTree, equivTreeToFun]
532+
· rw [equivTree, Equiv.coe_fn_mk, equivTreeToFun]
533+
simp_rw [h, dite_false, numNodes]
534+
have := semilength_insidePart_lt h
535+
have := semilength_outsidePart_lt h
536+
rw [← semilength_insidePart_add_semilength_outsidePart_add_one h,
537+
semilength_eq_numNodes_equivTree p.insidePart,
538+
semilength_eq_numNodes_equivTree p.outsidePart]; rfl
539+
termination_by p.semilength
540+
541+
/-- Equivalence between Dyck words of semilength `n` and rooted binary trees with
542+
`n` internal nodes. -/
543+
def equivTreesOfNumNodesEq (n : ℕ) :
544+
{ p : DyckWord // p.semilength = n } ≃ treesOfNumNodesEq n where
545+
toFun := fun ⟨p, _⟩ ↦ ⟨equivTree p, by
546+
rwa [mem_treesOfNumNodesEq, ← semilength_eq_numNodes_equivTree]⟩
547+
invFun := fun ⟨tr, _⟩ ↦ ⟨equivTree.symm tr, by
548+
rwa [semilength_eq_numNodes_equivTree, ← mem_treesOfNumNodesEq, Equiv.apply_symm_apply]⟩
549+
left_inv _ := by simp only [Equiv.symm_apply_apply]
550+
right_inv _ := by simp only [Equiv.apply_symm_apply]
551+
552+
instance {n : ℕ} : Fintype { p : DyckWord // p.semilength = n } :=
553+
Fintype.ofEquiv _ (equivTreesOfNumNodesEq n).symm
554+
555+
/-- There are `catalan n` Dyck words of semilength `n` (or length `2 * n`). -/
556+
theorem card_dyckWord_semilength_eq_catalan (n : ℕ) :
557+
Fintype.card { p : DyckWord // p.semilength = n } = catalan n := by
558+
rw [← Fintype.ofEquiv_card (equivTreesOfNumNodesEq n), ← treesOfNumNodesEq_card_eq_catalan]
559+
convert Fintype.card_coe _
560+
561+
end Tree
562+
425563
end DyckWord
426564

427565
namespace Mathlib.Meta.Positivity

0 commit comments

Comments
 (0)