feat(data/fintype/card_embedding): the birthday problem (#7363)

ericrbg · ericrbg · commit 233eff0dd210 · 2021-05-21T11:28:01.000Z
Co-authored-by: Eric &lt;37984851+ericrbg@users.noreply.github.com&gt;
diff --git a/archive/100-theorems-list/93_birthday_problem.lean b/archive/100-theorems-list/93_birthday_problem.lean
@@ -0,0 +1,22 @@
+/-
+Copyright (c) 2021 Eric Rodriguez. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Eric Rodriguez
+-/
+import data.fintype.card_embedding
+
+/-!
+# Birthday Problem
+
+This file proves Theorem 93 from the [100 Theorems List](https://www.cs.ru.nl/~freek/100/).
+
+As opposed to the standard probabilistic statement, we instead state the birthday problem
+in terms of injective functions. The general result about `fintype.card (α ↪ β)` which this proof
+uses is `fintype.card_embedding`.
+-/
+
+local notation `‖` x `‖` := fintype.card x
+
+theorem birthday :
+  2 * ‖fin 23 ↪ fin 365‖ < ‖fin 23 → fin 365‖ ∧ 2 * ‖fin 22 ↪ fin 365‖ > ‖fin 22 → fin 365‖ :=
+by split; norm_num [nat.desc_fac]
diff --git a/docs/100.yaml b/docs/100.yaml
@@ -343,6 +343,10 @@
   title  : Pick’s Theorem
 93:
   title  : The Birthday Problem
+  decl   : fintype.card_embedding
+  links  :
+    mathlib archive: https://github.com/leanprover-community/mathlib/blob/master/archive/100-theorems-list/93_birthday_problem.lean
+  author : Eric Rodriguez
 94:
   title  : The Law of Cosines
   decl   : euclidean_geometry.dist_sq_eq_dist_sq_add_dist_sq_sub_two_mul_dist_mul_dist_mul_cos_angle
diff --git a/src/data/equiv/basic.lean b/src/data/equiv/basic.lean
@@ -1418,6 +1418,14 @@ def subtype_prod_equiv_prod {α : Type u} {β : Type v} {p : α → Prop} {q : 
  λ ⟨⟨_, _⟩, ⟨_, _⟩⟩, rfl,
  λ ⟨⟨_, _⟩, ⟨_, _⟩⟩, rfl⟩
 
+/-- A subtype of a `prod` is equivalent to a sigma type whose fibers are subtypes. -/
+def subtype_prod_equiv_sigma_subtype {α β : Type*} (p : α → β → Prop) :
+  {x : α × β // p x.1 x.2} ≃ Σ a, {b : β // p a b} :=
+{ to_fun := λ x, ⟨x.1.1, x.1.2, x.prop⟩,
+  inv_fun := λ x, ⟨⟨x.1, x.2⟩, x.2.prop⟩,
+  left_inv := λ x, by ext; refl,
+  right_inv := λ ⟨a, b, pab⟩, rfl }
+
 end
 
 section subtype_equiv_codomain
diff --git a/src/data/equiv/embedding.lean b/src/data/equiv/embedding.lean
@@ -0,0 +1,85 @@
+/-
+Copyright (c) 2021 Eric Rodriguez. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Eric Rodriguez
+-/
+import logic.embedding
+import data.set.lattice
+
+/-!
+# Equivalences on embeddings
+
+This file shows some advanced equivalences on embeddings, useful for constructing larger
+embeddings from smaller ones.
+-/
+
+open function.embedding
+
+namespace equiv
+
+/-- Embeddings from a sum type are equivalent to two separate embeddings with disjoint ranges. -/
+def sum_embedding_equiv_prod_embedding_disjoint {α β γ : Type*} :
+  ((α ⊕ β) ↪ γ) ≃ {f : (α ↪ γ) × (β ↪ γ) // disjoint (set.range f.1) (set.range f.2)} :=
+{ to_fun := λ f, ⟨(inl.trans f, inr.trans f),
+  begin
+    rintros _ ⟨⟨a, h⟩, ⟨b, rfl⟩⟩,
+    simp only [trans_apply, inl_apply, inr_apply] at h,
+    have : sum.inl a = sum.inr b := f.injective h,
+    simp only at this,
+    assumption
+  end⟩,
+  inv_fun := λ ⟨⟨f, g⟩, disj⟩,
+    ⟨λ x, match x with
+    | (sum.inl a) := f a
+    | (sum.inr b) := g b
+    end,
+    begin
+      rintros (a₁|b₁) (a₂|b₂) f_eq;
+      simp only [equiv.coe_fn_symm_mk, sum.elim_inl, sum.elim_inr] at f_eq,
+      { rw f.injective f_eq },
+      { simp! only at f_eq, exfalso, exact disj ⟨⟨a₁, by simp⟩, ⟨b₂, by simp [f_eq]⟩⟩  },
+      { simp! only at f_eq, exfalso, exact disj ⟨⟨a₂, by simp⟩, ⟨b₁, by simp [f_eq]⟩⟩  },
+      { rw g.injective f_eq }
+    end⟩,
+  left_inv := λ f, by { dsimp only, ext, cases x; simp! },
+  right_inv := λ ⟨⟨f, g⟩, _⟩, by { simp only [prod.mk.inj_iff], split; ext; simp! } }
+
+/-- Embeddings whose range lies within a set are equivalent to embeddings to that set.
+This is `function.embedding.cod_restrict` as an equiv. -/
+def cod_restrict (α : Type*) {β : Type*} (bs : set β) :
+  {f : α ↪ β // ∀ a, f a ∈ bs} ≃ (α ↪ bs) :=
+{ to_fun := λ f, (f : α ↪ β).cod_restrict bs f.prop,
+  inv_fun := λ f, ⟨f.trans (function.embedding.subtype _), λ a, (f a).prop⟩,
+  left_inv := λ x, by ext; refl,
+  right_inv := λ x, by ext; refl }
+
+/-- Pairs of embeddings with disjoint ranges are equivalent to a dependent sum of embeddings,
+in which the second embedding cannot take values in the range of the first. -/
+def prod_embedding_disjoint_equiv_sigma_embedding_restricted {α β γ : Type*} :
+  {f : (α ↪ γ) × (β ↪ γ) // disjoint (set.range f.1) (set.range f.2)} ≃
+  (Σ f : α ↪ γ, β ↪ ↥((set.range f)ᶜ)) :=
+(subtype_prod_equiv_sigma_subtype $
+  λ (a : α ↪ γ) (b : β ↪ _), disjoint (set.range a) (set.range b)).trans $
+  equiv.sigma_congr_right $ λ a,
+    (subtype_equiv_prop begin
+      ext f,
+      rw [←set.range_subset_iff, set.subset_compl_iff_disjoint],
+      exact disjoint.comm.trans disjoint_iff,
+    end).trans (cod_restrict _ _)
+
+/-- A combination of the above results, allowing us to turn one embedding over a sum type
+into two dependent embeddings, the second of which avoids any members of the range
+of the first. This is helpful for constructing larger embeddings out of smaller ones. -/
+def sum_embedding_equiv_sigma_embedding_restricted {α β γ : Type*} :
+  ((α ⊕ β) ↪ γ) ≃ (Σ f : α ↪ γ, β ↪ ↥((set.range f)ᶜ))
+:= equiv.trans sum_embedding_equiv_prod_embedding_disjoint
+               prod_embedding_disjoint_equiv_sigma_embedding_restricted
+
+/-- Embeddings from a single-member type are equivalent to members of the target type. -/
+def unique_embedding_equiv_result {α β : Type*} [unique α] : (α ↪ β) ≃ β :=
+{ to_fun := λ f, f (default α),
+  inv_fun := λ x, ⟨λ _, x, λ _ _ _, subsingleton.elim _ _⟩,
+  left_inv := λ _, by { ext, simp_rw [function.embedding.coe_fn_mk], congr },
+  right_inv := λ _, by simp }
+
+end equiv
diff --git a/src/data/fintype/basic.lean b/src/data/fintype/basic.lean
@@ -938,6 +938,14 @@ lemma equiv_of_fintype_self_embedding_to_embedding {α : Type*} [fintype α] (e
   e.equiv_of_fintype_self_embedding.to_embedding = e :=
 by { ext, refl, }
 
+/-- If `‖β‖ < ‖α‖` there are no embeddings `α ↪ β`.
+This is a formulation of the pigeonhole principle.
+
+Note this cannot be an instance as it needs `h`. -/
+@[simp] lemma is_empty_of_card_lt {α β} [fintype α] [fintype β]
+  (h : fintype.card β < fintype.card α) : is_empty (α ↪ β) :=
+⟨λ f, let ⟨x, y, ne, feq⟩ := fintype.exists_ne_map_eq_of_card_lt f h in ne $ f.injective feq⟩
+
 end function.embedding
 
 @[simp]
@@ -1017,6 +1025,10 @@ fintype.of_surjective quotient.mk (λ x, quotient.induction_on x (λ x, ⟨x, rf
 instance finset.fintype [fintype α] : fintype (finset α) :=
 ⟨univ.powerset, λ x, finset.mem_powerset.2 (finset.subset_univ _)⟩
 
+instance function.embedding.fintype {α β} [fintype α] [fintype β]
+  [decidable_eq α] [decidable_eq β] : fintype (α ↪ β) :=
+fintype.of_equiv _ (equiv.subtype_injective_equiv_embedding α β)
+
 @[simp] lemma fintype.card_finset [fintype α] :
   fintype.card (finset α) = 2 ^ (fintype.card α) :=
 finset.card_powerset finset.univ
@@ -1402,6 +1414,10 @@ not_fintype α
 @[simp] lemma not_nonempty_fintype {α : Type*} : ¬ nonempty (fintype α) ↔ infinite α :=
 not_nonempty_iff.trans is_empty_fintype
 
+/-- A non-infinite type is a fintype. -/
+noncomputable def fintype_of_not_infinite {α : Type*} (h : ¬ infinite α) : fintype α :=
+((not_iff_comm.mp not_nonempty_fintype).mp h).some
+
 lemma finset.exists_minimal {α : Type*} [preorder α] (s : finset α) (h : s.nonempty) :
   ∃ m ∈ s, ∀ x ∈ s, ¬ (x < m) :=
 begin
@@ -1488,6 +1504,18 @@ begin
   intros x y, contrapose, apply hf,
 end
 
+instance function.embedding.is_empty {α β} [infinite α] [fintype β] : is_empty (α ↪ β) :=
+⟨λ f, let ⟨x, y, ne, feq⟩ := fintype.exists_ne_map_eq_of_infinite f in ne $ f.injective feq⟩
+
+@[priority 100]
+noncomputable instance function.embedding.fintype'' {α β : Type*} [fintype β] : fintype (α ↪ β) :=
+begin
+  by_cases h : infinite α,
+  { resetI, apply_instance },
+  { have := fintype_of_not_infinite h, classical, apply_instance }
+  -- the `classical` generates `decidable_eq α/β` instances, and resets instance cache
+end
+
 /--
 The strong pigeonhole principle for infinitely many pigeons in
 finitely many pigeonholes.  If there are infinitely many pigeons in
@@ -1499,12 +1527,11 @@ See also: `fintype.exists_ne_map_eq_of_infinite`
 lemma fintype.exists_infinite_fiber [infinite α] [fintype β] (f : α → β) :
   ∃ y : β, infinite (f ⁻¹' {y}) :=
 begin
-  classical, by_contra hf, push_neg at hf,
-  haveI h' : ∀ (y : β), fintype (f ⁻¹' {y}) := begin
-    intro y, specialize hf y,
-    rw [←not_nonempty_fintype, not_not] at hf,
-    exact classical.choice hf,
-  end,
+  classical,
+  by_contra hf,
+  push_neg at hf,
+
+  haveI := λ y, fintype_of_not_infinite $ hf y,
   let key : fintype α :=
   { elems := univ.bUnion (λ (y : β), (f ⁻¹' {y}).to_finset),
     complete := by simp },
@@ -1517,6 +1544,10 @@ assume (hf : surjective f),
 have H : infinite α := infinite.of_surjective f hf,
 by exactI not_fintype α
 
+-- the instance generated by `is_empty → subsingleton → fintype is non-computable
+instance function.embedding.fintype' {α β} [infinite α] [fintype β] : fintype (α ↪ β) :=
+{ elems := finset.empty, complete := is_empty_elim }
+
 instance nat.infinite : infinite ℕ :=
 ⟨λ ⟨s, hs⟩, finset.not_mem_range_self $ s.subset_range_sup_succ (hs _)⟩
 
diff --git a/src/data/fintype/card_embedding.lean b/src/data/fintype/card_embedding.lean
@@ -0,0 +1,83 @@
+/-
+Copyright (c) 2021 Eric Rodriguez. All rights reserved.
+Released under Apache 2.0 license as described in the file LICENSE.
+Authors: Eric Rodriguez
+-/
+import data.fintype.card
+import data.equiv.fin
+import data.equiv.embedding
+
+/-!
+# Birthday Problem
+
+This file establishes the cardinality of `α ↪ β` in full generality.
+-/
+
+local notation `|` x `|` := finset.card x
+local notation `‖` x `‖` := fintype.card x
+
+open_locale nat
+
+namespace fintype
+
+-- We need the separate `fintype α` instance as it contains data,
+-- and may not match definitionally with the instance coming from `unique.fintype`.
+lemma card_embedding_of_unique
+  {α β : Type*} [unique α] [fintype α] [fintype β] [decidable_eq α] [decidable_eq β]:
+‖α ↪ β‖ = ‖β‖ := card_congr equiv.unique_embedding_equiv_result
+
+private lemma card_embedding_aux (n : ℕ) (β) [fintype β] [decidable_eq β] (h : n ≤ ‖β‖) :
+  ‖fin n ↪ β‖ = nat.desc_fac (‖β‖ - n) n :=
+begin
+  induction n with n hn,
+  { nontriviality (fin 0 ↪ β),
+    rw [nat.desc_fac_zero, fintype.card_eq_one_iff],
+    refine ⟨nonempty.some nontrivial.to_nonempty, λ x, function.embedding.ext fin.elim0⟩ },
+
+  rw [nat.succ_eq_add_one, ←card_congr (equiv.embedding_congr fin_sum_fin_equiv (equiv.refl β))],
+  rw card_congr equiv.sum_embedding_equiv_sigma_embedding_restricted,
+  -- these `rw`s create goals for instances, which it doesn't infer for some reason
+  all_goals { try { apply_instance } }, -- however, this needs to be done here instead of at the end
+  -- else, a later `simp`, which depends on the `fintype` instance, won't work.
+
+  have : ∀ (f : fin n ↪ β), ‖fin 1 ↪ ↥((set.range f)ᶜ)‖ = ‖β‖ - n,
+  { intro f,
+    rw card_embedding_of_unique,
+    rw card_of_finset' (finset.map f finset.univ)ᶜ,
+    { rw [finset.card_compl, finset.card_map, finset.card_fin] },
+    { simp } },
+
+  -- putting `card_sigma` in `simp` causes it not to fully simplify
+  rw card_sigma,
+  simp only [this, finset.sum_const, finset.card_univ, nsmul_eq_mul, nat.cast_id],
+
+  replace h := nat.lt_of_succ_le h,
+  rw [hn h.le, mul_comm, nat.desc_fac_of_sub h]
+end
+
+variables {α β : Type*} [fintype α] [fintype β] [decidable_eq α] [decidable_eq β]
+
+/- Establishes the cardinality of the type of all injections, if any exist.  -/
+@[simp] theorem card_embedding (h : ‖α‖ ≤ ‖β‖) : ‖α ↪ β‖ = (nat.desc_fac (‖β‖ - ‖α‖) ‖α‖) :=
+begin
+  trunc_cases fintype.trunc_equiv_fin α with eq,
+  rw fintype.card_congr (equiv.embedding_congr eq (equiv.refl β)),
+  exact card_embedding_aux _ _ h,
+end
+
+/-- If `‖β‖ < ‖α‖` there are no embeddings `α ↪ β`.
+This is a formulation of the pigeonhole principle. -/
+@[simp] theorem card_embedding_eq_zero (h : ‖β‖ < ‖α‖) : ‖α ↪ β‖ = 0 :=
+card_eq_zero_iff.mpr $ function.embedding.is_empty_of_card_lt h
+
+theorem card_embedding_eq_if : ‖α ↪ β‖ = if ‖α‖ ≤ ‖β‖ then nat.desc_fac (‖β‖ - ‖α‖) ‖α‖ else 0 :=
+begin
+  split_ifs with h,
+  { exact card_embedding h },
+  { exact card_embedding_eq_zero (not_le.mp h) }
+end
+
+lemma card_embedding_eq_infinite {α β} [infinite α] [fintype β] : ‖α ↪ β‖ = 0 :=
+by rw card_eq_zero_iff; apply_instance
+
+end fintype
diff --git a/src/data/nat/factorial.lean b/src/data/nat/factorial.lean
@@ -184,6 +184,14 @@ begin
   exact (nat.mul_div_cancel' $ factorial_dvd_factorial $ le.intro rfl).symm
 end
 
+lemma desc_fac_of_sub {n k : ℕ} (h : n < k) :
+  (k - n) * (k - n).desc_fac n = (k - (n + 1)).desc_fac (n + 1) :=
+begin
+  set t := k - n.succ with ht,
+  suffices h' : k - n = t.succ, by rw [←ht, h', succ_desc_fac, desc_fac_succ],
+  rw [ht, succ_eq_add_one, ←sub_sub_assoc h (succ_pos _), succ_sub_one],
+end
+
 end desc_fac
 
 end nat
diff --git a/src/logic/embedding.lean b/src/logic/embedding.lean
@@ -62,6 +62,9 @@ lemma ext_iff {α β} {f g : embedding α β} : (∀ x, f x = g x) ↔ f = g :=
 @[simp] theorem coe_fn_mk {α β} (f : α → β) (i) :
   (@mk _ _ f i : α → β) = f := rfl
 
+@[simp] lemma mk_coe {α β : Type*} (f : α ↪ β) (inj) : (⟨f, inj⟩ : α ↪ β) = f :=
+by { ext, simp }
+
 theorem injective {α β} (f : α ↪ β) : injective f := f.inj'
 
 @[refl, simps {simp_rhs := tt}]
@@ -232,6 +235,46 @@ end function
 
 namespace equiv
 
+open function.embedding
+
+/-- The type of embeddings `α ↪ β` is equivalent to
+    the subtype of all injective functions `α → β`. -/
+def subtype_injective_equiv_embedding (α β : Sort*) :
+  {f : α → β // function.injective f} ≃ (α ↪ β) :=
+{ to_fun := λ f, ⟨f.val, f.property⟩,
+  inv_fun := λ f, ⟨f, f.injective⟩,
+  left_inv := λ f, by simp,
+  right_inv := λ f, by {ext, refl} }
+
+/-- If `α₁ ≃ α₂` and `β₁ ≃ β₂`, then the type of embeddings `α₁ ↪ β₁`
+is equivalent to the type of embeddings `α₂ ↪ β₂`. -/
+@[congr, simps apply] def embedding_congr {α β γ δ : Sort*}
+  (h : α ≃ β) (h' : γ ≃ δ) : (α ↪ γ) ≃ (β ↪ δ) :=
+{ to_fun := λ f, h.symm.to_embedding.trans $ f.trans $ h'.to_embedding,
+  inv_fun := λ f, h.to_embedding.trans $ f.trans $ h'.symm.to_embedding,
+  left_inv := λ x, by {ext, simp},
+  right_inv := λ x, by {ext, simp} }
+
+@[simp] lemma embedding_congr_refl {α β : Sort*} :
+  embedding_congr (equiv.refl α) (equiv.refl β) = equiv.refl (α ↪ β) :=
+by {ext, refl}
+
+@[simp] lemma embedding_congr_trans {α₁ β₁ α₂ β₂ α₃ β₃ : Sort*}
+  (e₁ : α₁ ≃ α₂) (e₁' : β₁ ≃ β₂) (e₂ : α₂ ≃ α₃) (e₂' : β₂ ≃ β₃) :
+  embedding_congr (e₁.trans e₂) (e₁'.trans e₂') =
+  (embedding_congr e₁ e₁').trans (embedding_congr e₂ e₂') :=
+rfl
+
+@[simp] lemma embedding_congr_symm {α₁ β₁ α₂ β₂ : Sort*} (e₁ : α₁ ≃ α₂) (e₂ : β₁ ≃ β₂) :
+  (embedding_congr e₁ e₂).symm = embedding_congr e₁.symm e₂.symm :=
+rfl
+
+lemma embedding_congr_apply_trans {α₁ β₁ γ₁ α₂ β₂ γ₂ : Sort*}
+  (ea : α₁ ≃ α₂) (eb : β₁ ≃ β₂) (ec : γ₁ ≃ γ₂) (f : α₁ ↪ β₁) (g : β₁ ↪ γ₁) :
+  equiv.embedding_congr ea ec (f.trans g) =
+  (equiv.embedding_congr ea eb f).trans (equiv.embedding_congr eb ec g) :=
+by {ext, simp}
+
 @[simp]
 lemma refl_to_embedding {α : Type*} : (equiv.refl α).to_embedding = function.embedding.refl α := rfl
 
diff --git a/src/ring_theory/polynomial/pochhammer.lean b/src/ring_theory/polynomial/pochhammer.lean
@@ -11,7 +11,8 @@ import data.polynomial.eval
 
 We define and prove some basic relations about
 `pochhammer S n : polynomial S = X * (X+1) * ... * (X + n - 1)`
-which is also known as the rising factorial.
+which is also known as the rising factorial. A version of this definition
+that is focused on `nat` can be found in `data.nat.factorial` as `desc_fac`.
 
 ## Implementation