diff --git a/src/data/fintype/basic.lean b/src/data/fintype/basic.lean index ede6df4440323..78315c8cb6568 100644 --- a/src/data/fintype/basic.lean +++ b/src/data/fintype/basic.lean @@ -1306,6 +1306,37 @@ instance Prop.fintype : fintype Prop := instance subtype.fintype (p : α → Prop) [decidable_pred p] [fintype α] : fintype {x // p x} := fintype.subtype (univ.filter p) (by simp) +lemma image_subtype_ne_univ_eq_image_erase [fintype α] (k : β) (b : α → β) : + image (λ i : {a // b a ≠ k}, b ↑i) univ = (image b univ).erase k := +begin + apply subset_antisymm, + { rw image_subset_iff, + intros i _, + apply mem_erase_of_ne_of_mem i.2 (mem_image_of_mem _ (mem_univ _)) }, + { intros i hi, + rw mem_image, + rcases mem_image.1 (erase_subset _ _ hi) with ⟨a, _, ha⟩, + subst ha, + exact ⟨⟨a, ne_of_mem_erase hi⟩, mem_univ _, rfl⟩ } +end + +lemma image_subtype_univ_ssubset_image_univ [fintype α] (k : β) (b : α → β) + (hk : k ∈ image b univ) (p : β → Prop) [decidable_pred p] (hp : ¬ p k) : + image (λ i : {a // p (b a)}, b ↑i) univ ⊂ image b univ := +begin + split, + { intros x hx, + rcases mem_image.1 hx with ⟨y, _, hy⟩, + exact hy ▸ mem_image_of_mem b (mem_univ y) }, + { intros h, + rw mem_image at hk, + rcases hk with ⟨k', _, hk'⟩, subst hk', + have := h (mem_image_of_mem b (mem_univ k')), + rw mem_image at this, + rcases this with ⟨j, hj, hj'⟩, + exact hp (hj' ▸ j.2) } +end + @[simp] lemma set.to_finset_eq_empty_iff {s : set α} [fintype s] : s.to_finset = ∅ ↔ s = ∅ := by simp only [ext_iff, set.ext_iff, set.mem_to_finset, not_mem_empty, set.mem_empty_iff_false] diff --git a/src/data/matrix/block.lean b/src/data/matrix/block.lean index 2cf32751d5ad0..d659e5d98878f 100644 --- a/src/data/matrix/block.lean +++ b/src/data/matrix/block.lean @@ -220,16 +220,50 @@ by { ext i, cases i; simp [vec_mul, dot_product] } variables [decidable_eq l] [decidable_eq m] -@[simp] lemma from_blocks_diagonal [has_zero α] (d₁ : l → α) (d₂ : m → α) : +section has_zero +variables [has_zero α] + +lemma to_block_diagonal_self (d : m → α) (p : m → Prop) : + matrix.to_block (diagonal d) p p = diagonal (λ i : subtype p, d ↑i) := +begin + ext i j, + by_cases i = j, + { simp [h] }, + { simp [has_one.one, h, λ h', h $ subtype.ext h'], } +end + +lemma to_block_diagonal_disjoint (d : m → α) {p q : m → Prop} (hpq : disjoint p q) : + matrix.to_block (diagonal d) p q = 0 := +begin + ext ⟨i, hi⟩ ⟨j, hj⟩, + have : i ≠ j, from λ heq, hpq i ⟨hi, heq.symm ▸ hj⟩, + simp [diagonal_apply_ne d this] +end + +@[simp] lemma from_blocks_diagonal (d₁ : l → α) (d₂ : m → α) : from_blocks (diagonal d₁) 0 0 (diagonal d₂) = diagonal (sum.elim d₁ d₂) := begin ext i j, rcases i; rcases j; simp [diagonal], end -@[simp] lemma from_blocks_one [has_zero α] [has_one α] : +end has_zero + +section has_zero_has_one +variables [has_zero α] [has_one α] + +@[simp] lemma from_blocks_one : from_blocks (1 : matrix l l α) 0 0 (1 : matrix m m α) = 1 := by { ext i j, rcases i; rcases j; simp [one_apply] } +@[simp] lemma to_block_one_self (p : m → Prop) : matrix.to_block (1 : matrix m m α) p p = 1 := +to_block_diagonal_self _ p + +lemma to_block_one_disjoint {p q : m → Prop} (hpq : disjoint p q) : + matrix.to_block (1 : matrix m m α) p q = 0 := +to_block_diagonal_disjoint _ hpq + +end has_zero_has_one + end block_matrices section block_diagonal @@ -662,4 +696,31 @@ rfl end block_diag' +section +variables [comm_ring R] + +lemma to_block_mul_eq_mul {m n k : Type*} [fintype n] (p : m → Prop) (q : k → Prop) + (A : matrix m n R) (B : matrix n k R) : + (A ⬝ B).to_block p q = A.to_block p ⊤ ⬝ B.to_block ⊤ q := +begin + ext i k, + simp only [to_block_apply, mul_apply], + rw finset.sum_subtype, + simp [has_top.top, complete_lattice.top, bounded_order.top], +end + +lemma to_block_mul_eq_add + {m n k : Type*} [fintype n] (p : m → Prop) (q : n → Prop) [decidable_pred q] (r : k → Prop) + (A : matrix m n R) (B : matrix n k R) : + (A ⬝ B).to_block p r = + A.to_block p q ⬝ B.to_block q r + A.to_block p (λ i, ¬ q i) ⬝ B.to_block (λ i, ¬ q i) r := +begin + classical, + ext i k, + simp only [to_block_apply, mul_apply, pi.add_apply], + convert (fintype.sum_subtype_add_sum_subtype q (λ x, A ↑i x * B x ↑k)).symm +end + +end + end matrix diff --git a/src/linear_algebra/matrix/block.lean b/src/linear_algebra/matrix/block.lean index de0d47e2d99bd..c8bba64c869ee 100644 --- a/src/linear_algebra/matrix/block.lean +++ b/src/linear_algebra/matrix/block.lean @@ -4,6 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE. Authors: Johannes Hölzl, Patrick Massot, Casper Putz, Anne Baanen -/ import linear_algebra.matrix.determinant +import linear_algebra.matrix.nonsingular_inverse import tactic.fin_cases /-! @@ -35,8 +36,8 @@ open_locale big_operators matrix universes v -variables {α m n : Type*} -variables {R : Type v} [comm_ring R] {M : matrix m m R} {b : m → α} +variables {α β m n o : Type*} {m' n' : α → Type*} +variables {R : Type v} [comm_ring R] {M N : matrix m m R} {b : m → α} namespace matrix @@ -67,28 +68,66 @@ protected lemma block_triangular.transpose : @[simp] protected lemma block_triangular_transpose_iff {b : m → αᵒᵈ} : Mᵀ.block_triangular b ↔ M.block_triangular (of_dual ∘ b) := forall_swap +@[simp] lemma block_triangular_zero : block_triangular (0 : matrix m m R) b := λ i j h, rfl + +protected lemma block_triangular.neg (hM : block_triangular M b) : block_triangular (-M) b := +λ i j h, neg_eq_zero.2 $ hM h + +lemma block_triangular.add (hM : block_triangular M b) (hN : block_triangular N b) : + block_triangular (M + N) b := +λ i j h, by simp_rw [pi.add_apply, hM h, hN h, zero_add] + +lemma block_triangular.sub (hM : block_triangular M b) (hN : block_triangular N b) : + block_triangular (M - N) b := +λ i j h, by simp_rw [pi.sub_apply, hM h, hN h, sub_zero] + end has_lt +section preorder +variables [preorder α] + +lemma block_triangular_diagonal [decidable_eq m] (d : m → R) : + block_triangular (diagonal d) b := +λ i j h, diagonal_apply_ne' d (λ h', ne_of_lt h (congr_arg _ h')) + +lemma block_triangular_block_diagonal' [decidable_eq α] (d : Π (i : α), matrix (m' i) (m' i) R) : + block_triangular (block_diagonal' d) sigma.fst := +begin + rintros ⟨i, i'⟩ ⟨j, j'⟩ h, + apply block_diagonal'_apply_ne d i' j' (λ h', ne_of_lt h h'.symm), +end + +lemma block_triangular_block_diagonal [decidable_eq α] (d : α → matrix m m R) : + block_triangular (block_diagonal d) prod.snd := +begin + rintros ⟨i, i'⟩ ⟨j, j'⟩ h, + rw [block_diagonal'_eq_block_diagonal, block_triangular_block_diagonal'], + exact h +end + +end preorder + +section linear_order +variables [linear_order α] + +lemma block_triangular.mul [fintype m] + {M N : matrix m m R} (hM : block_triangular M b) (hN : block_triangular N b): + block_triangular (M * N) b := +begin + intros i j hij, + apply finset.sum_eq_zero, + intros k hk, + by_cases hki : b k < b i, + { simp_rw [hM hki, zero_mul] }, + { simp_rw [hN (lt_of_lt_of_le hij (le_of_not_lt hki)), mul_zero] }, +end + +end linear_order + lemma upper_two_block_triangular [preorder α] (A : matrix m m R) (B : matrix m n R) (D : matrix n n R) {a b : α} (hab : a < b) : block_triangular (from_blocks A B 0 D) (sum.elim (λ i, a) (λ j, b)) := -begin - intros k1 k2 hk12, - have hor : ∀ (k : m ⊕ n), sum.elim (λ i, a) (λ j, b) k = a ∨ sum.elim (λ i, a) (λ j, b) k = b, - { simp }, - have hne : a ≠ b, from λ h, lt_irrefl _ (lt_of_lt_of_eq hab h.symm), - have ha : ∀ (k : m ⊕ n), sum.elim (λ i, a) (λ j, b) k = a → ∃ i, k = sum.inl i, - { simp [hne.symm] }, - have hb : ∀ (k : m ⊕ n), sum.elim (λ i, a) (λ j, b) k = b → ∃ j, k = sum.inr j, - { simp [hne] }, - cases (hor k1) with hk1 hk1; cases (hor k2) with hk2 hk2; rw [hk1, hk2] at hk12, - { exact false.elim (lt_irrefl a hk12), }, - { exact false.elim (lt_irrefl _ (lt_trans hab hk12)) }, - { obtain ⟨i, hi⟩ := hb k1 hk1, - obtain ⟨j, hj⟩ := ha k2 hk2, - rw [hi, hj], simp }, - { exact absurd hk12 (irrefl b) } -end +by rintro (c | c) (d | d) hcd; simpa [hab.not_lt] using hcd <|> simp /-! ### Determinant -/ @@ -148,45 +187,31 @@ begin unfreezingI { induction hs : univ.image b using finset.strong_induction with s ih generalizing m }, subst hs, - by_cases h : univ.image b = ∅, - { haveI := univ_eq_empty_iff.1 (image_eq_empty.1 h), - simp [h] }, - { let k := (univ.image b).max' (nonempty_of_ne_empty h), - rw two_block_triangular_det' M (λ i, b i = k), - { have : univ.image b = insert k ((univ.image b).erase k), - { rw insert_erase, apply max'_mem }, - rw [this, prod_insert (not_mem_erase _ _)], - refine congr_arg _ _, - let b' := λ i : {a // b a ≠ k}, b ↑i, - have h' : block_triangular (M.to_square_block_prop (λ (i : m), b i ≠ k)) b', - { intros i j, apply hM }, - have hb' : image b' univ = (image b univ).erase k, - { apply subset_antisymm, - { rw image_subset_iff, - intros i _, - apply mem_erase_of_ne_of_mem i.2 (mem_image_of_mem _ (mem_univ _)) }, - { intros i hi, - rw mem_image, - rcases mem_image.1 (erase_subset _ _ hi) with ⟨a, _, ha⟩, - subst ha, - exact ⟨⟨a, ne_of_mem_erase hi⟩, mem_univ _, rfl⟩ } }, - rw ih ((univ.image b).erase k) (erase_ssubset (max'_mem _ _)) h' hb', - apply finset.prod_congr rfl, - intros l hl, - let he : {a // b' a = l} ≃ {a // b a = l}, - { have hc : ∀ (i : m), (λ a, b a = l) i → (λ a, b a ≠ k) i, - { intros i hbi, rw hbi, exact ne_of_mem_erase hl }, - exact equiv.subtype_subtype_equiv_subtype hc }, - simp only [to_square_block_def], - rw ← matrix.det_reindex_self he.symm (λ (i j : {a // b a = l}), M ↑i ↑j), - refine congr_arg _ _, - ext, - simp [to_square_block_def, to_square_block_prop_def] }, + casesI is_empty_or_nonempty m, + { simp }, + let k := (univ.image b).max' (univ_nonempty.image _), + rw two_block_triangular_det' M (λ i, b i = k), + { have : univ.image b = insert k ((univ.image b).erase k), + { rw insert_erase, apply max'_mem }, + rw [this, prod_insert (not_mem_erase _ _)], + refine congr_arg _ _, + let b' := λ i : {a // b a ≠ k}, b ↑i, + have h' : block_triangular (M.to_square_block_prop (λ i, b i ≠ k)) b' := hM.submatrix, + have hb' : image b' univ = (image b univ).erase k, + { convert image_subtype_ne_univ_eq_image_erase k b }, + rw ih _ (erase_ssubset $ max'_mem _ _) h' hb', + refine finset.prod_congr rfl (λ l hl, _), + let he : {a // b' a = l} ≃ {a // b a = l}, + { have hc : ∀ i, b i = l → b i ≠ k := λ i hi, ne_of_eq_of_ne hi (ne_of_mem_erase hl), + exact equiv.subtype_subtype_equiv_subtype hc }, + simp only [to_square_block_def], + rw ← matrix.det_reindex_self he.symm (λ (i j : {a // b a = l}), M ↑i ↑j), + refl }, { intros i hi j hj, apply hM, rw hi, apply lt_of_le_of_ne _ hj, - exact finset.le_max' (univ.image b) _ (mem_image_of_mem _ (mem_univ _)) } } + exact finset.le_max' (univ.image b) _ (mem_image_of_mem _ (mem_univ _)) } end lemma block_triangular.det_fintype [decidable_eq α] [fintype α] [linear_order α] @@ -209,4 +234,80 @@ lemma det_of_lower_triangular [linear_order m] (M : matrix m m R) (h : M.block_t M.det = ∏ i : m, M i i := by { rw ←det_transpose, exact det_of_upper_triangular h.transpose } +/-! ### Invertible -/ + +lemma block_triangular.to_block_inverse_mul_to_block_eq_one [linear_order α] [invertible M] + (hM : block_triangular M b) (k : α) : + M⁻¹.to_block (λ i, b i < k) (λ i, b i < k) ⬝ M.to_block (λ i, b i < k) (λ i, b i < k) = 1 := +begin + let p := (λ i, b i < k), + have h_sum : M⁻¹.to_block p p ⬝ M.to_block p p + + M⁻¹.to_block p (λ i, ¬ p i) ⬝ M.to_block (λ i, ¬ p i) p = 1, + by rw [←to_block_mul_eq_add, inv_mul_of_invertible M, to_block_one_self], + have h_zero : M.to_block (λ i, ¬ p i) p = 0, + { ext i j, + simpa using hM (lt_of_lt_of_le j.2 (le_of_not_lt i.2)) }, + simpa [h_zero] using h_sum +end + +/-- The inverse of an upper-left subblock of a block-triangular matrix `M` is the upper-left +subblock of `M⁻¹`. -/ +lemma block_triangular.inv_to_block [linear_order α] [invertible M] + (hM : block_triangular M b) (k : α) : + (M.to_block (λ i, b i < k) (λ i, b i < k))⁻¹ = M⁻¹.to_block (λ i, b i < k) (λ i, b i < k) := +inv_eq_left_inv $ hM.to_block_inverse_mul_to_block_eq_one k + +/-- An upper-left subblock of an invertible block-triangular matrix is invertible. -/ +def block_triangular.invertible_to_block [linear_order α] [invertible M] + (hM : block_triangular M b) (k : α) : + invertible (M.to_block (λ i, b i < k) (λ i, b i < k)) := +invertible_of_left_inverse _ ((⅟M).to_block (λ i, b i < k) (λ i, b i < k)) $ + by simpa only [inv_of_eq_nonsing_inv] using hM.to_block_inverse_mul_to_block_eq_one k + +/-- A lower-left subblock of the inverse of a block-triangular matrix is zero. This is a first step +towards `block_triangular.inv_to_block` below. -/ +lemma to_block_inverse_eq_zero [linear_order α] [invertible M] (hM : block_triangular M b) (k : α) : + M⁻¹.to_block (λ i, k ≤ b i) (λ i, b i < k) = 0 := +begin + let p := λ i, b i < k, + let q := λ i, ¬ b i < k, + have h_sum : M⁻¹.to_block q p ⬝ M.to_block p p + M⁻¹.to_block q q ⬝ M.to_block q p = 0, + { rw [←to_block_mul_eq_add, inv_mul_of_invertible M, to_block_one_disjoint], + exact λ i h, h.1 h.2 }, + have h_zero : M.to_block q p = 0, + { ext i j, + simpa using hM (lt_of_lt_of_le j.2 $ le_of_not_lt i.2) }, + have h_mul_eq_zero : M⁻¹.to_block q p ⬝ M.to_block p p = 0 := by simpa [h_zero] using h_sum, + haveI : invertible (M.to_block p p) := hM.invertible_to_block k, + have : (λ i, k ≤ b i) = q := by { ext, exact not_lt.symm }, + rw [this, ← matrix.zero_mul (M.to_block p p)⁻¹, ← h_mul_eq_zero, + mul_inv_cancel_right_of_invertible], +end + +/-- The inverse of a block-triangular matrix is block-triangular. -/ +lemma block_triangular_inv_of_block_triangular [linear_order α] [invertible M] + (hM : block_triangular M b) : + block_triangular M⁻¹ b := +begin + unfreezingI { induction hs : univ.image b using finset.strong_induction + with s ih generalizing m }, + subst hs, + intros i j hij, + haveI : inhabited m := ⟨i⟩, + let k := (univ.image b).max' (univ_nonempty.image _), + let b' := λ i : {a // b a < k}, b ↑i, + let A := M.to_block (λ i, b i < k) (λ j, b j < k), + obtain hbi | hi : b i = k ∨ _ := (le_max' _ (b i) $ mem_image_of_mem _ $ mem_univ _).eq_or_lt, + { have : M⁻¹.to_block (λ i, k ≤ b i) (λ i, b i < k) ⟨i, hbi.ge⟩ ⟨j, hbi ▸ hij⟩ = 0, + { simp only [to_block_inverse_eq_zero hM k, pi.zero_apply] }, + simp [this.symm] }, + haveI : invertible A := hM.invertible_to_block _, + have hA : A.block_triangular b' := hM.submatrix, + have hb' : image b' univ ⊂ image b univ, + { convert image_subtype_univ_ssubset_image_univ k b _ (λ a, a < k) (lt_irrefl _), + convert max'_mem _ _ }, + have hij' : b' ⟨j, hij.trans hi⟩ < b' ⟨i, hi⟩, by simp_rw [b', subtype.coe_mk, hij], + simp [hM.inv_to_block k, (ih (image b' univ) hb' hA rfl hij').symm], +end + end matrix