Skip to content

Commit

Permalink
feat(linear_algebra/matrix): inverse of block triangular is block tri…
Browse files Browse the repository at this point in the history
…angular (#16150)

The inverse of a block triangular matrix is block triangular. I took the opportunity to refactor the proof about determinants of block triangular matrices as well.



Co-authored-by: Eric <ericrboidi@gmail.com>
Co-authored-by: Alexander Bentkamp <bentkamp@gmail.com>
  • Loading branch information
3 people committed Oct 13, 2022
1 parent f8b900b commit 46fc73a
Show file tree
Hide file tree
Showing 3 changed files with 249 additions and 56 deletions.
31 changes: 31 additions & 0 deletions src/data/fintype/basic.lean
Expand Up @@ -1306,6 +1306,37 @@ instance Prop.fintype : fintype Prop :=
instance subtype.fintype (p : α → Prop) [decidable_pred p] [fintype α] : fintype {x // p x} :=
fintype.subtype (univ.filter p) (by simp)

lemma image_subtype_ne_univ_eq_image_erase [fintype α] (k : β) (b : α → β) :
image (λ i : {a // b a ≠ k}, b ↑i) univ = (image b univ).erase k :=
begin
apply subset_antisymm,
{ rw image_subset_iff,
intros i _,
apply mem_erase_of_ne_of_mem i.2 (mem_image_of_mem _ (mem_univ _)) },
{ intros i hi,
rw mem_image,
rcases mem_image.1 (erase_subset _ _ hi) with ⟨a, _, ha⟩,
subst ha,
exact ⟨⟨a, ne_of_mem_erase hi⟩, mem_univ _, rfl⟩ }
end

lemma image_subtype_univ_ssubset_image_univ [fintype α] (k : β) (b : α → β)
(hk : k ∈ image b univ) (p : β → Prop) [decidable_pred p] (hp : ¬ p k) :
image (λ i : {a // p (b a)}, b ↑i) univ ⊂ image b univ :=
begin
split,
{ intros x hx,
rcases mem_image.1 hx with ⟨y, _, hy⟩,
exact hy ▸ mem_image_of_mem b (mem_univ y) },
{ intros h,
rw mem_image at hk,
rcases hk with ⟨k', _, hk'⟩, subst hk',
have := h (mem_image_of_mem b (mem_univ k')),
rw mem_image at this,
rcases this with ⟨j, hj, hj'⟩,
exact hp (hj' ▸ j.2) }
end

@[simp] lemma set.to_finset_eq_empty_iff {s : set α} [fintype s] : s.to_finset = ∅ ↔ s = ∅ :=
by simp only [ext_iff, set.ext_iff, set.mem_to_finset, not_mem_empty, set.mem_empty_iff_false]

Expand Down
65 changes: 63 additions & 2 deletions src/data/matrix/block.lean
Expand Up @@ -220,16 +220,50 @@ by { ext i, cases i; simp [vec_mul, dot_product] }

variables [decidable_eq l] [decidable_eq m]

@[simp] lemma from_blocks_diagonal [has_zero α] (d₁ : l → α) (d₂ : m → α) :
section has_zero
variables [has_zero α]

lemma to_block_diagonal_self (d : m → α) (p : m → Prop) :
matrix.to_block (diagonal d) p p = diagonal (λ i : subtype p, d ↑i) :=
begin
ext i j,
by_cases i = j,
{ simp [h] },
{ simp [has_one.one, h, λ h', h $ subtype.ext h'], }
end

lemma to_block_diagonal_disjoint (d : m → α) {p q : m → Prop} (hpq : disjoint p q) :
matrix.to_block (diagonal d) p q = 0 :=
begin
ext ⟨i, hi⟩ ⟨j, hj⟩,
have : i ≠ j, from λ heq, hpq i ⟨hi, heq.symm ▸ hj⟩,
simp [diagonal_apply_ne d this]
end

@[simp] lemma from_blocks_diagonal (d₁ : l → α) (d₂ : m → α) :
from_blocks (diagonal d₁) 0 0 (diagonal d₂) = diagonal (sum.elim d₁ d₂) :=
begin
ext i j, rcases i; rcases j; simp [diagonal],
end

@[simp] lemma from_blocks_one [has_zero α] [has_one α] :
end has_zero

section has_zero_has_one
variables [has_zero α] [has_one α]

@[simp] lemma from_blocks_one :
from_blocks (1 : matrix l l α) 0 0 (1 : matrix m m α) = 1 :=
by { ext i j, rcases i; rcases j; simp [one_apply] }

@[simp] lemma to_block_one_self (p : m → Prop) : matrix.to_block (1 : matrix m m α) p p = 1 :=
to_block_diagonal_self _ p

lemma to_block_one_disjoint {p q : m → Prop} (hpq : disjoint p q) :
matrix.to_block (1 : matrix m m α) p q = 0 :=
to_block_diagonal_disjoint _ hpq

end has_zero_has_one

end block_matrices

section block_diagonal
Expand Down Expand Up @@ -662,4 +696,31 @@ rfl

end block_diag'

section
variables [comm_ring R]

lemma to_block_mul_eq_mul {m n k : Type*} [fintype n] (p : m → Prop) (q : k → Prop)
(A : matrix m n R) (B : matrix n k R) :
(A ⬝ B).to_block p q = A.to_block p ⊤ ⬝ B.to_block ⊤ q :=
begin
ext i k,
simp only [to_block_apply, mul_apply],
rw finset.sum_subtype,
simp [has_top.top, complete_lattice.top, bounded_order.top],
end

lemma to_block_mul_eq_add
{m n k : Type*} [fintype n] (p : m → Prop) (q : n → Prop) [decidable_pred q] (r : k → Prop)
(A : matrix m n R) (B : matrix n k R) :
(A ⬝ B).to_block p r =
A.to_block p q ⬝ B.to_block q r + A.to_block p (λ i, ¬ q i) ⬝ B.to_block (λ i, ¬ q i) r :=
begin
classical,
ext i k,
simp only [to_block_apply, mul_apply, pi.add_apply],
convert (fintype.sum_subtype_add_sum_subtype q (λ x, A ↑i x * B x ↑k)).symm
end

end

end matrix
209 changes: 155 additions & 54 deletions src/linear_algebra/matrix/block.lean
Expand Up @@ -4,6 +4,7 @@ Released under Apache 2.0 license as described in the file LICENSE.
Authors: Johannes Hölzl, Patrick Massot, Casper Putz, Anne Baanen
-/
import linear_algebra.matrix.determinant
import linear_algebra.matrix.nonsingular_inverse
import tactic.fin_cases

/-!
Expand Down Expand Up @@ -35,8 +36,8 @@ open_locale big_operators matrix

universes v

variables {α m n : Type*}
variables {R : Type v} [comm_ring R] {M : matrix m m R} {b : m → α}
variablesβ m n o : Type*} {m' n' : α → Type*}
variables {R : Type v} [comm_ring R] {M N : matrix m m R} {b : m → α}

namespace matrix

Expand Down Expand Up @@ -67,28 +68,66 @@ protected lemma block_triangular.transpose :
@[simp] protected lemma block_triangular_transpose_iff {b : m → αᵒᵈ} :
Mᵀ.block_triangular b ↔ M.block_triangular (of_dual ∘ b) := forall_swap

@[simp] lemma block_triangular_zero : block_triangular (0 : matrix m m R) b := λ i j h, rfl

protected lemma block_triangular.neg (hM : block_triangular M b) : block_triangular (-M) b :=
λ i j h, neg_eq_zero.2 $ hM h

lemma block_triangular.add (hM : block_triangular M b) (hN : block_triangular N b) :
block_triangular (M + N) b :=
λ i j h, by simp_rw [pi.add_apply, hM h, hN h, zero_add]

lemma block_triangular.sub (hM : block_triangular M b) (hN : block_triangular N b) :
block_triangular (M - N) b :=
λ i j h, by simp_rw [pi.sub_apply, hM h, hN h, sub_zero]

end has_lt

section preorder
variables [preorder α]

lemma block_triangular_diagonal [decidable_eq m] (d : m → R) :
block_triangular (diagonal d) b :=
λ i j h, diagonal_apply_ne' d (λ h', ne_of_lt h (congr_arg _ h'))

lemma block_triangular_block_diagonal' [decidable_eq α] (d : Π (i : α), matrix (m' i) (m' i) R) :
block_triangular (block_diagonal' d) sigma.fst :=
begin
rintros ⟨i, i'⟩ ⟨j, j'⟩ h,
apply block_diagonal'_apply_ne d i' j' (λ h', ne_of_lt h h'.symm),
end

lemma block_triangular_block_diagonal [decidable_eq α] (d : α → matrix m m R) :
block_triangular (block_diagonal d) prod.snd :=
begin
rintros ⟨i, i'⟩ ⟨j, j'⟩ h,
rw [block_diagonal'_eq_block_diagonal, block_triangular_block_diagonal'],
exact h
end

end preorder

section linear_order
variables [linear_order α]

lemma block_triangular.mul [fintype m]
{M N : matrix m m R} (hM : block_triangular M b) (hN : block_triangular N b):
block_triangular (M * N) b :=
begin
intros i j hij,
apply finset.sum_eq_zero,
intros k hk,
by_cases hki : b k < b i,
{ simp_rw [hM hki, zero_mul] },
{ simp_rw [hN (lt_of_lt_of_le hij (le_of_not_lt hki)), mul_zero] },
end

end linear_order

lemma upper_two_block_triangular [preorder α]
(A : matrix m m R) (B : matrix m n R) (D : matrix n n R) {a b : α} (hab : a < b) :
block_triangular (from_blocks A B 0 D) (sum.elim (λ i, a) (λ j, b)) :=
begin
intros k1 k2 hk12,
have hor : ∀ (k : m ⊕ n), sum.elim (λ i, a) (λ j, b) k = a ∨ sum.elim (λ i, a) (λ j, b) k = b,
{ simp },
have hne : a ≠ b, from λ h, lt_irrefl _ (lt_of_lt_of_eq hab h.symm),
have ha : ∀ (k : m ⊕ n), sum.elim (λ i, a) (λ j, b) k = a → ∃ i, k = sum.inl i,
{ simp [hne.symm] },
have hb : ∀ (k : m ⊕ n), sum.elim (λ i, a) (λ j, b) k = b → ∃ j, k = sum.inr j,
{ simp [hne] },
cases (hor k1) with hk1 hk1; cases (hor k2) with hk2 hk2; rw [hk1, hk2] at hk12,
{ exact false.elim (lt_irrefl a hk12), },
{ exact false.elim (lt_irrefl _ (lt_trans hab hk12)) },
{ obtain ⟨i, hi⟩ := hb k1 hk1,
obtain ⟨j, hj⟩ := ha k2 hk2,
rw [hi, hj], simp },
{ exact absurd hk12 (irrefl b) }
end
by rintro (c | c) (d | d) hcd; simpa [hab.not_lt] using hcd <|> simp

/-! ### Determinant -/

Expand Down Expand Up @@ -148,45 +187,31 @@ begin
unfreezingI { induction hs : univ.image b using finset.strong_induction
with s ih generalizing m },
subst hs,
by_cases h : univ.image b = ∅,
{ haveI := univ_eq_empty_iff.1 (image_eq_empty.1 h),
simp [h] },
{ let k := (univ.image b).max' (nonempty_of_ne_empty h),
rw two_block_triangular_det' M (λ i, b i = k),
{ have : univ.image b = insert k ((univ.image b).erase k),
{ rw insert_erase, apply max'_mem },
rw [this, prod_insert (not_mem_erase _ _)],
refine congr_arg _ _,
let b' := λ i : {a // b a ≠ k}, b ↑i,
have h' : block_triangular (M.to_square_block_prop (λ (i : m), b i ≠ k)) b',
{ intros i j, apply hM },
have hb' : image b' univ = (image b univ).erase k,
{ apply subset_antisymm,
{ rw image_subset_iff,
intros i _,
apply mem_erase_of_ne_of_mem i.2 (mem_image_of_mem _ (mem_univ _)) },
{ intros i hi,
rw mem_image,
rcases mem_image.1 (erase_subset _ _ hi) with ⟨a, _, ha⟩,
subst ha,
exact ⟨⟨a, ne_of_mem_erase hi⟩, mem_univ _, rfl⟩ } },
rw ih ((univ.image b).erase k) (erase_ssubset (max'_mem _ _)) h' hb',
apply finset.prod_congr rfl,
intros l hl,
let he : {a // b' a = l} ≃ {a // b a = l},
{ have hc : ∀ (i : m), (λ a, b a = l) i → (λ a, b a ≠ k) i,
{ intros i hbi, rw hbi, exact ne_of_mem_erase hl },
exact equiv.subtype_subtype_equiv_subtype hc },
simp only [to_square_block_def],
rw ← matrix.det_reindex_self he.symm (λ (i j : {a // b a = l}), M ↑i ↑j),
refine congr_arg _ _,
ext,
simp [to_square_block_def, to_square_block_prop_def] },
casesI is_empty_or_nonempty m,
{ simp },
let k := (univ.image b).max' (univ_nonempty.image _),
rw two_block_triangular_det' M (λ i, b i = k),
{ have : univ.image b = insert k ((univ.image b).erase k),
{ rw insert_erase, apply max'_mem },
rw [this, prod_insert (not_mem_erase _ _)],
refine congr_arg _ _,
let b' := λ i : {a // b a ≠ k}, b ↑i,
have h' : block_triangular (M.to_square_block_prop (λ i, b i ≠ k)) b' := hM.submatrix,
have hb' : image b' univ = (image b univ).erase k,
{ convert image_subtype_ne_univ_eq_image_erase k b },
rw ih _ (erase_ssubset $ max'_mem _ _) h' hb',
refine finset.prod_congr rfl (λ l hl, _),
let he : {a // b' a = l} ≃ {a // b a = l},
{ have hc : ∀ i, b i = l → b i ≠ k := λ i hi, ne_of_eq_of_ne hi (ne_of_mem_erase hl),
exact equiv.subtype_subtype_equiv_subtype hc },
simp only [to_square_block_def],
rw ← matrix.det_reindex_self he.symm (λ (i j : {a // b a = l}), M ↑i ↑j),
refl },
{ intros i hi j hj,
apply hM,
rw hi,
apply lt_of_le_of_ne _ hj,
exact finset.le_max' (univ.image b) _ (mem_image_of_mem _ (mem_univ _)) } }
exact finset.le_max' (univ.image b) _ (mem_image_of_mem _ (mem_univ _)) }
end

lemma block_triangular.det_fintype [decidable_eq α] [fintype α] [linear_order α]
Expand All @@ -209,4 +234,80 @@ lemma det_of_lower_triangular [linear_order m] (M : matrix m m R) (h : M.block_t
M.det = ∏ i : m, M i i :=
by { rw ←det_transpose, exact det_of_upper_triangular h.transpose }

/-! ### Invertible -/

lemma block_triangular.to_block_inverse_mul_to_block_eq_one [linear_order α] [invertible M]
(hM : block_triangular M b) (k : α) :
M⁻¹.to_block (λ i, b i < k) (λ i, b i < k) ⬝ M.to_block (λ i, b i < k) (λ i, b i < k) = 1 :=
begin
let p := (λ i, b i < k),
have h_sum : M⁻¹.to_block p p ⬝ M.to_block p p +
M⁻¹.to_block p (λ i, ¬ p i) ⬝ M.to_block (λ i, ¬ p i) p = 1,
by rw [←to_block_mul_eq_add, inv_mul_of_invertible M, to_block_one_self],
have h_zero : M.to_block (λ i, ¬ p i) p = 0,
{ ext i j,
simpa using hM (lt_of_lt_of_le j.2 (le_of_not_lt i.2)) },
simpa [h_zero] using h_sum
end

/-- The inverse of an upper-left subblock of a block-triangular matrix `M` is the upper-left
subblock of `M⁻¹`. -/
lemma block_triangular.inv_to_block [linear_order α] [invertible M]
(hM : block_triangular M b) (k : α) :
(M.to_block (λ i, b i < k) (λ i, b i < k))⁻¹ = M⁻¹.to_block (λ i, b i < k) (λ i, b i < k) :=
inv_eq_left_inv $ hM.to_block_inverse_mul_to_block_eq_one k

/-- An upper-left subblock of an invertible block-triangular matrix is invertible. -/
def block_triangular.invertible_to_block [linear_order α] [invertible M]
(hM : block_triangular M b) (k : α) :
invertible (M.to_block (λ i, b i < k) (λ i, b i < k)) :=
invertible_of_left_inverse _ ((⅟M).to_block (λ i, b i < k) (λ i, b i < k)) $
by simpa only [inv_of_eq_nonsing_inv] using hM.to_block_inverse_mul_to_block_eq_one k

/-- A lower-left subblock of the inverse of a block-triangular matrix is zero. This is a first step
towards `block_triangular.inv_to_block` below. -/
lemma to_block_inverse_eq_zero [linear_order α] [invertible M] (hM : block_triangular M b) (k : α) :
M⁻¹.to_block (λ i, k ≤ b i) (λ i, b i < k) = 0 :=
begin
let p := λ i, b i < k,
let q := λ i, ¬ b i < k,
have h_sum : M⁻¹.to_block q p ⬝ M.to_block p p + M⁻¹.to_block q q ⬝ M.to_block q p = 0,
{ rw [←to_block_mul_eq_add, inv_mul_of_invertible M, to_block_one_disjoint],
exact λ i h, h.1 h.2 },
have h_zero : M.to_block q p = 0,
{ ext i j,
simpa using hM (lt_of_lt_of_le j.2 $ le_of_not_lt i.2) },
have h_mul_eq_zero : M⁻¹.to_block q p ⬝ M.to_block p p = 0 := by simpa [h_zero] using h_sum,
haveI : invertible (M.to_block p p) := hM.invertible_to_block k,
have : (λ i, k ≤ b i) = q := by { ext, exact not_lt.symm },
rw [this, ← matrix.zero_mul (M.to_block p p)⁻¹, ← h_mul_eq_zero,
mul_inv_cancel_right_of_invertible],
end

/-- The inverse of a block-triangular matrix is block-triangular. -/
lemma block_triangular_inv_of_block_triangular [linear_order α] [invertible M]
(hM : block_triangular M b) :
block_triangular M⁻¹ b :=
begin
unfreezingI { induction hs : univ.image b using finset.strong_induction
with s ih generalizing m },
subst hs,
intros i j hij,
haveI : inhabited m := ⟨i⟩,
let k := (univ.image b).max' (univ_nonempty.image _),
let b' := λ i : {a // b a < k}, b ↑i,
let A := M.to_block (λ i, b i < k) (λ j, b j < k),
obtain hbi | hi : b i = k ∨ _ := (le_max' _ (b i) $ mem_image_of_mem _ $ mem_univ _).eq_or_lt,
{ have : M⁻¹.to_block (λ i, k ≤ b i) (λ i, b i < k) ⟨i, hbi.ge⟩ ⟨j, hbi ▸ hij⟩ = 0,
{ simp only [to_block_inverse_eq_zero hM k, pi.zero_apply] },
simp [this.symm] },
haveI : invertible A := hM.invertible_to_block _,
have hA : A.block_triangular b' := hM.submatrix,
have hb' : image b' univ ⊂ image b univ,
{ convert image_subtype_univ_ssubset_image_univ k b _ (λ a, a < k) (lt_irrefl _),
convert max'_mem _ _ },
have hij' : b' ⟨j, hij.trans hi⟩ < b' ⟨i, hi⟩, by simp_rw [b', subtype.coe_mk, hij],
simp [hM.inv_to_block k, (ih (image b' univ) hb' hA rfl hij').symm],
end

end matrix

0 comments on commit 46fc73a

Please sign in to comment.