feat(analysis/inner_product_space): Gram-Schmidt Basis (#14514)

abentkamp · abentkamp · commit 67da2721856c · 2022-06-16T19:20:24.000Z
When the Gram-Schmidt procedure is given a basis, it produces a basis.

This pull request also refactors the lemma `gram_schmidt_span`. The new versions of the lemmas cover the span of `Iio`, the span of `Iic` and the span of the complete set of input vectors. I was also able to remove some type classes in the process.
diff --git a/src/analysis/inner_product_space/gram_schmidt_ortho.lean b/src/analysis/inner_product_space/gram_schmidt_ortho.lean
@@ -1,11 +1,12 @@
 /-
 Copyright (c) 2022 Jiale Miao. All rights reserved.
 Released under Apache 2.0 license as described in the file LICENSE.
-Authors: Jiale Miao, Kevin Buzzard
+Authors: Jiale Miao, Kevin Buzzard, Alexander Bentkamp
 -/
 
 import analysis.inner_product_space.projection
 import order.well_founded_set
+import analysis.inner_product_space.pi_L2
 
 /-!
 # Gram-Schmidt Orthogonalization and Orthonormalization
@@ -25,6 +26,8 @@ and outputs a set of orthogonal vectors which have the same span.
 - `gram_schmidt_ne_zero` :
   If the input vectors of `gram_schmidt` are linearly independent,
   then the output vectors are non-zero.
+- `gram_schmidt_basis` :
+  The basis produced by the Gram-Schmidt process when given a basis as input.
 - `gram_schmidt_normed` :
   the normalized `gram_schmidt` (i.e each vector in `gram_schmidt_normed` has unit length.)
 - `gram_schmidt_orthornormal` :
@@ -38,8 +41,7 @@ open_locale big_operators
 open finset
 
 variables (𝕜 : Type*) {E : Type*} [is_R_or_C 𝕜] [inner_product_space 𝕜 E]
-variables {ι : Type*} [linear_order ι] [order_bot ι]
-variables [locally_finite_order ι] [is_well_order ι (<)]
+variables {ι : Type*} [linear_order ι] [locally_finite_order_bot ι] [is_well_order ι (<)]
 
 local attribute [instance] is_well_order.to_has_well_founded
 
@@ -49,7 +51,7 @@ local notation `⟪`x`, `y`⟫` := @inner 𝕜 _ _ x y
 and outputs a set of orthogonal vectors which have the same span. -/
 noncomputable def gram_schmidt (f : ι → E) : ι → E
 | n := f n - ∑ i : Iio n, orthogonal_projection (𝕜 ∙ gram_schmidt i) (f n)
-using_well_founded { dec_tac := `[exact (mem_Ico.1 i.2).2] }
+using_well_founded { dec_tac := `[exact mem_Iio.1 i.2] }
 
 /-- This lemma uses `∑ i in` instead of `∑ i :`.-/
 lemma gram_schmidt_def (f : ι → E) (n : ι):
@@ -62,7 +64,8 @@ lemma gram_schmidt_def' (f : ι → E) (n : ι):
     orthogonal_projection (𝕜 ∙ gram_schmidt 𝕜 f i) (f n) :=
 by rw [gram_schmidt_def, sub_add_cancel]
 
-@[simp] lemma gram_schmidt_zero (f : ι → E) : gram_schmidt 𝕜 f ⊥ = f ⊥ :=
+@[simp] lemma gram_schmidt_zero {ι : Type*} [linear_order ι] [locally_finite_order ι]
+  [order_bot ι] [is_well_order ι (<)] (f : ι → E) : gram_schmidt 𝕜 f ⊥ = f ⊥ :=
 by rw [gram_schmidt_def, Iio_eq_Ico, finset.Ico_self, finset.sum_empty, sub_zero]
 
 /-- **Gram-Schmidt Orthogonalisation**:
@@ -93,7 +96,7 @@ begin
   cases hia.lt_or_lt with hia₁ hia₂,
   { rw inner_eq_zero_sym,
     exact ih a h₀ i hia₁ },
-  { exact ih i (mem_Ico.1 hi).2 a hia₂ }
+  { exact ih i (mem_Iio.1 hi) a hia₂ }
 end
 
 /-- This is another version of `gram_schmidt_orthogonal` using `pairwise` instead. -/
@@ -103,46 +106,56 @@ theorem gram_schmidt_pairwise_orthogonal (f : ι → E) :
 
 open submodule set order
 
-/-- `gram_schmidt` preserves span of vectors. -/
-lemma span_gram_schmidt [succ_order ι] [is_succ_archimedean ι] (f : ι → E) (c : ι) :
-  span 𝕜 (gram_schmidt 𝕜 f '' Iio c) = span 𝕜 (f '' Iio c) :=
+lemma mem_span_gram_schmidt (f : ι → E) {i j : ι} (hij : i ≤ j) :
+  f i ∈ span 𝕜 (gram_schmidt 𝕜 f '' Iic j) :=
+begin
+  rw [gram_schmidt_def' 𝕜 f i],
+  simp_rw orthogonal_projection_singleton,
+  exact submodule.add_mem _ (subset_span $ mem_image_of_mem _ hij)
+    (submodule.sum_mem _ $ λ k hk, smul_mem (span 𝕜 (gram_schmidt 𝕜 f '' Iic j)) _ $
+    subset_span $ mem_image_of_mem (gram_schmidt 𝕜 f) $ (finset.mem_Iio.1 hk).le.trans hij),
+end
+
+lemma gram_schmidt_mem_span (f : ι → E) :
+  ∀ {j i}, i ≤ j → gram_schmidt 𝕜 f i ∈ span 𝕜 (f '' Iic j)
+| j := λ i hij,
 begin
-  apply @succ.rec ι _ _ _ (λ c, span 𝕜 (gram_schmidt 𝕜 f '' Iio c) = span 𝕜 (f '' Iio c)) ⊥
-    _ _ _ bot_le,
-  { simp only [set.Iio_bot, set.image_empty] },
-  intros c _ hc,
-  by_cases h : succ c = c,
-  { rwa h },
-  have h₀ : ∀ b, b ∈ finset.Iio c → gram_schmidt 𝕜 f b ∈ span 𝕜 (f '' Iio c),
-  { simp_intros b hb only [finset.mem_range, nat.succ_eq_add_one],
-    rw ← hc,
-    refine subset_span _,
-    simp only [set.mem_image, set.mem_Iio],
-    refine ⟨b, (finset.mem_Ico.1 hb).2, by refl⟩ },
-  rw not_iff_not.2 order.succ_eq_iff_is_max at h,
-  rw [order.Iio_succ_eq_insert_of_not_is_max h],
-  simp only [span_insert, image_insert_eq, hc],
-  apply le_antisymm,
-  { simp only [nat.succ_eq_succ,gram_schmidt_def 𝕜 f c, orthogonal_projection_singleton,
-      sup_le_iff, span_singleton_le_iff_mem, le_sup_right, and_true],
-    apply submodule.sub_mem _ _ _,
-    { exact mem_sup_left (mem_span_singleton_self (f c)) },
-    { exact submodule.sum_mem _ (λ b hb, mem_sup_right (smul_mem _ _ (h₀ b hb))) } },
-  { rw [gram_schmidt_def' 𝕜 f c],
-    simp only [orthogonal_projection_singleton,
-      sup_le_iff, span_singleton_le_iff_mem, le_sup_right, and_true],
-    apply submodule.add_mem _ _ _,
-    { exact mem_sup_left (mem_span_singleton_self (gram_schmidt 𝕜 f c)), },
-    { exact submodule.sum_mem _ (λ b hb, mem_sup_right (smul_mem _ _ (h₀ b hb))) } }
+  rw [gram_schmidt_def 𝕜 f i],
+  simp_rw orthogonal_projection_singleton,
+  refine submodule.sub_mem _ (subset_span (mem_image_of_mem _ hij))
+    (submodule.sum_mem _ $ λ k hk, _),
+  let hkj : k < j := (finset.mem_Iio.1 hk).trans_le hij,
+  exact smul_mem _ _ (span_mono (image_subset f $ Iic_subset_Iic.2 hkj.le) $
+    gram_schmidt_mem_span le_rfl),
 end
+using_well_founded { dec_tac := `[assumption] }
+
+lemma span_gram_schmidt_Iic (f : ι → E) (c : ι) :
+  span 𝕜 (gram_schmidt 𝕜 f '' Iic c) = span 𝕜 (f '' Iic c) :=
+span_eq_span (set.image_subset_iff.2 $ λ i, gram_schmidt_mem_span _ _) $
+  set.image_subset_iff.2 $ λ i, mem_span_gram_schmidt _ _
 
-lemma gram_schmidt_ne_zero_coe [succ_order ι] [is_succ_archimedean ι]
+lemma span_gram_schmidt_Iio (f : ι → E) (c : ι) :
+  span 𝕜 (gram_schmidt 𝕜 f '' Iio c) = span 𝕜 (f '' Iio c) :=
+span_eq_span
+  (set.image_subset_iff.2 $ λ i hi, span_mono (image_subset _ $ Iic_subset_Iio.2 hi) $
+    gram_schmidt_mem_span _ _ le_rfl) $
+  set.image_subset_iff.2 $ λ i hi, span_mono (image_subset _ $ Iic_subset_Iio.2 hi) $
+    mem_span_gram_schmidt _ _ le_rfl
+
+/-- `gram_schmidt` preserves span of vectors. -/
+lemma span_gram_schmidt (f : ι → E) : span 𝕜 (range (gram_schmidt 𝕜 f)) = span 𝕜 (range f) :=
+span_eq_span (range_subset_iff.2 $ λ i, span_mono (image_subset_range _ _) $
+  gram_schmidt_mem_span _ _ le_rfl) $
+  range_subset_iff.2 $ λ i, span_mono (image_subset_range _ _) $ mem_span_gram_schmidt _ _ le_rfl
+
+lemma gram_schmidt_ne_zero_coe
     (f : ι → E) (n : ι) (h₀ : linear_independent 𝕜 (f ∘ (coe : set.Iic n → ι))) :
   gram_schmidt 𝕜 f n ≠ 0 :=
 begin
   by_contra h,
   have h₁ : f n ∈ span 𝕜 (f '' Iio n),
-  { rw [← span_gram_schmidt 𝕜 f n, gram_schmidt_def' _ f, h, zero_add],
+  { rw [← span_gram_schmidt_Iio 𝕜 f n, gram_schmidt_def' _ f, h, zero_add],
     apply submodule.sum_mem _ _,
     simp_intros a ha only [finset.mem_Ico],
     simp only [set.mem_image, set.mem_Iio, orthogonal_projection_singleton],
@@ -161,39 +174,89 @@ end
 
 /-- If the input vectors of `gram_schmidt` are linearly independent,
 then the output vectors are non-zero. -/
-lemma gram_schmidt_ne_zero [succ_order ι] [is_succ_archimedean ι]
-    (f : ι → E) (n : ι) (h₀ : linear_independent 𝕜 f) :
+lemma gram_schmidt_ne_zero (f : ι → E) (n : ι) (h₀ : linear_independent 𝕜 f) :
   gram_schmidt 𝕜 f n ≠ 0 :=
 gram_schmidt_ne_zero_coe _ _ _ (linear_independent.comp h₀ _ subtype.coe_injective)
 
+/-- `gram_schmidt` produces a triangular matrix of vectors when given a basis. -/
+lemma gram_schmidt_triangular {i j : ι} (hij : i < j) (b : basis ι 𝕜 E) :
+  b.repr (gram_schmidt 𝕜 b i) j = 0 :=
+begin
+  have : gram_schmidt 𝕜 b i ∈ span 𝕜 (gram_schmidt 𝕜 b '' set.Iio j),
+    from subset_span ((set.mem_image _ _ _).2 ⟨i, hij, rfl⟩),
+  have : gram_schmidt 𝕜 b i ∈ span 𝕜 (b '' set.Iio j),
+    by rwa [← span_gram_schmidt_Iio 𝕜 b j],
+  have : ↑(((b.repr) (gram_schmidt 𝕜 b i)).support) ⊆ set.Iio j,
+    from basis.repr_support_subset_of_mem_span b (set.Iio j) this,
+  exact (finsupp.mem_supported' _ _).1
+    ((finsupp.mem_supported 𝕜 _).2 this) j (not_mem_Iio.2 (le_refl j)),
+end
+
+/-- `gram_schmidt` produces linearly independent vectors when given linearly independent vectors. -/
+lemma gram_schmidt_linear_independent (f : ι → E) (h₀ : linear_independent 𝕜 f) :
+  linear_independent 𝕜 (gram_schmidt 𝕜 f) :=
+linear_independent_of_ne_zero_of_inner_eq_zero
+    (λ i, gram_schmidt_ne_zero _ _ _ h₀) (λ i j, gram_schmidt_orthogonal 𝕜 f)
+
+/-- When given a basis, `gram_schmidt` produces a basis. -/
+noncomputable def gram_schmidt_basis (b : basis ι 𝕜 E) : basis ι 𝕜 E :=
+basis.mk
+  (gram_schmidt_linear_independent 𝕜 b b.linear_independent)
+  ((span_gram_schmidt 𝕜 b).trans b.span_eq)
+
+lemma coe_gram_schmidt_basis (b : basis ι 𝕜 E) :
+  (gram_schmidt_basis 𝕜 b : ι → E) = gram_schmidt 𝕜 b := basis.coe_mk _ _
+
 /-- the normalized `gram_schmidt`
 (i.e each vector in `gram_schmidt_normed` has unit length.) -/
 noncomputable def gram_schmidt_normed (f : ι → E) (n : ι) : E :=
 (∥gram_schmidt 𝕜 f n∥ : 𝕜)⁻¹ • (gram_schmidt 𝕜 f n)
 
-lemma gram_schmidt_normed_unit_length_coe [succ_order ι] [is_succ_archimedean ι]
+lemma gram_schmidt_normed_unit_length_coe
     (f : ι → E) (n : ι) (h₀ : linear_independent 𝕜 (f ∘ (coe : set.Iic n → ι))) :
   ∥gram_schmidt_normed 𝕜 f n∥ = 1 :=
 by simp only [gram_schmidt_ne_zero_coe 𝕜 f n h₀,
   gram_schmidt_normed, norm_smul_inv_norm, ne.def, not_false_iff]
 
-lemma gram_schmidt_normed_unit_length [succ_order ι] [is_succ_archimedean ι]
-    (f : ι → E) (n : ι) (h₀ : linear_independent 𝕜 f) :
+lemma gram_schmidt_normed_unit_length (f : ι → E) (n : ι) (h₀ : linear_independent 𝕜 f) :
   ∥gram_schmidt_normed 𝕜 f n∥ = 1 :=
 gram_schmidt_normed_unit_length_coe _ _ _ (linear_independent.comp h₀ _ subtype.coe_injective)
 
 /-- **Gram-Schmidt Orthonormalization**:
 `gram_schmidt_normed` produces an orthornormal system of vectors. -/
-theorem gram_schmidt_orthonormal [succ_order ι] [is_succ_archimedean ι]
-    (f : ι → E) (h₀ : linear_independent 𝕜 f) :
+theorem gram_schmidt_orthonormal (f : ι → E) (h₀ : linear_independent 𝕜 f) :
   orthonormal 𝕜 (gram_schmidt_normed 𝕜 f) :=
 begin
   unfold orthonormal,
   split,
-  { simp only [gram_schmidt_normed_unit_length, h₀, forall_const] },
+  { simp only [gram_schmidt_normed_unit_length, h₀, eq_self_iff_true, implies_true_iff], },
   { intros i j hij,
     simp only [gram_schmidt_normed, inner_smul_left, inner_smul_right, is_R_or_C.conj_inv,
       is_R_or_C.conj_of_real, mul_eq_zero, inv_eq_zero, is_R_or_C.of_real_eq_zero, norm_eq_zero],
     repeat { right },
     exact gram_schmidt_orthogonal 𝕜 f hij }
 end
+
+lemma span_gram_schmidt_normed (f : ι → E) (s : set ι) :
+  span 𝕜 (gram_schmidt_normed 𝕜 f '' s) = span 𝕜 (gram_schmidt 𝕜 f '' s) :=
+begin
+  refine span_eq_span (set.image_subset_iff.2 $ λ i hi, smul_mem _ _ $ subset_span $
+    mem_image_of_mem _ hi)
+    (set.image_subset_iff.2 $ λ i hi, span_mono (image_subset _ $ singleton_subset_set_iff.2 hi) _),
+  simp only [coe_singleton, set.image_singleton],
+  by_cases h : gram_schmidt 𝕜 f i = 0,
+  { simp [h] },
+  { refine mem_span_singleton.2 ⟨∥gram_schmidt 𝕜 f i∥, smul_inv_smul₀ _ _⟩,
+    exact_mod_cast (norm_ne_zero_iff.2 h) }
+end
+
+lemma span_gram_schmidt_normed_range (f : ι → E) :
+  span 𝕜 (range (gram_schmidt_normed 𝕜 f)) = span 𝕜 (range (gram_schmidt 𝕜 f)) :=
+by simpa only [image_univ.symm] using span_gram_schmidt_normed 𝕜 f univ
+
+/-- When given a basis, `gram_schmidt_normed` produces an orthonormal basis. -/
+noncomputable def gram_schmidt_orthonormal_basis [fintype ι] (b : basis ι 𝕜 E) :
+  orthonormal_basis ι 𝕜 E :=
+orthonormal_basis.mk
+  (gram_schmidt_orthonormal 𝕜 b b.linear_independent)
+  (((span_gram_schmidt_normed_range 𝕜 b).trans (span_gram_schmidt 𝕜 b)).trans b.span_eq)
diff --git a/src/linear_algebra/span.lean b/src/linear_algebra/span.lean
@@ -57,6 +57,9 @@ le_antisymm (span_le.2 h₁) h₂
 lemma span_eq : span R (p : set M) = p :=
 span_eq_of_le _ (subset.refl _) subset_span
 
+lemma span_eq_span (hs : s ⊆ span R t) (ht : t ⊆ span R s) : span R s = span R t :=
+le_antisymm (span_le.2 hs) (span_le.2 ht)
+
 /-- A version of `submodule.span_eq` for when the span is by a smaller ring. -/
 @[simp] lemma span_coe_eq_restrict_scalars
   [semiring S] [has_scalar S R] [module S M] [is_scalar_tower S R M] :