|
| 1 | +/- |
| 2 | +Copyright (c) 2020 Scott Morrison. All rights reserved. |
| 3 | +Released under Apache 2.0 license as described in the file LICENSE. |
| 4 | +Authors: Scott Morrison |
| 5 | +-/ |
| 6 | +import tactic.apply_fun |
| 7 | +import ring_theory.matrix_algebra |
| 8 | +import ring_theory.polynomial_algebra |
| 9 | +import linear_algebra.nonsingular_inverse |
| 10 | +import tactic.squeeze |
| 11 | + |
| 12 | +/-! |
| 13 | +# Characteristic polynomials and the Cayley-Hamilton theorem |
| 14 | +
|
| 15 | +We define characteristic polynomials of matrices and |
| 16 | +prove the Cayley–Hamilton theorem over arbitrary commutative rings. |
| 17 | +
|
| 18 | +## Main definitions |
| 19 | +
|
| 20 | +* `char_poly` is the characteristic polynomial of a matrix. |
| 21 | +
|
| 22 | +## Implementation details |
| 23 | +
|
| 24 | +We follow a nice proof from http://drorbn.net/AcademicPensieve/2015-12/CayleyHamilton.pdf |
| 25 | +-/ |
| 26 | + |
| 27 | +noncomputable theory |
| 28 | + |
| 29 | +universes u v w |
| 30 | + |
| 31 | +open polynomial matrix |
| 32 | +open_locale big_operators |
| 33 | + |
| 34 | +variables {R : Type u} [comm_ring R] |
| 35 | +variables {n : Type w} [fintype n] [decidable_eq n] |
| 36 | + |
| 37 | +open finset |
| 38 | + |
| 39 | +/-- |
| 40 | +The "characteristic matrix" of `M : matrix n n R` is the matrix of polynomials $t I - M$. |
| 41 | +The determinant of this matrix is the characteristic polynomial. |
| 42 | +-/ |
| 43 | +def char_matrix (M : matrix n n R) : matrix n n (polynomial R) := |
| 44 | +matrix.scalar n (X : polynomial R) - (C : R →+* polynomial R).map_matrix M |
| 45 | + |
| 46 | +@[simp] lemma char_matrix_apply_eq (M : matrix n n R) (i : n) : |
| 47 | + char_matrix M i i = (X : polynomial R) - C (M i i) := |
| 48 | +by simp only [char_matrix, sub_left_inj, pi.sub_apply, scalar_apply_eq, |
| 49 | + ring_hom.map_matrix_apply, map_apply] |
| 50 | + |
| 51 | +@[simp] lemma char_matrix_apply_ne (M : matrix n n R) (i j : n) (h : i ≠ j) : |
| 52 | + char_matrix M i j = - C (M i j) := |
| 53 | +by simp only [char_matrix, pi.sub_apply, scalar_apply_ne _ _ _ h, zero_sub, |
| 54 | + ring_hom.map_matrix_apply, map_apply] |
| 55 | + |
| 56 | +lemma mat_poly_equiv_char_matrix (M : matrix n n R) : |
| 57 | + mat_poly_equiv (char_matrix M) = X - C M := |
| 58 | +begin |
| 59 | + ext k i j, |
| 60 | + simp only [mat_poly_equiv_coeff_apply, coeff_sub, pi.sub_apply], |
| 61 | + by_cases h : i = j, |
| 62 | + { subst h, rw [char_matrix_apply_eq, coeff_sub], |
| 63 | + simp only [coeff_X, coeff_C], |
| 64 | + split_ifs; simp, }, |
| 65 | + { rw [char_matrix_apply_ne _ _ _ h, coeff_X, coeff_neg, coeff_C, coeff_C], |
| 66 | + split_ifs; simp [h], } |
| 67 | +end |
| 68 | + |
| 69 | +/-- |
| 70 | +The characteristic polynomial of a matrix `M` is given by $\det (t I - M)$. |
| 71 | +-/ |
| 72 | +def char_poly (M : matrix n n R) : polynomial R := |
| 73 | +(char_matrix M).det |
| 74 | + |
| 75 | +/-- |
| 76 | +The Cayley-Hamilton theorem, that the characteristic polynomial of a matrix, |
| 77 | +applied to the matrix itself, is zero. |
| 78 | +
|
| 79 | +This holds over any commutative ring. |
| 80 | +-/ |
| 81 | +-- This proof follows http://drorbn.net/AcademicPensieve/2015-12/CayleyHamilton.pdf |
| 82 | +theorem char_poly_map_eval_self (M : matrix n n R) : |
| 83 | + ((char_poly M).map (algebra_map R (matrix n n R))).eval M = 0 := |
| 84 | +begin |
| 85 | + -- We begin with the fact $χ_M(t) I = adjugate (t I - M) * (t I - M)$, |
| 86 | + -- as an identity in `matrix n n (polynomial R)`. |
| 87 | + have h : (char_poly M) • (1 : matrix n n (polynomial R)) = |
| 88 | + adjugate (char_matrix M) * (char_matrix M) := |
| 89 | + (adjugate_mul _).symm, |
| 90 | + -- Using the algebra isomorphism `matrix n n (polynomial R) ≃ₐ[R] polynomial (matrix n n R)`, |
| 91 | + -- we have the same identity in `polynomial (matrix n n R)`. |
| 92 | + apply_fun mat_poly_equiv at h, |
| 93 | + simp only [mat_poly_equiv.map_mul, |
| 94 | + mat_poly_equiv_char_matrix] at h, |
| 95 | + -- Because the coefficient ring `matrix n n R` is non-commutative, |
| 96 | + -- evaluation at `M` is not multiplicative. |
| 97 | + -- However, any polynomial which is a product of the form $N * (t I - M)$ |
| 98 | + -- is sent to zero, because the evaluation function puts the polynomial variable |
| 99 | + -- to the right of any coefficients, so everything telescopes. |
| 100 | + apply_fun (λ p, p.eval M) at h, |
| 101 | + rw eval_mul_X_sub_C at h, |
| 102 | + -- Now $χ_M (t) I$, when thought of as a polynomial of matrices |
| 103 | + -- and evaluated at some `N` is exactly $χ_M (N)$. |
| 104 | + rw mat_poly_equiv_smul_one at h, |
| 105 | + -- Thus we have $χ_M(M) = 0$, which is the desired result. |
| 106 | + exact h, |
| 107 | +end |
0 commit comments