In [1]:
from numpy import linalg as LA
import numpy as np


In [2]:
# 3x3 example matrix; its third row equals (row 2 - row 1) / 2.
A = np.array([
    [1, 2, 3],
    [3, 2, 1],
    [1, 0, -1],
])


In [3]:
# Eigendecomposition of A: values[i] pairs with the column vectors[:, i].
values, vectors = LA.eig(A)

In [4]:
# The third eigenvalue is ~1e-16, i.e. zero up to round-off: A is singular.
values

array([ 4.31662479e+00, -2.31662479e+00,  1.47314580e-16])

In [5]:
# Eigenvectors are the columns of this matrix, each normalized to unit length.
vectors

array([[ 0.58428153,  0.73595785,  0.40824829],
       [ 0.80407569, -0.38198836, -0.81649658],
       [ 0.10989708, -0.55897311,  0.40824829]])

## Verify numerically that $A v_i = \lambda_i v_i$ for every eigenpair, i.e. $A V = V \,\mathrm{diag}(\lambda)$

In [6]:
# Left-hand side A V: column i is A applied to the i-th eigenvector.
A.dot(vectors)

array([[ 2.52212416e+00, -1.70493820e+00, -1.66533454e-16],
       [ 3.47089307e+00,  8.84923713e-01,  3.88578059e-16],
       [ 4.74384456e-01,  1.29493096e+00,  2.77555756e-16]])

In [7]:
# Right-hand side: broadcasting scales column i of `vectors` by values[i].
# This is an element-wise product, not a dot product.
vectors*values

array([[ 2.52212416e+00, -1.70493820e+00,  6.01409256e-17],
       [ 3.47089307e+00,  8.84923713e-01, -1.20281851e-16],
       [ 4.74384456e-01,  1.29493096e+00,  6.01409256e-17]])

In [9]:
# V diag(values): the column scaling of `vectors * values`, written as a matrix product.
aa = vectors.dot(np.diag(values))
aa

array([[ 2.52212416e+00, -1.70493820e+00,  6.01409256e-17],
       [ 3.47089307e+00,  8.84923713e-01, -1.20281851e-16],
       [ 4.74384456e-01,  1.29493096e+00,  6.01409256e-17]])

In [10]:
# Rebuild A from its eigendecomposition: A = V diag(values) V^-1.
aa.dot(LA.inv(vectors))

array([[ 1.00000000e+00,  2.00000000e+00,  3.00000000e+00],
       [ 3.00000000e+00,  2.00000000e+00,  1.00000000e+00],
       [ 1.00000000e+00,  3.03603991e-16, -1.00000000e+00]])

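The Moore-Penrose Pseudoinverse
Given the singular value decomposition $A = U D V^{T}$, the pseudoinverse is $A^{+} = V D^{+} U^{T}$, where $D^{+}$ is formed by taking the reciprocal of every non-zero singular value in $D$ and transposing the result.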

In [11]:
import numpy as np

In [12]:
# Seeded generator so the 9x6 standard-normal test matrix is the same on
# every Restart & Run All (the unseeded global RNG made this cell
# irreproducible).
rng = np.random.default_rng(0)
a = rng.standard_normal((9, 6))

In [13]:
 # Moore-Penrose pseudoinverse of `a`.
 # NOTE(review): the leading space on the next line is tolerated by IPython but
 # would be an IndentationError in a plain .py export -- consider removing it.
 B = np.linalg.pinv(a)

In [14]:
# Check the defining pseudoinverse identity a B a = a (up to floating-point tolerance).
np.allclose(a, a.dot(B.dot(a)))

True

In [15]:
# Show the pseudoinverse; for the 9x6 input `a` it has the transposed shape (6 rows, 9 columns).
B

array([[-1.95621227e-01, -6.41922165e-02, -1.39432000e-01,
        -2.77342466e-01,  3.26814905e-01,  2.26214426e-01,
         2.84605984e-01, -5.35256836e-01,  1.08627557e-01],
       [ 1.35021231e-01,  1.60962677e-01,  2.97275913e-01,
        -2.57677244e-01,  1.18600791e-01, -1.14514273e-01,
         3.43533840e-02, -2.02754983e-02,  9.06276666e-02],
       [-1.09164138e-01, -5.31883610e-04, -6.05019171e-02,
         1.17203423e-01,  8.42122233e-02,  2.58901546e-01,
         1.38157303e-01, -2.16855562e-01,  2.33381911e-01],
       [-3.14337596e-01,  1.75836662e-01, -5.30523785e-02,
        -2.51217601e-01,  2.24803143e-01,  4.16204620e-01,
         4.71085821e-01, -2.17303128e-01, -7.81287760e-02],
       [-3.33303824e-01,  1.69062487e-01, -4.74876346e-01,
         1.37595668e-01,  1.06592166e-01, -9.17401299e-02,
         1.79407288e-01, -1.98673116e-01, -5.35781065e-02],
       [ 1.31081938e-01, -3.15257578e-01,  5.50151488e-02,
         4.18145447e-01, -5.07641017e-01,  3.744684

The determinant is equal to the product of all the eigenvalues of the matrix.

# Principal Components Analysis

In [17]:
# Toy data set: 4 rows (samples) by 3 columns (features).
A = np.array([
    [1, 2, 3],
    [3, 4, 5],
    [5, 6, 7],
    [10, 22, 14],
])

In [18]:
# Mean of every column of A (one value per feature).
M = A.mean(axis=0)
M

array([4.75, 8.5 , 7.25])

Next, we center the values in each column by subtracting that column's mean.

In [19]:
# Broadcasting subtracts the length-3 mean vector M from every row of A.
C = A - M
C

array([[-3.75, -6.5 , -4.25],
       [-1.75, -4.5 , -2.25],
       [ 0.25, -2.5 , -0.25],
       [ 5.25, 13.5 ,  6.75]])

Correlation is a normalized measure of how strongly, and in which direction (positive or negative), two columns change together. Covariance is its unnormalized counterpart. The covariance matrix of a data matrix holds the covariance of every column with every other column, including itself; the diagonal entries are therefore the column variances.

In [20]:
# Covariance between the columns of C; rowvar=False tells NumPy that each
# column (not each row) is a variable.
V = np.cov(C, rowvar=False)

In [21]:
# 3x3 symmetric covariance matrix: one row/column per column of A.
V

array([[14.91666667, 34.16666667, 18.41666667],
       [34.16666667, 83.66666667, 43.16666667],
       [18.41666667, 43.16666667, 22.91666667]])

Finally, we calculate the eigendecomposition of the covariance matrix V. This results in an array of eigenvalues and a matrix whose columns are the corresponding eigenvectors.

In [23]:
# Eigendecomposition of the covariance matrix. This rebinds `values` and
# `vectors` from the earlier eigen example.
# NOTE: LA.eig makes no ordering guarantee for the eigenvalues; for this V
# they happen to come out largest-first.
values, vectors = LA.eig(V)


In [24]:
# Variance along each eigenvector; the third is ~0, so the centered data
# effectively lies in a 2-D plane.
values

array([1.20358969e+02, 1.14103116e+00, 6.47749343e-16])

In [25]:
# Columns are the unit eigenvectors of V (the principal directions).
vectors

array([[-0.34541767, -0.69819965, -0.62705971],
       [-0.83211227,  0.53681626, -0.1393466 ],
       [-0.4339076 , -0.4736513 ,  0.76640632]])

Checking the eigenvector-eigenvalue calculation

In [26]:
# Verify V v_i = lambda_i v_i for every eigenpair, to 6 decimal places.
# Raises AssertionError on the first pair that does not match.
for i in range(len(values)):
    # Column i as an (n, 1) column vector; -1 lets NumPy infer n instead of
    # hard-coding 3 features.
    eigv = vectors[:, i].reshape(-1, 1)
    np.testing.assert_array_almost_equal(V.dot(eigv), values[i] * eigv, decimal=6)

In [27]:
# `eigv` is left over from the final loop iteration: the last eigenvector as a column.
eigv

array([[-0.62705971],
       [-0.1393466 ],
       [ 0.76640632]])

Here we reduce the 3-dimensional feature space to a 2-dimensional subspace: the two eigenvectors with the largest eigenvalues become the columns of the $d \times k$ projection matrix $W$ (with $d = 3$, $k = 2$).

In [28]:
# Project the centered data onto the two leading principal directions.
# LA.eig returns eigenvalues in no guaranteed order, so rank them explicitly
# (largest first) instead of assuming the smallest sits in the last column.
top_two = np.argsort(values)[::-1][:2]
P = vectors[:, top_two].T.dot(C.T)

In [29]:
# Transpose so each row is a sample and each column a principal-component score.
P.T

array([[  8.5481533 ,   1.14196107],
       [  5.32527823,  -0.12810833],
       [  2.10240316,  -1.39817772],
       [-15.9758347 ,   0.38432499]])

In [30]:

from numpy import array
from sklearn.decomposition import PCA

# Cross-check the manual PCA against scikit-learn, using the matrix A
# defined earlier in the notebook.
print(A)
# Build a 2-component PCA and fit it to the data in one step.
pca = PCA(n_components=2).fit(A)
# Principal axes (one per row) and the variance captured by each. Signs can
# differ from the manual eigenvectors: an eigenvector is only defined up to sign.
print(pca.components_)
print(pca.explained_variance_)
# Project A onto the fitted components.
B = pca.transform(A)
print(B)

[[ 1  2  3]
 [ 3  4  5]
 [ 5  6  7]
 [10 22 14]]
[[ 0.34541767  0.83211227  0.4339076 ]
 [ 0.69819965 -0.53681626  0.4736513 ]]
[120.35896884   1.14103116]
[[-8.5481533  -1.14196107]
 [-5.32527823  0.12810833]
 [-2.10240316  1.39817772]
 [15.9758347  -0.38432499]]
