<a href="https://colab.research.google.com/github/stevensmiley1989/data-science-ipython-notebooks/blob/master/simple_linear_algebra.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
def row_echelon(A):
    """Return the row echelon form (REF) of matrix A.

    Every pivot is scaled to 1 and all entries below a pivot are eliminated.
    The computation runs on a floating-point (or complex) copy, so integer
    input is not truncated and the caller's array is left untouched.

    Parameters
    ----------
    A : 2-D array_like
        Matrix to reduce. Must have exactly two dimensions.

    Returns
    -------
    numpy.ndarray
        A new array with the same shape as ``A`` in row echelon form.
    """
    A = np.asarray(A)
    # astype always copies; promoting to at least float64 keeps the pivot
    # division exact for integer input while preserving complex dtypes.
    A = A.astype(np.result_type(A.dtype, np.float64))

    # a matrix with no rows or no columns is already in REF
    r, c = A.shape
    if r == 0 or c == 0:
        return A

    # locate the first row with a non-zero entry in the first column
    pivot_candidates = np.flatnonzero(A[:, 0])
    if pivot_candidates.size == 0:
        # the whole first column is zero: reduce the remaining columns
        # and put the zero column back in front
        B = row_echelon(A[:, 1:])
        return np.hstack([A[:, :1], B])

    # bring the pivot row to the top if it is not there already
    i = pivot_candidates[0]
    if i > 0:
        A[[0, i]] = A[[i, 0]]

    # scale the pivot row so that the pivot equals 1
    A[0] = A[0] / A[0, 0]
    # eliminate the first column from every row below the pivot row
    A[1:] -= A[0] * A[1:, 0:1]

    # reduce the submatrix below and to the right of the pivot, then
    # reassemble: pivot row on top, (now zero) first column on the left
    B = row_echelon(A[1:, 1:])
    return np.vstack([A[:1], np.hstack([A[1:, :1], B])])

In [None]:
# Banner separating this cell's output
print('############################')
# Build the 4x4 example matrix that the cells below operate on
A = np.array([
    [2, 43, 123, 4],
    [1, 4, 4, 2],
    [2, 7, 3, 9],
    [3, 8, 3, 4],
])
#A=row_echelon(A)
print('A=',A)
# Singular value decomposition; D comes back as a 1-D array of singular values
U, D, VT = np.linalg.svd(A)

# Rebuild A from its factors: np.diag lifts D to a square diagonal matrix
A_remake = np.matmul(np.matmul(U, np.diag(D)), VT)
print('A_remake = (U@np.diag(D)@VT)',A_remake)
# The same product spelled with the ndarray.dot method
A_also_remake = U.dot(np.diag(D)).dot(VT)

In [None]:
# Covariance matrix of A computed by hand, following np.cov's defaults:
# every row of A is one variable and every column is one observation.
n_vars, n_obs = A.shape
# subtract each row's own mean once instead of recomputing it per pair
centered = A - A.mean(axis=1, keepdims=True)
# one entry per pair of variables, so the result is (n_vars, n_vars)
cm=np.zeros(shape=(n_vars, n_vars))
for i in range(n_vars):
    for j in range(n_vars):
        # unbiased estimate: divide by (observations - 1), as np.cov does
        cm[i,j]=np.dot(centered[i],centered[j])/(n_obs-1)
print('###############################')
print('Covariance Matrix of A by hand')
print(cm)
print('np.cov(A)')
print(np.cov(A))
print(' ')

In [None]:
from sklearn.decomposition import PCA
def _report_pca(title, fitted, projected):
    """Print the explained-variance summary of a fitted PCA.

    ``fitted`` is the PCA object after ``fit_transform`` and ``projected``
    is the array that call returned. Returns the total explained variance,
    the per-component percentages, their cumulative sum, and the summary
    list, in that order.
    """
    print('#############################')
    print(title)
    print(projected)
    print('pca.n_components_=', fitted.n_components_)
    total = sum(fitted.explained_variance_)
    print('sum(pca.explained_variance_)', total)
    ordered = sorted(fitted.explained_variance_, reverse=True)
    percentages = [(value / total) * 100 for value in ordered]
    print('var_exp', percentages)
    # Cumulative explained variance
    running = np.cumsum(percentages)
    print('cum_var_exp = np.cumsum(var_exp)', running)
    # number of entries in A, then how many components are needed to
    # pass 99% and 95% cumulative explained variance
    summary = [
        A.shape[0] * A.shape[1],
        np.argmax(running > 99) + 1,
        np.argmax(running > 95) + 1,
    ]
    print('componentsVariance', summary)
    print(' ')
    return total, percentages, running, summary


# PCA applied to the covariance matrix of A
pca = PCA()
cA_pca = pca.fit_transform(np.cov(A))
tot, var_exp, cum_var_exp, componentsVariance = _report_pca('PCA of np.cov(A)', pca, cA_pca)

# PCA applied to A itself
pca = PCA()
A_pca = pca.fit_transform(A)
tot, var_exp, cum_var_exp, componentsVariance = _report_pca('PCA of A', pca, A_pca)

In [None]:
def _print_eigenvalue_comparison(banner, matrix):
    """Print squared singular values of ``matrix`` next to eig(matrix.T @ matrix)."""
    print(banner)
    print('eigenvalues from np.linalg.svd')
    # squared singular values of the matrix
    singular_values = np.linalg.svd(matrix)[1]
    print(sorted(singular_values**2))
    print('eigenvalues from np.linalg.eig')
    # eigenvalues of matrix.T @ matrix
    gram_eigenvalues = np.linalg.eig(np.dot(matrix.T, matrix))[0]
    print(sorted(gram_eigenvalues))
    print('Pretty close, but not the exact same')


_print_eigenvalue_comparison('##############A###############', A)
# the covariance matrix is computed once and reused for both spectra
_print_eigenvalue_comparison('##############np.cov(A)################', np.cov(A))

In [None]:
print('##############################')
print('prove (A-eig_v)*eig_vec=0')
print('A',A)
# Decompose A^T A once so eigenvalues and eigenvectors stay paired
gram = np.dot(A.T, A)
eig_values_A, eig_vectors_A = np.linalg.eig(gram)
print('eig_values_A',sorted(eig_values_A))
print('eig_vectors_A',eig_vectors_A)
# Column k of eig_vectors_A belongs to eig_values_A[k], so the eigen-equation
# (gram - lambda_k * I) @ v_k = 0 for every k is gram @ V - V * lambda = 0.
# Broadcasting V * lambda scales column k by lambda_k.
residual = np.dot(gram, eig_vectors_A) - eig_vectors_A * eig_values_A
# sum of absolute residuals: zero only if every entry vanishes (a plain sum
# could cancel positive against negative errors)
result = np.sum(np.abs(residual))
print(result)
print('np.sum(np.abs(np.dot(A.T,A)@eig_vectors_A - eig_vectors_A*eig_values_A)) =',np.round(result,4))
