In [None]:
import numpy as np
import pandas as pd

In [None]:
# Load the Iris CSV and keep only its numeric columns as a 2-D NumPy array.
# For quick experiments a small random matrix can be used instead:
#     data = np.random.random((3, 3))
IRIS_CSV_URL = 'https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv'
iris_raw = pd.read_csv(IRIS_CSV_URL)
# `data` is bound only to the ndarray (never to the intermediate DataFrame), so
# re-running this cell is idempotent; to_numpy() is the pandas-recommended
# replacement for the .values attribute.
data = iris_raw.select_dtypes(include='number').to_numpy()
#cov = np.dot(data.T, data)

In [None]:
def sort_pairs(eigenvalues, eigenvectors):
    """Sort (eigenvalue, vector) pairs by descending eigenvalue.

    The pair at position ``i`` is ``(eigenvalues[i], eigenvectors[i])``. When
    an eigenvalue is negative, both the eigenvalue and its vector are
    multiplied by -1 before sorting, so every returned eigenvalue is
    non-negative.

    Parameters
    ----------
    eigenvalues : sequence of real numbers
        Values to sort by.
    eigenvectors : indexable
        ``eigenvectors[i]`` is the vector paired with ``eigenvalues[i]``.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The sorted values and the vectors stacked in the same order.

    NOTE(review): vectors are picked with ``eigenvectors[i]``, i.e. by row for
    a 2-D array, while np.linalg.eig/eigh return eigenvectors as columns;
    negating an eigenvalue also does not yield a valid eigenpair of the
    original matrix. Confirm the intended contract with the callers.
    """
    pairs = [
        (-1 * value, -1 * eigenvectors[idx]) if value < 0 else (value, eigenvectors[idx])
        for idx, value in enumerate(eigenvalues)
    ]
    # sorted() is stable, so pairs with equal eigenvalues keep their input order.
    ordered = sorted(pairs, key=lambda pair: pair[0], reverse=True)
    sorted_values = np.array([value for value, _ in ordered])
    sorted_vectors = np.array([np.array(vector) for _, vector in ordered])
    return sorted_values, sorted_vectors
# TODO: needs fixing
#def sort_pairs(eigenvalues, eigenvectors):
    #return eigenvalues, eigenvectors

<h2>numpy - Eigen decomposition</h2>

In [None]:
def check_symmetric(a, tol=1e-8):
    """Return True when ``a`` is a square 2-D matrix equal to its transpose.

    Parameters
    ----------
    a : array_like
        Candidate matrix.
    tol : float, optional
        Absolute tolerance passed to ``np.allclose`` (NumPy's default relative
        tolerance still applies on top of it).

    Returns
    -------
    bool
        False for anything that is not a square 2-D array; otherwise whether
        ``a`` and ``a.T`` agree element-wise within the tolerance.
    """
    a = np.asarray(a)
    # Without this guard a (1, n) input broadcasts against its (n, 1) transpose
    # and can be reported as symmetric, while other non-square shapes raise
    # inside np.allclose instead of answering the question.
    if a.ndim != 2 or a.shape[0] != a.shape[1]:
        return False
    return bool(np.allclose(a, a.T, atol=tol))

def eigen_decomposition(A):
    """Compute eigenvalues and eigenvectors of ``A`` with NumPy.

    Symmetric input (as judged by ``check_symmetric``) is routed to
    ``np.linalg.eigh``; everything else goes to the general ``np.linalg.eig``.

    Parameters
    ----------
    A : np.ndarray
        Square matrix to decompose.

    Returns
    -------
    tuple
        ``(eigenvalues, eigenvectors)`` exactly as produced by the chosen
        NumPy routine (eigenvectors are stored column-wise by both routines).
    """
    solver = np.linalg.eigh if check_symmetric(A) else np.linalg.eig
    eigenvalues, eigenvectors = solver(A)
    return eigenvalues, eigenvectors

<h2>numpy - SVD</h2>

In [None]:
def svd_decomposition(A, gen_HHtransposed=False):
    """Eigen-decompose ``A.T @ A`` (default) or ``A @ A.T`` through the SVD of ``A``.

    With ``A = U @ diag(s) @ Vh`` the two Gram matrices factor as
    ``A.T @ A = V @ diag(s**2) @ V.T`` and ``A @ A.T = U @ diag(s**2) @ U.T``,
    so the squared singular values are the eigenvalues and ``V`` / ``U`` hold
    the eigenvectors column-wise.

    Parameters
    ----------
    A : np.ndarray
        2-D matrix of shape (m, n). Assumed real-valued: ``Vh.T`` is used
        without complex conjugation.
    gen_HHtransposed : bool, optional
        False (default) returns the decomposition of ``A.T @ A`` (n x n);
        True returns the decomposition of ``A @ A.T`` (m x m).
        TODO: originally marked as a debug switch to be removed.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        Eigenvalues (squared singular values, zero-padded to the size of the
        Gram matrix) and the matching square matrix of eigenvectors.
    """
    u, s, vh = np.linalg.svd(A)
    squared = s * s
    basis = u if gen_HHtransposed else vh.T
    # np.linalg.svd yields only min(m, n) singular values; the Gram matrix has
    # additional zero eigenvalues whenever it is larger than that. Pad in both
    # branches so the eigenvalue count always equals the number of eigenvector
    # columns (previously the A.T @ A branch was left short for m < n).
    missing = basis.shape[0] - squared.shape[0]
    squared = np.concatenate((squared, np.zeros((missing,))))
    return squared, basis
    
# TODO: verify that the correct factor (u vs. v) is returned

<h2>numpy - QR</h2>

In [None]:
def qr_decomposition(A, max_iter=10000):
    """Approximate an eigen-decomposition with the unshifted QR algorithm.

    Each step factors the current iterate as ``X = Q @ R`` and replaces it
    with ``R @ Q``; the Q factors are accumulated into one matrix. Iteration
    stops once ``X`` is numerically upper triangular (``np.allclose`` against
    ``np.triu(X)``).

    Parameters
    ----------
    A : np.ndarray
        Square matrix. For symmetric input the accumulated Q approximates the
        eigenvectors column-wise; for non-symmetric input its columns are
        Schur vectors rather than eigenvectors.
    max_iter : int, optional
        Upper bound on the number of QR steps. The unshifted iteration never
        converges for some inputs (e.g. ``[[0, 1], [1, 0]]``, whose
        eigenvalues have equal magnitude), which previously made this
        function loop forever.

    Returns
    -------
    tuple of (np.ndarray, np.ndarray)
        The diagonal of the final iterate (eigenvalue estimates) and the
        accumulated product of all Q factors.

    Raises
    ------
    np.linalg.LinAlgError
        If the iterate is still not upper triangular after ``max_iter`` steps.
    """
    X = A
    Q_total = np.eye(A.shape[1])
    steps = 0
    while not np.allclose(X, np.triu(X)):
        if steps >= max_iter:
            raise np.linalg.LinAlgError(
                "QR iteration did not converge within %d steps" % max_iter
            )
        Q, R = np.linalg.qr(X)
        Q_total = np.dot(Q_total, Q)
        # R @ Q equals Q.T @ X @ Q, a similarity transform that preserves the spectrum.
        X = np.dot(R, Q)
        steps += 1
    return np.diagonal(X), Q_total

# Performance problems. Review https://link-1springer-1com-1htv89mdl09c7.hansolo.bg.ug.edu.pl/article/10.1007/s13042-012-0131-7

<h2>Testy poprawności</h2>

<h3>H<sup>T</sup>H</h3>

In [None]:
# Gram matrix H^T H of the numeric data; its shape is (n_columns, n_columns).
# NOTE(review): the name `cov` suggests a covariance matrix, but no centering
# or scaling of `data` is visible in this notebook - confirm this is intended.
cov = np.dot(data.T, data)
cov.shape

<h4>Eigendecomposition</h4>

In [None]:
# Reference result: NumPy's eigen solver applied directly to H^T H.
evalsE, evecsE = eigen_decomposition(cov)

<h4>QR decomposition</h4>

In [None]:
# Iterative QR algorithm applied to the same H^T H matrix.
evalsQ, evecsQ = qr_decomposition(cov)

<h4>Singular value decomposition</h4>

In [None]:
# The SVD route works on the raw data matrix, not on H^T H; svd_decomposition
# squares the singular values and returns the transposed third SVD factor.
evalsS, evecsS = svd_decomposition(data)

<h5>Eigenvalues MSE</h5>

In [None]:
# Mean squared difference between the eigenvalues from eigen_decomposition and
# qr_decomposition; both are sorted first so the comparison ignores ordering.
((np.array(sorted(evalsE)) - np.array(sorted(evalsQ)))**2).mean(axis=None)

In [None]:
# Same comparison: eigen_decomposition vs. svd_decomposition.
((np.array(sorted(evalsE)) - np.array(sorted(evalsS)))**2).mean(axis=None)

In [None]:
# Same comparison: qr_decomposition vs. svd_decomposition.
((np.array(sorted(evalsQ)) - np.array(sorted(evalsS)))**2).mean(axis=None)

<h5>Decomp MSE</h5>

In [None]:
# Reconstruction error: mean squared difference between `cov` and
# V @ diag(w) @ inv(V) built from the eigen_decomposition output.
((evecsE.dot(np.diag(evalsE)).dot(np.linalg.inv(evecsE)) - cov)**2).mean(axis=None)

In [None]:
# Reconstruction error for the svd_decomposition output.
((evecsS.dot(np.diag(evalsS)).dot(np.linalg.inv(evecsS)) - cov)**2).mean(axis=None)

In [None]:
# Reconstruction error for the qr_decomposition output.
((evecsQ.dot(np.diag(evalsQ)).dot(np.linalg.inv(evecsQ)) - cov)**2).mean(axis=None)

<h3>HH<sup>T</sup></h3>

In [None]:
# Gram matrix H H^T of the numeric data; its shape is (n_rows, n_rows).
# This rebinds `cov`: cells that expect the H^T H matrix must not be re-run
# after this one without recomputing `cov` first.
cov = np.dot(data, data.T)
cov.shape

<h4>Eigendecomposition</h4>

In [None]:
# Reference result: NumPy's eigen solver applied directly to H H^T; the shapes
# of both outputs are displayed.
evalsE, evecsE = eigen_decomposition(cov)
evalsE.shape, evecsE.shape

<h4>QR decomposition</h4>

In [None]:
# Iterative QR algorithm applied to the same H H^T matrix.
evalsQ, evecsQ = qr_decomposition(cov)

<h4>Singular value decomposition</h4>

In [None]:
# gen_HHtransposed=True makes svd_decomposition return the first SVD factor (u)
# and zero-pad the squared singular values to u's row count.
evalsS, evecsS = svd_decomposition(data, True)

<h5>Eigenvalues MSE</h5>

In [None]:
# Mean squared difference between sorted absolute eigenvalues of
# eigen_decomposition and qr_decomposition.
# NOTE(review): abs() presumably neutralizes tiny negative values caused by
# round-off in the (near-)zero part of the spectrum - confirm.
((np.array(sorted(abs(evalsE))) - np.array(sorted(abs(evalsQ))))**2).mean(axis=None)

In [None]:
# Same comparison: eigen_decomposition vs. svd_decomposition.
# Problem: SVD generates S only for values > 0.
((np.array(sorted(abs(evalsE))) - np.array(sorted(abs(evalsS))))**2).mean(axis=None)

In [None]:
# Same comparison: qr_decomposition vs. svd_decomposition.
# Problem: SVD generates S only for values > 0.
((np.array(sorted(abs(evalsQ))) - np.array(sorted(abs(evalsS))))**2).mean(axis=None)

<h5>Decomp MSE</h5>

In [None]:
# Reconstruction error: mean squared difference between `cov` and
# V @ diag(w) @ inv(V) built from the eigen_decomposition output.
((evecsE.dot(np.diag(evalsE)).dot(np.linalg.inv(evecsE)) - cov)**2).mean(axis=None)

In [None]:
# Reconstruction error for the svd_decomposition output.
((evecsS.dot(np.diag(evalsS)).dot(np.linalg.inv(evecsS)) - cov)**2).mean(axis=None)

In [None]:
# Reconstruction error for the qr_decomposition output.
((evecsQ.dot(np.diag(evalsQ)).dot(np.linalg.inv(evecsQ)) - cov)**2).mean(axis=None)

<h2>Wydajność</h2>

<h4>H<sup>T</sup>H</h4>

In [None]:
# Benchmark input: 10 x 3 matrix of uniform random values in [0, 1).
# NOTE(review): no random seed is set in the visible cells, so the timed matrix
# differs between runs.
A = np.random.random((10,3))

In [None]:
%%timeit -n 100
# Timed work includes forming H^T H as well as its eigendecomposition.
cov = np.dot(A.T, A)
eigen_decomposition(cov)

In [None]:
%%timeit -n 100
# The SVD route works on A directly, so no Gram matrix is formed.
svd_decomposition(A)

In [None]:
# Benchmark input: 100 x 4 random matrix.
A = np.random.random((100,4))

In [None]:
%%timeit -n 100
# Timed work includes forming H^T H as well as its eigendecomposition.
cov = np.dot(A.T, A)
eigen_decomposition(cov)

In [None]:
%%timeit -n 100
# The SVD route works on A directly, so no Gram matrix is formed.
svd_decomposition(A)

In [None]:
# Benchmark input: 500 x 20 random matrix.
A = np.random.random((500,20))

In [None]:
%%timeit -n 100
# Timed work includes forming H^T H as well as its eigendecomposition.
cov = np.dot(A.T, A)
eigen_decomposition(cov)

In [None]:
%%timeit -n 100
# The SVD route works on A directly, so no Gram matrix is formed.
svd_decomposition(A)

<h4>HH<sup>T</sup></h4>

In [None]:
# Benchmark input: 10 x 3 matrix of uniform random values in [0, 1).
# NOTE(review): no random seed is set in the visible cells, so the timed matrix
# differs between runs.
A = np.random.random((10,3))

In [None]:
%%timeit -n 100
# Timed work includes forming H H^T (n_rows x n_rows) and its eigendecomposition.
cov = np.dot(A, A.T)
eigen_decomposition(cov)

In [None]:
%%timeit -n 100
# SVD route for H H^T: returns u plus the zero-padded squared singular values.
svd_decomposition(A, True)

In [None]:
# Benchmark input: 100 x 4 random matrix.
A = np.random.random((100,4))

In [None]:
%%timeit -n 100
# Timed work includes forming H H^T (n_rows x n_rows) and its eigendecomposition.
cov = np.dot(A, A.T)
eigen_decomposition(cov)

In [None]:
%%timeit -n 100
# SVD route for H H^T: returns u plus the zero-padded squared singular values.
svd_decomposition(A, True)

In [None]:
# Benchmark input: 500 x 20 random matrix.
A = np.random.random((500,20))

In [None]:
%%timeit -n 100
# Timed work includes forming H H^T (n_rows x n_rows) and its eigendecomposition.
cov = np.dot(A, A.T)
eigen_decomposition(cov)

In [None]:
%%timeit -n 100
# SVD route for H H^T: returns u plus the zero-padded squared singular values.
svd_decomposition(A, True)