In [1]:
import numpy as np
import pandas as pd

In [2]:
# Load the iris CSV from a gist URL pinned to a specific revision (needs network access).
# (For quick experiments a random matrix such as np.random.random((3, 3)) can be used instead.)
data = pd.read_csv('https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv')
# Keep only the numeric columns and convert them to a plain NumPy array.
data = data.select_dtypes(include='number').values

<h2>numpy - Eigen decomposition</h2>

In [3]:
def check_symmetric(a, tol=1e-8):
    """Return True when ``a`` is a square 2-D matrix equal to its transpose.

    Parameters
    ----------
    a : array_like
        Candidate matrix.
    tol : float, optional
        Absolute tolerance forwarded to ``np.allclose`` (its default relative
        tolerance still applies on top of it).

    Returns
    -------
    bool
        False for anything that is not a square 2-D array; otherwise the
        result of comparing ``a`` with ``a.T``.
    """
    a = np.asarray(a)
    # Without this guard, shapes such as (n, 1) broadcast against their
    # transpose and can be reported as "symmetric", while other non-square
    # shapes fail with an unrelated broadcasting error.
    if a.ndim != 2 or a.shape[0] != a.shape[1]:
        return False
    return bool(np.allclose(a, a.T, atol=tol))

def extract_eigenvectors(v, n_components):
    """Collect the first ``n_components`` columns of ``v`` as rows of a new array.

    Parameters
    ----------
    v : ndarray
        2-D array whose columns are the vectors to extract.
    n_components : int
        Number of leading columns to take.

    Returns
    -------
    ndarray
        Array in which row ``i`` is column ``i`` of ``v``.
    """
    columns = [v[:, idx] for idx in range(n_components)]
    return np.array(columns)
    
def eigen_decomposition(A, extract_vectors=False):
    """Eigen-decompose ``A`` with the NumPy routine matching its symmetry.

    ``np.linalg.eigh`` is used when ``check_symmetric(A)`` holds, otherwise
    ``np.linalg.eig``. The eigenvalue ordering is whatever the selected
    routine returns; no re-sorting is done here.

    Parameters
    ----------
    A : ndarray
        Square matrix to decompose.
    extract_vectors : bool, optional
        When True, the eigenvectors are returned as rows (via
        ``extract_eigenvectors``) instead of as columns of a matrix.

    Returns
    -------
    tuple
        ``(eigenvalues, eigenvectors)``.
    """
    solver = np.linalg.eigh if check_symmetric(A) else np.linalg.eig
    spectrum, basis = solver(A)
    if not extract_vectors:
        return spectrum, basis
    return spectrum, extract_eigenvectors(basis, len(spectrum))

<h2>numpy - SVD</h2>

In [4]:
def svd_decomposition(A, gen_HHtransposed=False, extract_vectors=False):
    """Derive an eigen-decomposition of ``A.T @ A`` or ``A @ A.T`` from the SVD of ``A``.

    Parameters
    ----------
    A : ndarray
        Data matrix (not the product matrix itself).
    gen_HHtransposed : bool, optional
        False (default): use the right singular vectors (for ``A.T @ A``).
        True: use the left singular vectors (for ``A @ A.T``).
    extract_vectors : bool, optional
        When True, the vectors are returned as rows via ``extract_eigenvectors``.

    Returns
    -------
    tuple
        ``(eigenvalues, vectors)`` where the eigenvalues are the squared
        singular values, zero-padded to the number of columns of the
        vector matrix.
    """
    # TODO (original author's note, translated): "debug - to be removed".
    left, singular_values, right_t = np.linalg.svd(A)
    basis = left if gen_HHtransposed else right_t.T
    squared = singular_values * singular_values
    # Pad with zeros so there is one eigenvalue per column of the basis.
    padding = np.zeros((basis.shape[1] - len(squared),))
    spectrum = np.concatenate((squared, padding))
    if not extract_vectors:
        return spectrum, basis
    return spectrum, extract_eigenvectors(basis, len(spectrum))

<h2>numpy - QR</h2>

In [5]:
def qr_decomposition(A, extract_vectors=False, max_iter=10000):
    """Approximate the eigen-decomposition of a square matrix with the unshifted QR algorithm.

    Each step factors ``X = Q R`` and replaces ``X`` with ``R Q``; the ``Q``
    factors are accumulated. Iteration stops once ``X`` is numerically upper
    triangular (``np.allclose(X, np.triu(X))``) or after ``max_iter`` steps.

    For symmetric ``A`` the columns of the accumulated matrix approximate
    eigenvectors. For non-symmetric input they are only Schur vectors, so
    ``vectors @ diag(values) @ vectors.T`` will not generally reproduce ``A``.

    Parameters
    ----------
    A : array_like
        Square 2-D matrix.
    extract_vectors : bool, optional
        When True, the vectors are returned as rows via ``extract_eigenvectors``.
    max_iter : int, optional
        Upper bound on the number of QR steps. The unshifted iteration does
        not converge for every matrix (e.g. eigenvalues of equal modulus), so
        without a bound the loop may never end.

    Returns
    -------
    tuple
        ``(diagonal, vectors)``: the diagonal of the final iterate and the
        accumulated orthogonal matrix.

    Raises
    ------
    ValueError
        If ``A`` is not a square 2-D matrix.

    Warns
    -----
    RuntimeWarning
        If the iteration limit is reached before convergence; the current
        (unconverged) estimate is still returned.
    """
    import warnings

    X = np.asarray(A)
    if X.ndim != 2 or X.shape[0] != X.shape[1]:
        raise ValueError(
            "qr_decomposition expects a square 2-D matrix, got shape %s" % (X.shape,)
        )

    oMatrix = np.eye(X.shape[0])
    converged = np.allclose(X, np.triu(X))
    iterations = 0
    while not converged and iterations < max_iter:
        Q, R = np.linalg.qr(X)
        oMatrix = np.dot(oMatrix, Q)
        X = np.dot(R, Q)
        iterations += 1
        converged = np.allclose(X, np.triu(X))

    if not converged:
        warnings.warn(
            "qr_decomposition did not converge within %d iterations; "
            "returning the current estimate" % max_iter,
            RuntimeWarning,
            stacklevel=2,
        )

    # np.diagonal returns a read-only view (of the caller's matrix when no
    # iteration was needed); hand back an independent, writable array.
    diagonal = np.diagonal(X).copy()
    if extract_vectors:
        return diagonal, extract_eigenvectors(oMatrix, len(diagonal))
    return diagonal, oMatrix

# Known issues: poor performance and accuracy of the results; the author also notes not understanding how this algorithm works.

<h2>numpy - QR & SVD</h2>

https://link.springer.com/article/10.1007/s13042-012-0131-7

In [6]:
def qrsvd_decomposition(A, gen_HHtransposed=False, extract_vectors=False):
    """Eigen-decompose ``A.T @ A`` or ``A @ A.T`` via a QR factorisation followed by an SVD.

    The matrix ``M`` (``A.T`` by default, ``A`` when ``gen_HHtransposed`` is
    True) is factored as ``M = Q R``. With ``R.T = U S V``, the product
    ``M @ M.T`` equals ``(Q V.T) diag(S**2) (Q V.T).T``, so ``Q @ V.T`` holds
    the vectors and ``S**2`` the eigenvalues.

    Parameters
    ----------
    A : ndarray
        Data matrix (not the product matrix itself).
    gen_HHtransposed : bool, optional
        False (default): decompose ``A.T @ A``. True: decompose ``A @ A.T``.
    extract_vectors : bool, optional
        When True, the vectors are returned as rows via ``extract_eigenvectors``.

    Returns
    -------
    tuple
        ``(eigenvalues, vectors)``. Because NumPy's default QR is the reduced
        form, at most ``min(A.shape)`` eigenpairs are returned.
    """
    Q, R = np.linalg.qr(A if gen_HHtransposed else A.T)
    # Only S and V are used below. The reduced SVD gives the same S and V
    # shapes here (R.T never has fewer rows than columns) without building
    # the full square U, which is large when R.T is tall.
    _, S, V = np.linalg.svd(R.T, full_matrices=False)
    S = S * S  # squared singular values are the eigenvalues
    oMatrix = np.dot(Q, V.T)
    # Defensive zero-padding so there is one eigenvalue per column of oMatrix.
    zeros = np.zeros((oMatrix.shape[1] - len(S),))
    diagonal = np.concatenate((S, zeros))
    if extract_vectors:
        return diagonal, extract_eigenvectors(oMatrix, len(diagonal))
    return diagonal, oMatrix

<h2>Testy poprawności</h2>

<h3>H<sup>T</sup>H</h3>

In [7]:
# Gram matrix H^T H (features x features). `data` is used as loaded above,
# with no mean-centring applied, so this is not a covariance matrix in the strict sense.
cov = np.dot(data.T, data)
cov.shape

(4, 4)

<h4>Eigendecomposition</h4>

In [8]:
evalsE, evecsE = eigen_decomposition(cov)

<h4>QR decomposition</h4>

In [9]:
evalsQ, evecsQ = qr_decomposition(cov)

<h4>Singular value decomposition</h4>

In [10]:
evalsS, evecsS = svd_decomposition(data)

<h4>QR/SVD</h4>

In [11]:
evalsQS, evecsQS = qrsvd_decomposition(data)

In [12]:
evalsQS

array([9.20830507e+03, 3.15454317e+02, 1.19780429e+01, 3.55257020e+00])

In [13]:
evecsQS

array([[-0.75110816,  0.2841749 ,  0.50215472,  0.32081425],
       [-0.38008617,  0.5467445 , -0.67524332, -0.31725607],
       [-0.51300886, -0.70866455, -0.05916621, -0.48074507],
       [-0.16790754, -0.34367081, -0.53701625,  0.75187165]])

<h5>Decomp MSE</h5>

Dla macierzy ortogonalnych odwrotność jest równoważna z transpozycją

In [12]:
((evecsE.dot(np.diag(evalsE)).dot(evecsE.T) - cov)**2).mean(axis=None)

1.2950950362066813e-24

In [13]:
((evecsS.dot(np.diag(evalsS)).dot(evecsS.T) - cov)**2).mean(axis=None)

1.8136985061862162e-24

In [14]:
((evecsQ.dot(np.diag(evalsQ)).dot(evecsQ.T) - cov)**2).mean(axis=None)

5.1182643627130864e-18

In [15]:
((evecsQS.dot(np.diag(evalsQS)).dot(evecsQS.T) - cov)**2).mean(axis=None)

6.734776916023174e-24

<h3>HH<sup>T</sup></h3>

In [16]:
# Gram matrix H H^T (samples x samples); rebinds `cov` used by the H^T H checks above.
cov = np.dot(data, data.T)
cov.shape

(150, 150)

<h4>Eigendecomposition</h4>

In [17]:
evalsE, evecsE = eigen_decomposition(cov)
evalsE.shape, evecsE.shape

((150,), (150, 150))

<h4>QR decomposition</h4>

In [18]:
evalsQ, evecsQ = qr_decomposition(cov)

<h4>Singular value decomposition</h4>

In [19]:
evalsS, evecsS = svd_decomposition(data, True)

<h4>QR/SVD</h4>

In [20]:
evalsQS, evecsQS = qrsvd_decomposition(data, True)

<h5>Decomp MSE</h5>

Dla macierzy ortogonalnych odwrotność jest równoważna z transpozycją

In [21]:
((evecsE.dot(np.diag(evalsE)).dot(evecsE.T) - cov)**2).mean(axis=None)

1.3061348816216142e-27

In [22]:
((evecsS.dot(np.diag(evalsS)).dot(evecsS.T) - cov)**2).mean(axis=None)

1.3646157700619986e-27

In [23]:
((evecsQ.dot(np.diag(evalsQ)).dot(evecsQ.T) - cov)**2).mean(axis=None)

4.799207983494901e-21

In [24]:
((evecsQS.dot(np.diag(evalsQS)).dot(evecsQS.T) - cov)**2).mean(axis=None)

3.8279682282709186e-27

<h2>Wydajność</h2>

<h4>H<sup>T</sup>H</h4>

In [25]:
A = np.random.random((10,3))

In [26]:
%%timeit -n 1000
cov = np.dot(A.T, A)
eigen_decomposition(cov)

88.7 µs ± 10.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [27]:
%%timeit -n 1000
svd_decomposition(A)

42 µs ± 3.65 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [28]:
%%timeit -n 1000
cov = np.dot(A.T, A)
qr_decomposition(cov)

KeyboardInterrupt: 

In [None]:
%%timeit -n 1000
qrsvd_decomposition(A)

In [None]:
A = np.random.random((100,4))

In [None]:
%%timeit -n 1000
cov = np.dot(A.T, A)
eigen_decomposition(cov)

In [None]:
%%timeit -n 1000
svd_decomposition(A)

In [None]:
%%timeit -n 1000
qrsvd_decomposition(A)

In [None]:
A = np.random.random((500,20))

In [None]:
%%timeit -n 1000
cov = np.dot(A.T, A)
eigen_decomposition(cov)

In [None]:
%%timeit -n 1000
svd_decomposition(A)

In [None]:
%%timeit -n 1000
qrsvd_decomposition(A)

<h4>HH<sup>T</sup></h4>

In [None]:
A = np.random.random((10,3))

In [None]:
%%timeit -n 1000
cov = np.dot(A, A.T)
eigen_decomposition(cov)

In [None]:
%%timeit -n 1000
svd_decomposition(A, True)

In [None]:
%%timeit -n 1000
qrsvd_decomposition(A, True)

In [None]:
A = np.random.random((100,4))

In [None]:
%%timeit -n 1000
cov = np.dot(A, A.T)
eigen_decomposition(cov)

In [None]:
%%timeit -n 1000
svd_decomposition(A, True)

In [None]:
%%timeit -n 1000
qrsvd_decomposition(A, True)

In [None]:
A = np.random.random((500,20))

In [None]:
%%timeit -n 1000
cov = np.dot(A, A.T)
eigen_decomposition(cov)

In [None]:
%%timeit -n 1000
svd_decomposition(A, True)

In [None]:
%%timeit -n 1000
qrsvd_decomposition(A, True)

<h1>Prep data</h1>

In [None]:
from nssPCA import preprocessing

# Small 3x5 integer example matrix for the preprocessing demos that follow.
# NOTE: this rebinds `data`, replacing the iris array loaded at the top of the notebook.
data = np.array([[14, 17, 12, 33, 44],   
                [15, 6, 27, 8, 19],  
                [23, 2, 54, 1, 4, ]])

<h2>Mean-center</h2>

In [None]:
scaler = preprocessing.Scaler(calc_mean=True, calc_std=False)
scaler.transform(data).round(2)

In [None]:
scaler.mean_vector

In [None]:
scaler.std_vector

<h2>Standardize</h2>

In [None]:
scaler = preprocessing.Scaler()
scaler.transform(data).round(2)

In [None]:
scaler.mean_vector

In [None]:
scaler.std_vector

In [None]:
scaler.transform(data).mean(axis=0).round(2)

In [None]:
scaler.transform(data).var(axis=0).round(2)

<h2>ToSymmetric</h2>

In [None]:
squarer = preprocessing.Squarer(axis=0)
squarer.transform(data)

In [None]:
squarer = preprocessing.Squarer(axis=1)
squarer.transform(data)

<h2>Covariance</h2>

In [None]:
covariance = preprocessing.Covariance()
covariance.transform(scaler.transform(data))

In [None]:
covariance.transform(scaler.transform(data), biased=False)