# NumPy

In [79]:
import pandas as pd
import numpy as np
from numpy.linalg import svd
from sklearn.decomposition import TruncatedSVD

### Tutorial

In [33]:
# Small 3x3 example whose factors are easy to read by eye.
# NOTE(review): NumPy's docs discourage the matrix class in favour of plain
# ndarrays; it is kept here so the `matrix(...)` outputs shown below stay accurate.
m = np.asmatrix(np.array([[1, 0, 0],
                          [1, 1, 0],
                          [0, 0, 1]]))
# np.linalg.svd returns (U, singular values, V^H): the third value is already
# the conjugate transpose, so the rows of `v` are the right singular vectors.
u, s, v = np.linalg.svd(m)

In [34]:
# Display u, the first factor returned by np.linalg.svd(m); its columns are
# the left singular vectors of m.
u

matrix([[-0.52573111,  0.        , -0.85065081],
        [-0.85065081,  0.        ,  0.52573111],
        [ 0.        ,  1.        ,  0.        ]])

In [35]:
# Display s, the singular values of m: a 1-D array sorted in descending order.
s

array([1.61803399, 1.        , 0.61803399])

In [36]:
# Display v. Despite the name, this is V^H (the conjugate transpose of V) as
# returned by np.linalg.svd, so its rows are the right singular vectors of m.
v

matrix([[-0.85065081, -0.52573111, -0.        ],
        [ 0.        ,  0.        ,  1.        ],
        [-0.52573111,  0.85065081,  0.        ]])

In [37]:
# Rebuild m from its factors as u . diag(s) . v. No transpose of v is needed
# because np.linalg.svd already returned V^H. Entries such as -8.5e-17 in the
# result are floating-point round-off, i.e. zero.
u.dot(np.diag(s).dot(v))

matrix([[ 1.00000000e+00, -8.51707167e-17,  0.00000000e+00],
        [ 1.00000000e+00,  1.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00,  1.00000000e+00]])

### Advanced

In [6]:
# Random complex inputs for the SVD checks in the following cells.
# A dedicated, seeded Generator makes the draws identical on every
# Restart & Run All and leaves NumPy's global random state untouched.
rng = np.random.default_rng(0)
# a: one 9x6 complex matrix (standard-normal real and imaginary parts).
a = rng.standard_normal((9, 6)) + 1j * rng.standard_normal((9, 6))
# b: a 4-D complex array of shape (2, 7, 8, 3); not used by the cells visible
# in this part of the notebook.
b = rng.standard_normal((2, 7, 8, 3)) + 1j * rng.standard_normal((2, 7, 8, 3))

In [25]:
# Full SVD of the complex matrix `a` (9x6 in this notebook): u is square
# (rows x rows), s holds the singular values, and vh is V^H (cols x cols).
u, s, vh = np.linalg.svd(a, full_matrices=True)
n_rows, n_cols = a.shape
n_sv = s.shape[0]  # number of singular values, min(n_rows, n_cols)

# Check 1 (economy form): only the first n_sv columns of u and rows of vh
# carry weight, and `u[:, :n_sv] * s` scales column j of u by s[j].
# As a bare expression in the middle of a cell its result would never be
# displayed (only the last expression is echoed), so it is asserted instead.
assert np.allclose(a, np.dot(u[:, :n_sv] * s, vh[:n_sv, :])), \
    "economy-size SVD reconstruction does not match a"

# Check 2 (full form): embed the singular values in a zero matrix with the
# same shape as `a` so that u . smat . vh is conformable.
smat = np.zeros((n_rows, n_cols), dtype=complex)
smat[:n_sv, :n_sv] = np.diag(s)
# Last expression of the cell, so its boolean result is what gets displayed.
np.allclose(a, np.dot(u, np.dot(smat, vh)))

True

In [70]:
# Toy data matrix of shape (5, 8) with entries uniform in [0, 1), rounded to
# two decimals so the printed values are easy to read. The later cells center
# it column-wise, i.e. they treat rows as samples.
# The Generator is seeded so the same X is produced on every Restart & Run All
# (the values displayed further down depend on this draw).
rng = np.random.default_rng(0)
X = rng.random((5, 8)).round(2)

In [71]:
# Center the data: subtract each column's mean (axis 0 runs down the rows),
# so every column of X_0mean averages to zero.
X_0mean = X - np.mean(X, axis=0)
# Echo a 2-decimal view for readability; X_0mean itself keeps full precision.
np.round(X_0mean, decimals=2)

array([[-0.02, -0.3 ,  0.38,  0.03, -0.2 ,  0.24,  0.23,  0.11],
       [-0.07, -0.3 ,  0.29,  0.01,  0.02, -0.3 , -0.51, -0.25],
       [-0.25,  0.01,  0.36,  0.15,  0.26, -0.01,  0.05,  0.08],
       [-0.01,  0.29, -0.54,  0.03, -0.01, -0.01,  0.49,  0.18],
       [ 0.35,  0.32, -0.48, -0.21, -0.05,  0.06, -0.26, -0.1 ]])

In [74]:
# Thin (economy) SVD of the centered data: with full_matrices=False the
# factors are U (5x5), s (5,) and Vh (5x8) for this 5x8 input.
# Note that `s` is rebound here and no longer holds the earlier cells' values.
U, s, Vh = np.linalg.svd(
    X_0mean,
    full_matrices=False,
)

In [75]:
# Display U (5x5). Its last column is constant (about 0.447 = 1/sqrt(5)): it
# is the direction paired with the ~0 singular value that column-centering
# introduces.
U

array([[-0.27827555,  0.45856512,  0.71573725,  0.00096885,  0.4472136 ],
       [-0.5069438 , -0.51472599, -0.14606446, -0.50668561,  0.4472136 ],
       [-0.28528379,  0.2826214 , -0.57217705,  0.557989  ,  0.4472136 ],
       [ 0.61006928,  0.3446224 , -0.26237227, -0.49011394,  0.4472136 ],
       [ 0.46043386, -0.57108294,  0.26487654,  0.4378417 ,  0.4472136 ]])

In [76]:
# Display the singular values of X_0mean. The last one is ~1e-16, i.e. zero up
# to round-off: after column-centering the 5 rows sum to zero, so the rank is
# at most 4.
s

array([1.17639649e+00, 8.86787103e-01, 5.02282425e-01, 3.02079958e-01,
       3.24267571e-16])

In [77]:
# Display Vh (5x8): each row is a right singular vector of X_0mean, ordered
# like the singular values in s.
Vh

array([[ 0.22732445,  0.47345593, -0.7700688 , -0.11441662, -0.04911604,
         0.09322978,  0.30558993,  0.11651856],
       [-0.27867014, -0.07119135,  0.24216819,  0.20441095, -0.00385443,
         0.25252511,  0.78875509,  0.36183848],
       [ 0.46643985, -0.33437577,  0.07600906, -0.25744545, -0.60813371,
         0.47748906,  0.02602774, -0.00844468],
       [ 0.17908205,  0.51400703,  0.36017909, -0.09265631,  0.3898263 ,
         0.58868506, -0.22332737,  0.13047015],
       [-0.36992165,  0.54478502,  0.22899244, -0.10609532, -0.65473657,
        -0.17421768, -0.14203411,  0.15319281]])

In [81]:
# Number of components to keep in the truncated decomposition.
k = 3
# NOTE(review): the parameters echoed below show algorithm='randomized' with
# random_state=None, so the fitted components are not guaranteed to be
# identical between runs -- consider passing a fixed random_state.
svd_tr = TruncatedSVD(n_components=k)
# Fit on the already-centered data. The call is left as the last expression
# so the notebook echoes the fitted estimator and its parameters.
svd_tr.fit(X_0mean)

TruncatedSVD(algorithm='randomized', n_components=3, n_iter=5,
             random_state=None, tol=0.0)