## 1. Import the libraries and prepare dataset

In [95]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the wine data: every column except the last goes into `x`,
# the last column goes into `y`.
dataset = pd.read_csv('wine.csv')
x, y = dataset.iloc[:, :-1].values, dataset.iloc[:, -1].values

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=0,
)

# Standardize dataset: the scaler is fitted on the training rows only and
# then applied to both splits, so no test-set statistics leak into training.
sc = StandardScaler()
sc.fit(x_train)
x_train = sc.transform(x_train)
x_test = sc.transform(x_test)

## 2a. Getting a sense of SVD

To calculate the SVD, imagine having matrix `A`,

<img src="https://miro.medium.com/max/1400/1*4DncmDEnF9SIYTTrDR0Adw.png" width="650" height="auto" />

Calculate the eigenvalues and the (normalized) eigenvectors of `A^T ⋅ A` and `A ⋅ A^T`,

<img src="https://miro.medium.com/max/1400/1*RG2fiVNyPDr77eS4qsAjLg.jpeg" width="750" height="auto" />

These will be used to form the matrices for `U`, `S` and `V^T`.

<img src="https://miro.medium.com/max/1400/1*xnlAa8E-c63HnMcTRcb5HA.jpeg" width="600" height="auto" />

(Source: [Jonathan Hui, 2019](https://jonathan-hui.medium.com/machine-learning-singular-value-decomposition-svd-principal-component-analysis-pca-1d45e885e491))

Credits: [Jason Brownlee](https://machinelearningmastery.com/singular-value-decomposition-for-machine-learning)

In [96]:
from scipy.linalg import svd

# Factorize the standardized training matrix: A = U * S * V^T
A = x_train
U, S, VT = svd(A)

# Report the shape of the input and of each factor returned by svd.
# S comes back as a 1-D vector of singular values, not as a full matrix.
for _name, _matrix in (('A', A), ('U', U), ('S', S), ('V^T', VT)):
    print(f'{_name} matrix (m × n) = {_matrix.shape}')

A matrix (m × n) = (142, 13)
U matrix (m × n) = (142, 142)
S matrix (m × n) = (13,)
V^T matrix (m × n) = (13, 13)


## 2b. Reformulating SVD

Reformulating it, we can represent it as such,

<img src="https://miro.medium.com/max/1400/1*0LEG-KOZYYYsaXnQxMCkXA.gif" width="500" height="auto" />

And each vector can be formulated as,

<img src="https://miro.medium.com/max/1400/1*KGcqnL20ihPN4RDyLhQzXA.jpeg" width="600" height="auto" />

Giving this linear combination,

<img src="https://miro.medium.com/max/1400/1*0n2-o06c_j42d0MJo7igYQ.gif" width="500" height="auto" />

(Source: [Jonathan Hui, 2019](https://jonathan-hui.medium.com/machine-learning-singular-value-decomposition-svd-principal-component-analysis-pca-1d45e885e491))

In [97]:
# Rebuild A from its SVD factors to confirm the decomposition computed in 2a.
# svd returned S as a 1-D vector, so it is first embedded in an m × n matrix
# (zeros everywhere except the leading n × n diagonal). That turns
#   U(m × m) . sigma(n × n) . V^T(n × n)
# into the conformable product
#   U(m × m) . sigma(m × n) . V^T(n × n)
n_cols = A.shape[1]
sigma = np.zeros(A.shape)
sigma[:n_cols, :n_cols] = np.diag(S)

# Reconstruct matrix (sigma . V^T is evaluated first, then multiplied by U)
B = U @ (sigma @ VT)

print('\nReconstructed matrix B,\n')
# Show the first and last rows of A next to their reconstructions
for _tag, _row in (('1', 0), ('n', -1)):
    print(f'A({_tag}) = {A[_row]}\n')
    print(f'B({_tag}) = {B[_row]}\n')


Reconstructed matrix B,

A(1) = [ 0.87668336  0.79842885  0.64412971  0.12974277  0.48853231 -0.70326216
 -1.42846826  1.0724566  -1.36820277  0.35193216  0.0290166  -1.06412236
 -0.2059076 ]

B(1) = [ 0.87668336  0.79842885  0.64412971  0.12974277  0.48853231 -0.70326216
 -1.42846826  1.0724566  -1.36820277  0.35193216  0.0290166  -1.06412236
 -0.2059076 ]

A(n) = [ 1.4610222   0.12361993  0.42085937  0.12974277 -0.63831583 -0.94935192
 -1.28450624  0.60097413 -0.62134527  1.96237659 -1.45501034 -1.2168803
 -0.2719767 ]

B(n) = [ 1.4610222   0.12361993  0.42085937  0.12974277 -0.63831583 -0.94935192
 -1.28450624  0.60097413 -0.62134527  1.96237659 -1.45501034 -1.2168803
 -0.2719767 ]



## 2c. Pseudo-Inverse SVD

For a linear equation system, compute the inverse of a square matrix `A` to solve `x` where,

<img src="https://miro.medium.com/max/1400/1*a1inq-_XL9WHTCsxzHpamQ.jpeg" width="600" height="auto" />

But since not all matrices are invertible, an exact solution often cannot be found. Hence, to find the best-fit solution, compute the pseudoinverse `A+`, which minimizes the least-squares error,

<img src="https://miro.medium.com/max/1400/1*aQ2MqySUZnIbCIrxfH1G8Q.png" width="600" height="auto" />

The solution for `x` can then be estimated as,

<img src="https://miro.medium.com/max/1400/1*lF1z-LodZHA3834kseswYw.jpeg" width="600" height="auto" />

Leading to,

<img src="https://miro.medium.com/max/1400/1*ClzObIIjZyQDb9svX4FjjQ.jpeg" width="600" height="auto" />

<img src="https://miro.medium.com/max/1400/1*mu9Z_NVYd_3CXwhbERVB8Q.jpeg" width="600" height="auto" />

For illustration purposes (see how `D+` is constructed for non-invertible matrices),

<img src="https://miro.medium.com/max/1400/1*xxatolWVNPjMCUEEWLfvyg.jpeg" width="600" height="auto" />

(Source: [Jonathan Hui, 2019](https://jonathan-hui.medium.com/machine-learning-singular-value-decomposition-svd-principal-component-analysis-pca-1d45e885e491))

In [110]:
# A_PI = A+ : pseudo-inverse of A, computed directly by NumPy
A_PI = np.linalg.pinv(A)

print(f'A matrix (m × n) = {A.shape}')
print(f'A+ matrix (m × n) = {A_PI.shape}')
print('\nReconstructed matrix A+,\n')
# A+ has the transposed shape of A, so row i of A is shown beside column i of A+
for _tag, _idx in (('1', 0), ('n', -1)):
    print(f'A({_tag}) = {A[_idx]}\n')
    print(f'A+({_tag}) = {A_PI[:, _idx]}\n')

A matrix (m × n) = (142, 13)
A+ matrix (m × n) = (13, 142)

Reconstructed matrix A+,

A(1) = [ 0.87668336  0.79842885  0.64412971  0.12974277  0.48853231 -0.70326216
 -1.42846826  1.0724566  -1.36820277  0.35193216  0.0290166  -1.06412236
 -0.2059076 ]

A+(1) = [ 0.01306503  0.00318994  0.00712028 -0.00720144  0.00405339  0.01426846
 -0.03178858  0.00139091 -0.0028464  -0.00606619  0.01362248 -0.00143495
 -0.00448885]

A(n) = [ 1.4610222   0.12361993  0.42085937  0.12974277 -0.63831583 -0.94935192
 -1.28450624  0.60097413 -0.62134527  1.96237659 -1.45501034 -1.2168803
 -0.2719767 ]

A+(n) = [ 0.01632071 -0.01043116  0.00713514 -0.00448943 -0.00863543 -0.0025722
 -0.02188495 -0.00690779  0.0079907   0.00881534 -0.00165585  0.00523781
 -0.01022904]



In [111]:
# Pseudo-inverse built by hand from the SVD factors: A+ = V . D+ . U^T.
# This calculates the same way as 2c (np.linalg.pinv).
#
# np.zeros / np.diag are used explicitly: the bare names `zeros` and `diag`
# are only imported in a later cell, so relying on them raised NameError when
# the notebook was run top to bottom on a fresh kernel.
#
# D is m × n (same shape as A) with the reciprocal singular values on its
# leading n × n diagonal; its transpose D.T is the n × m matrix D+.
# NOTE: 1.0 / S assumes every singular value is non-zero.
D = np.zeros(A.shape)
D[:A.shape[1], :A.shape[1]] = np.diag(1.0 / S)
# Calculate pseudoinverse
A_PI = VT.T.dot(D.T).dot(U.T)

print(f'A matrix (m × n) = {A.shape}')
print(f'A+ matrix (m × n) = {A_PI.shape}')

A matrix (m × n) = (142, 13)
A+ matrix (m × n) = (13, 142)


## 3. Dimensionality Reduction

In [104]:
from numpy import array
from sklearn.decomposition import TruncatedSVD

# Reduce A to its top two singular directions with scikit-learn.
#
# The estimator gets its own name instead of `svd`: binding it to `svd`
# shadowed the scipy.linalg.svd function imported earlier, so any cell calling
# svd(...) afterwards only worked if scipy's svd was re-imported first.
#
# random_state pins TruncatedSVD's default randomized solver so that re-running
# the notebook reproduces the same T.
truncated_svd = TruncatedSVD(n_components=2, random_state=0)

# Fit on A and project it in one step; T has one row per sample and
# n_components columns.
T = truncated_svd.fit_transform(A)
print(f'T matrix (m × n) = {T.shape}')
print(f'\nT(1) = {T[0]}\n')
print(f'T(n) = {T[-1]}\n')

T matrix (m × n) = (142, 2)

T(1) = [-2.17884511 -1.07218467]

T(n) = [-2.44830439 -2.11360296]



In [108]:
from numpy import array
from numpy import diag
from numpy import zeros
from scipy.linalg import svd

# Rank-k truncation of the SVD, done by hand.
# The reduced representation T can be obtained in two equivalent ways:
#   T = U . Sigma_k   (U multiplied by the first k columns of sigma)
#   T = A . V_k       (A multiplied by the transpose of the first k rows of V^T)
components_n = 2

# Keep the first k columns of sigma and the first k rows of V^T
sigma_k = sigma[:, :components_n]
VT_k = VT[:components_n, :]

for _label, _mat in (('V^T', VT), ('V^Tk', VT_k), ('S', sigma), ('Sk', sigma_k)):
    print(f'{_label} matrix (m × n) = {_mat.shape}')

# Reconstruct: rank-k approximation of A (same shape as A)
B_k = U @ (sigma_k @ VT_k)

print(f'\nA matrix (m × n) = {A.shape}')
print(f'B matrix (m × n) = {B_k.shape}')
print('\nReconstructed matrix B,\n')
# First and last rows of A beside their rank-k approximations
for _tag, _idx in (('1', 0), ('n', -1)):
    print(f'A({_tag}) = {A[_idx]}\n')
    print(f'Bk({_tag}) = {B_k[_idx]}\n')

# Reduced data, first formulation
T_k = U @ sigma_k
print('T = U . Sk (m × n): ', T_k.shape)
print(f'T(1) = {T_k[0]}')
print(f'T(n) = {T_k[-1]}\n')

# Reduced data, second formulation (overwrites T_k from the first one)
T_k = A @ VT_k.T
print('T = A . V^Tk (m × n): ', T_k.shape)
print(f'T(1) = {T_k[0]}')
print(f'T(n) = {T_k[-1]}')

V^T matrix (m × n) = (13, 13)
V^Tk matrix (m × n) = (2, 13)
S matrix (m × n) = (142, 13)
Sk matrix (m × n) = (142, 2)

A matrix (m × n) = (142, 13)
B matrix (m × n) = (142, 13)

Reconstructed matrix B,

A(1) = [ 0.87668336  0.79842885  0.64412971  0.12974277  0.48853231 -0.70326216
 -1.42846826  1.0724566  -1.36820277  0.35193216  0.0290166  -1.06412236
 -0.2059076 ]

Bk(1) = [ 0.25164834  0.78144298  0.35990517  0.4991524   0.00146148 -0.74057914
 -0.9091325   0.70745757 -0.59821518  0.80945717 -0.96035831 -0.9923469
 -0.22063686]

A(n) = [ 1.4610222   0.12361993  0.42085937  0.12974277 -0.63831583 -0.94935192
 -1.28450624  0.60097413 -0.62134527  1.96237659 -1.45501034 -1.2168803
 -0.2719767 ]

Bk(n) = [ 0.73542901  1.08864448  0.69066494  0.5397817   0.23649916 -0.74066258
 -1.00249893  0.83121646 -0.61088752  1.39202983 -1.32597784 -1.26112931
  0.08422657]

T = U . Sk (m × n):  (142, 2)
T(1) = [2.17884511 1.07218467]
T(n) = [2.44830439 2.11360296]

T = A . V^Tk (m × n):  (142, 2)


## 4. Sample Example

For example, suppose we have a matrix containing the yields of stocks traded by different investors.

<img src="https://miro.medium.com/max/1400/1*FJXUrl22HERjCUe2dR42mA.gif" width="600" height="auto" />

As a fund manager, you want to identify the combination of stocks and investors that have the largest yields.

(Source: [Jonathan Hui, 2019](https://jonathan-hui.medium.com/machine-learning-singular-value-decomposition-svd-principal-component-analysis-pca-1d45e885e491))

<img src="https://i.stack.imgur.com/8JGdq.png" width="200" height="auto" />

(Source: [@jeffery_the_wind, 2013](https://math.stackexchange.com/questions/479918/trying-to-check-cov-matrix-calculation-from-svd-using-numpy))

In [181]:
# Covariance of the stock-yield columns, computed two ways:
#   1. directly with np.cov
#   2. from the SVD of the mean-centred data: cov = V . diag(S^2) . V^T / (rows - 1)
# The two results are checked against each other below.
#
# NOTE(review): `stocks` lists 7 tickers but `stock_yields` has only 5 columns,
# so the ticker-to-column mapping is ambiguous — confirm which 5 tickers the
# columns are meant to represent.
stocks = ['GOOG', 'AMZN', 'FB', 'SNAP', 'APPL', 'TESLA', 'MSFT']
stock_yields = np.array([
    [0.5,  0.2,  0.1,  0.04,  0.3],
    [0,    0,    0.2,  0.8,   0.25],
    [0.5,  0.67, 0,    0.04,  0.1],
    [0.75, 0.2,  0.3,  0.15,  0.4],
    [0,    0.3,  0.6,  0.02,  0],
    [0.1,  0,    0.2,  0.04,  0.15],
    [0.3,  0,    0.4,  0.8,   0.2],
    [0.05, 0.7,  0,    0,     0.5],
    [0.2,  0,    0.3,  0.8,   0.6],
    [0,    0.3,  0.6,  0.1,   0.8],
    [0.8,  0.02, 0.3,  0.02,  0],
    [0.02, 0.01, 0.6,  0.025, 0.5],
    [0.95, 0.8,  0.3,  0.8,   0.8],
    [0.08, 0,    0.6,  0.01,  0.9],
    [0.02, 0,    0.35, 0.65,  0.5],
    [0.5,  0.1,  0.65, 0.1,   0.25],
    [0.01, 0.5,  0.55, 0.3,   0.15],
])

# Reference result. Kept under its own name so it is not overwritten by the
# SVD-based result and the two can actually be compared.
cov_numpy = np.cov(stock_yields.T)

print(f"""Using numpy covariance
    cov shape: {cov_numpy.shape}
    cov[1]: {cov_numpy[0]}
    cov[n]: {cov_numpy[-1]}
""")

# Centre each column on its mean before decomposing
stock_yields_mean = stock_yields - stock_yields.mean(axis=0)

# Economy-size SVD; full_matrices is a boolean flag, so pass False rather than 0.
# Note: this rebinds U, S and VT at notebook level.
U, S, VT = svd(stock_yields_mean, full_matrices=False)

# X_c^T . X_c = V . diag(S^2) . V^T ; dividing by (rows - 1) gives the covariance
cov = np.dot(np.dot(VT.T, np.diag(S**2)), VT)
cov = cov / (stock_yields.shape[0] - 1)

# Verify the claim this cell illustrates: both routes give the same matrix
np.testing.assert_allclose(cov, cov_numpy, rtol=1e-9, atol=1e-12)

print(f"""Using SVD covariance
    cov shape: {cov.shape}
    cov[1]: {cov[0]}
    cov[n]: {cov[-1]}
""")

# S holds the singular values of the centred data (one per component)
print('Displaying stocks and their std. covariance\n', S)

print('\nDisplaying stocks covariance matrix\n', cov)

# NOTE(review): S[0:2] are the two largest singular values, which belong to
# components of the decomposition rather than to individual columns, while
# stocks[0:2] is always the first two tickers in the list — confirm that this
# pairing is what was intended.
print('\nSelecting best stocks with higher combination yields\n', S[0:2], stocks[0:2])

print('\nGives us related covariance vectors (relationship amongst variables)\n', cov[:,0:2])

Using numpy covariance
    cov shape: (5, 5)
    cov[1]: [ 0.10379853  0.02392059 -0.0189761   0.00497978 -0.00631434]
    cov[n]: [-0.00631434  0.00696324  0.01202206  0.01818566  0.07878676]

Using SVD covariance
    cov shape: (5, 5)
    cov[1]: [ 0.10379853  0.02392059 -0.0189761   0.00497978 -0.00631434]
    cov[n]: [-0.00631434  0.00696324  0.01202206  0.01818566  0.07878676]

Displaying stocks and their std. covariance
 [1.42911619 1.39863859 1.13716518 0.98471829 0.69214407]

Displaying stocks covariance matrix
 [[ 0.10379853  0.02392059 -0.0189761   0.00497978 -0.00631434]
 [ 0.02392059  0.07812426 -0.02002206 -0.00925129  0.00696324]
 [-0.0189761  -0.02002206  0.04652574 -0.00608548  0.01202206]
 [ 0.00497978 -0.00925129 -0.00608548  0.11404228  0.01818566]
 [-0.00631434  0.00696324  0.01202206  0.01818566  0.07878676]]

Selecting best stocks with higher combination yields
 [1.42911619 1.39863859] ['GOOG', 'AMZN']

Gives us related covariance vectors (relationship amongst var