# Singular Value Decomposition -> SVD from Scratch

In [1]:
from scipy.linalg import svd

In [2]:
import numpy as np

In [3]:
A = np.array([
    [1,2],
    [3,4],
    [5,6]
])

In [4]:
A

array([[1, 2],
       [3, 4],
       [5, 6]])

In [5]:
U, Sigma, VT = svd(A)
U

array([[-0.2298477 ,  0.88346102,  0.40824829],
       [-0.52474482,  0.24078249, -0.81649658],
       [-0.81964194, -0.40189603,  0.40824829]])

In [6]:
Sigma

array([9.52551809, 0.51430058])

In [7]:
VT

array([[-0.61962948, -0.78489445],
       [-0.78489445,  0.61962948]])

In [8]:
sigma = np.zeros((A.shape[0],A.shape[1]))
sigma

array([[0., 0.],
       [0., 0.],
       [0., 0.]])

In [9]:
sigma[:A.shape[1],:A.shape[1]] = np.diag(Sigma)
sigma

array([[9.52551809, 0.        ],
       [0.        , 0.51430058],
       [0.        , 0.        ]])

In [10]:
from numpy import dot
A_rec = U.dot(sigma.dot(VT))
A_rec

array([[1., 2.],
       [3., 4.],
       [5., 6.]])

# SVD for Dimensionality Reduction

In [11]:
# define a matrix
A = np.array([
    [1,2,3,4,5,6,7,8,9,10],
    [11,12,13,14,15,16,17,18,19,20],
    [21,22,23,24,25,26,27,28,29,30]
])
print(A)

[[ 1  2  3  4  5  6  7  8  9 10]
 [11 12 13 14 15 16 17 18 19 20]
 [21 22 23 24 25 26 27 28 29 30]]


In [12]:
U, Sigma, VT = svd(A)
U

array([[-0.19101157, -0.89266338,  0.40824829],
       [-0.51371859, -0.26348917, -0.81649658],
       [-0.8364256 ,  0.36568503,  0.40824829]])

In [13]:
Sigma

array([9.69657342e+01, 7.25578339e+00, 1.48879510e-15])

In [14]:
VT

array([[-0.24139304, -0.25728686, -0.27318068, -0.2890745 , -0.30496832,
        -0.32086214, -0.33675595, -0.35264977, -0.36854359, -0.38443741],
       [ 0.53589546,  0.42695236,  0.31800926,  0.20906617,  0.10012307,
        -0.00882003, -0.11776313, -0.22670623, -0.33564933, -0.44459242],
       [ 0.09975293, -0.01037753,  0.23987452,  0.05273845,  0.0996209 ,
        -0.46517509, -0.52300449, -0.23591804,  0.593137  ,  0.14935136],
       [-0.259848  , -0.19489762, -0.10988774,  0.9273013 , -0.07153728,
        -0.05251768, -0.04829915, -0.05415057, -0.07582496, -0.0603383 ],
       [-0.26312174, -0.0585767 , -0.12150761, -0.05740164,  0.9302533 ,
        -0.0441965 , -0.05005435, -0.07728074, -0.13808364, -0.12003039],
       [-0.34412869, -0.09157816,  0.43274424, -0.05958094, -0.05492318,
         0.70339002, -0.33412339, -0.23272698,  0.08693936, -0.10601229],
       [-0.3607094 ,  0.01575684,  0.51799467, -0.04727562, -0.05090152,
        -0.33292338,  0.61579275, -0.27850803

In [15]:
# create m x n Sigma matrix
sigma = np.zeros((A.shape[0], A.shape[1]))

In [16]:
# populate Sigma with n x n diagonal matrix
sigma[:A.shape[0], :A.shape[0]] = np.diag(Sigma)
sigma

array([[9.69657342e+01, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 7.25578339e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00],
       [0.00000000e+00, 0.00000000e+00, 1.48879510e-15, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00, 0.00000000e+00, 0.00000000e+00,
        0.00000000e+00, 0.00000000e+00]])

In [17]:
# select
n_elements = 2
sigma = sigma[:, :n_elements]
VT = VT[:n_elements, :]
VT

array([[-0.24139304, -0.25728686, -0.27318068, -0.2890745 , -0.30496832,
        -0.32086214, -0.33675595, -0.35264977, -0.36854359, -0.38443741],
       [ 0.53589546,  0.42695236,  0.31800926,  0.20906617,  0.10012307,
        -0.00882003, -0.11776313, -0.22670623, -0.33564933, -0.44459242]])

In [18]:
# reconstruct
B = U.dot(sigma.dot(VT))
sigma

array([[96.96573419,  0.        ],
       [ 0.        ,  7.25578339],
       [ 0.        ,  0.        ]])

In [19]:
Sigma

array([9.69657342e+01, 7.25578339e+00, 1.48879510e-15])

In [25]:
B

array([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
       [11., 12., 13., 14., 15., 16., 17., 18., 19., 20.],
       [21., 22., 23., 24., 25., 26., 27., 28., 29., 30.]])

In [26]:
A

array([[ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10],
       [11, 12, 13, 14, 15, 16, 17, 18, 19, 20],
       [21, 22, 23, 24, 25, 26, 27, 28, 29, 30]])

In [27]:
# transform
T = U.dot(Sigma)
print(T)

[-24.99854961 -51.72492049 -78.45129137]


In [28]:
T = A.dot(VT.T)
print(T)

[[-18.52157747  -6.47697214]
 [-49.81310011  -1.91182038]
 [-81.10462276   2.65333138]]


### 1. Two words ‘black’ and ‘brown’ are encoded in integer form as [3 2 2] and [2 3 -2] respectively. Compute the SVD on the combined integer features and report the results.
### 2. Compute SVD on two-word vectors represented as integers [-3 6 6].T and [1 -2 -2].T. where ‘.T’ is the python representation of transpose.
### 3. Two words in two documents are one – hot encoded as [1 0 1 0] and [0 1 0 1]. Compute the SVD transformation and find the semantic representation of the two words.


# ALM: Impliment the steps of SVD using python Code and verify the results with the sklearn API.