### Chapter 8 - Dimensionality Reduction

#### Building a simple 3D dataset:

In [4]:
import numpy as np

# Seed NumPy's global random generator so the dataset below is reproducible.
np.random.seed(42)

# Dataset parameters: number of samples, the two weights that tie the third
# feature to the first two, and the scale of the Gaussian noise.
m = 60
w1, w2 = 0.1, 0.3
noise = 0.1

# One random angle per sample, uniform over [-0.5, 3*pi/2 - 0.5).
angles = np.random.rand(m) * 3 * np.pi / 2 - 0.5

# Build the features one at a time. The order of the randn() calls matters:
# each call consumes the next m draws from the seeded generator.
x0 = np.cos(angles) + np.sin(angles) / 2 + noise * np.random.randn(m) / 2
x1 = np.sin(angles) * 0.7 + noise * np.random.randn(m) / 2
# The third feature is a noisy linear combination of the first two.
x2 = x0 * w1 + x1 * w2 + noise * np.random.randn(m)

# Assemble the (m, 3) data matrix, one feature per column.
X = np.column_stack((x0, x1, x2))

X.shape


(60, 3)

#### Principal Components

In [9]:
# PCA works on data centered at the origin, so subtract each feature's mean.
X_centered = X - X.mean(axis=0)

# Singular value decomposition of the centered data.
U, s, Vt = np.linalg.svd(X_centered)

# Each row of Vt is one principal component; the 3-dimensional feature
# space yields three of them.
print("VT: ", Vt)

# The first two rows of Vt (equivalently, the first two columns of Vt.T)
# are the first two principal components.
c1, c2 = Vt[0], Vt[1]

print("PC1: ", c1)
print("PC2: ", c2)


VT:  [[-0.95250178 -0.24902446 -0.17529172]
 [ 0.29267159 -0.9076305  -0.30091563]
 [ 0.08416476  0.33792558 -0.93740205]]
PC1:  [-0.95250178 -0.24902446 -0.17529172]
PC2:  [ 0.29267159 -0.9076305  -0.30091563]


In [12]:
# Number of samples (60) and number of features (3).
m, n = X.shape

# np.linalg.svd returned the singular values as the 1-D array s. Rebuild the
# full rectangular singular-value matrix: zeros everywhere except for the
# singular values on the main diagonal of the top n x n block.
S = np.zeros_like(X_centered)
np.fill_diagonal(S, s)
S

array([[7.05108078, 0.        , 0.        ],
       [0.        , 2.81608982, 0.        ],
       [0.        , 0.        , 0.7518544 ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.        , 0.        , 0.        ],
       [0.

In [13]:
# Sanity check: multiplying the three SVD factors back together should
# reproduce the centered data up to floating-point tolerance.
np.allclose(X_centered, np.dot(np.dot(U, S), Vt))

True

#### Projecting Down to d Dimensions

In [18]:
# Projection matrix: the first two principal components laid out as columns.
W2 = Vt[:2].T
print(W2)

# Project the centered data onto the plane spanned by those two components
# to obtain the 2-dimensional representation.
X2D = np.dot(X_centered, W2)
X2D

[[-0.95250178  0.29267159]
 [-0.24902446 -0.9076305 ]
 [-0.17529172 -0.30091563]]


array([[-0.690074  , -0.36150744],
       [ 1.39636097,  0.34497714],
       [ 1.00728461, -0.35025708],
       [ 0.2736333 , -0.50516373],
       [-0.91324535,  0.26290852],
       [-0.79710027,  0.26718188],
       [-0.55173939,  0.65062721],
       [ 1.41612959,  0.16567641],
       [ 0.40776059, -0.46053322],
       [ 0.85209856, -0.40516935],
       [-0.46269946,  0.61952736],
       [ 1.2826692 ,  0.41018903],
       [ 1.37468032,  0.03618608],
       [-0.96941594,  0.10932241],
       [-0.97219266,  0.14390464],
       [-1.05216924,  0.07740862],
       [-0.92770444, -0.22364286],
       [-0.01473543, -0.4153169 ],
       [-0.47010859, -0.46920058],
       [-0.87761843, -0.08515546],
       [ 0.38973612, -0.45189716],
       [-0.96989867,  0.19819051],
       [-0.93689997, -0.09307933],
       [-0.81304146, -0.26096051],
       [-0.41368569, -0.42009096],
       [ 1.2830484 , -0.02603822],
       [-0.95210787,  0.23163682],
       [-0.2005476 , -0.49130242],
       [ 0.33988682,

# Bind a second name to the SVD-based 2D projection (same array, no copy is made).
# NOTE(review): presumably kept for comparison with the Scikit-Learn result
# in the next section — confirm against the cells that follow.
X2D_using_svd = X2D

#### Using Scikit-Learn