<a href="https://colab.research.google.com/github/rupeshthapa123/NotebookProject/blob/main/Rupesh_Thapa_DimensionalityReduction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Dimensionality Reduction

In this lab, you will learn how to reduce a dataset's dimensionality while preserving as much of its variance as possible.

Using the class slides as a reference, generate a synthetic 3D dataset and reduce it to 2D using two approaches: a manual SVD-based projection and scikit-learn's PCA.

Include your steps below in text and provide the corresponding Python code blocks.




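**Steps**

1. **Generate the data.** With the NumPy seed fixed at 4, draw 60 random angles and build a 3D dataset: the first two features trace a noisy ellipse-like arc, and the third feature is a noisy linear combination of the first two (weights 0.1 and 0.3). The points therefore lie close to a 2D plane embedded in 3D.
2. **Reduce with SVD.** Centre the data by subtracting the per-feature mean, compute `U, s, Vt = np.linalg.svd(X_centered)`, keep the first two columns of `Vt.T` as the projection matrix `W2`, and project: `X2D = X_centered.dot(W2)`.
3. **Reduce with PCA.** Fit scikit-learn's `PCA(n_components=2)` on the original `X` and transform it in one call with `fit_transform`.
4. **Compare.** Display the first five rows of both projections. They match in magnitude but have opposite signs, because the direction of each principal axis is arbitrary.
5. **Check the preserved variance.** `pca.explained_variance_ratio_` shows that the first component carries about 84.2% of the variance and the second about 14.6%, so roughly 1.1% is lost by dropping the third dimension.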

In [None]:
import numpy as np

# Build a reproducible synthetic dataset of m points in 3-D.
# The third feature is a noisy linear mix of the first two.
np.random.seed(4)
m, noise = 60, 0.1
w1, w2 = 0.1, 0.3

angles = np.random.rand(m) * 3 * np.pi / 2 - 0.5

# Draw the noise vectors one after another so the seeded stream is consumed
# in a fixed order: angles, then feature 0, feature 1, feature 2.
eps0 = np.random.randn(m)
eps1 = np.random.randn(m)
eps2 = np.random.randn(m)

x0 = np.cos(angles) + np.sin(angles) / 2 + noise * eps0 / 2
x1 = np.sin(angles) * 0.7 + noise * eps1 / 2
x2 = x0 * w1 + x1 * w2 + noise * eps2

# Assemble the three feature vectors as the columns of an (m, 3) array.
X = np.column_stack((x0, x1, x2))

In [None]:
# Show the last (third) feature of every sample.
print(X[:, -1])

[-0.26132626  0.03507755 -0.24920288  0.02182381  0.19152496  0.31710572
 -0.13368695 -0.00787835  0.46783297  0.2340159  -0.12152034  0.06972972
 -0.15761101 -0.13927911  0.09790432  0.06433843 -0.05926516  0.24642318
 -0.02162333 -0.38573526  0.39019099 -0.47610118 -0.18764369 -0.17189644
  0.26579776  0.02223257  0.11210787  0.17826927 -0.07641252  0.37436649
  0.14346668 -0.37070336  0.13736259  0.09297312  0.2668735   0.32415098
  0.15571838 -0.06928305  0.13122889  0.27325686  0.25979142  0.15964958
  0.1002999   0.11781711  0.25192037  0.17445205 -0.07109776  0.31705263
  0.28640517 -0.10303808 -0.13498243  0.25913415  0.0903395   0.23025679
  0.07305738  0.14297884 -0.33508853  0.43823703  0.05828838  0.25317605]


In [None]:
# --- Dimensionality reduction "by hand" with the Singular Value Decomposition ---

# Remove the per-feature mean so the decomposition describes spread around the origin.
X_centered = X - X.mean(axis=0)

# X_centered = U . S . Vt; the columns of Vt.T are the right singular vectors,
# ordered by decreasing singular value. The first two are kept as the projection axes.
U, s, Vt = np.linalg.svd(X_centered)
c1 = Vt.T[:, 0]
c2 = Vt.T[:, 1]

m, n = X.shape

# np.linalg.svd returns the singular values as a 1-D vector; rebuild the
# rectangular matrix S (same shape as X_centered) with them on the diagonal.
S = np.zeros(X_centered.shape)
S[:n, :n] = np.diag(s)

# Sanity check: the three factors must multiply back to the centred data.
# It has to be an assert: a bare np.allclose(...) in the middle of a cell is
# evaluated and thrown away, so a failed reconstruction would go unnoticed.
assert np.allclose(X_centered, U.dot(S).dot(Vt)), (
    "SVD factors do not reconstruct X_centered"
)

# Project the centred data onto the plane spanned by the first two singular vectors.
W2 = Vt.T[:, :2]
X2D = X_centered.dot(W2)

# Keep a dedicated reference to the SVD result under its own name.
X2D_using_svd = X2D

In [None]:
# Print the whole 2-D projection produced by the manual SVD route (one row per sample).
print(X2D_using_svd)

[[-1.26203346 -0.42067648]
 [ 0.08001485  0.35272239]
 [-1.17545763 -0.36085729]
 [-0.89305601  0.30862856]
 [-0.73016287  0.25404049]
 [ 1.10436914 -0.20204953]
 [-1.27265808 -0.46781247]
 [ 0.44933007 -0.67736663]
 [ 1.09356195  0.04467792]
 [ 0.66177325  0.28651264]
 [-1.04466138  0.11244353]
 [ 1.05932502 -0.31189109]
 [-1.13761426 -0.14576655]
 [-1.16044117 -0.36481599]
 [ 1.00167625 -0.39422008]
 [-0.2750406   0.34391089]
 [ 0.45624787 -0.69707573]
 [ 0.79706574  0.26870969]
 [ 0.66924929 -0.65520024]
 [-1.30679728 -0.37671343]
 [ 0.6626586   0.32706423]
 [-1.25387588 -0.56043928]
 [-1.04046987  0.08727672]
 [-1.26047729 -0.1571074 ]
 [ 1.09786649 -0.38643428]
 [ 0.7130973  -0.64941523]
 [-0.17786909  0.43609071]
 [ 1.02975735 -0.33747452]
 [-0.94552283  0.22833268]
 [ 0.80994916  0.33810729]
 [ 0.20189175  0.3514758 ]
 [-1.34219411 -0.42415687]
 [ 0.13599883  0.37258632]
 [ 0.8206931  -0.55120835]
 [ 0.90818634 -0.31869127]
 [ 0.06703671  0.42486148]
 [ 0.13936893  0.41906961]
 

In [None]:
from sklearn.decomposition import PCA

# Same reduction via scikit-learn. PCA centres the data itself, so the raw X is passed in.
pca = PCA(n_components=2)
X2D = pca.fit_transform(X)

# The orientation (sign) of each principal axis is arbitrary, so this projection
# may be a mirror image of the manual SVD projection along either axis. Compare
# magnitudes so the two routes are verified to agree rather than just eyeballed.
assert np.allclose(np.abs(X2D), np.abs(X2D_using_svd)), (
    "PCA and manual SVD projections disagree beyond a per-axis sign flip"
)

X2D[:5]

array([[ 1.26203346,  0.42067648],
       [-0.08001485, -0.35272239],
       [ 1.17545763,  0.36085729],
       [ 0.89305601, -0.30862856],
       [ 0.73016287, -0.25404049]])

In [None]:
# First five samples of the manual SVD projection, for side-by-side comparison with the PCA rows.
X2D_using_svd[:5, :]

array([[-1.26203346, -0.42067648],
       [ 0.08001485,  0.35272239],
       [-1.17545763, -0.36085729],
       [-0.89305601,  0.30862856],
       [-0.73016287,  0.25404049]])

In [None]:
# Share of the dataset's total variance captured by each of the two retained components.
pca.explained_variance_ratio_

array([0.84248607, 0.14631839])