# PCA

In [None]:
import numpy as np

from sklearn.datasets import make_blobs

## Data

In [None]:
# Generate 25 synthetic samples with 3 features each. make_blobs returns the
# feature matrix together with other values; index [0] keeps the matrix only.
data = make_blobs(n_samples=25, n_features=3, random_state=123)[0]
data

array([[ 0.78456734,  4.27647404, -3.19856658],
       [ 8.53714456,  4.85977931,  0.1411281 ],
       [ 2.62789474, -2.06978587, -4.94022846],
       [ 4.6759643 , -5.37819856, -6.87327213],
       [ 3.15995024, -3.70046728, -5.33644501],
       [-0.67612822,  5.15702087, -1.16719175],
       [-1.0059971 ,  4.97188769, -2.2590466 ],
       [ 5.59778532, -4.42058578, -6.08216183],
       [ 9.90183603,  4.56431257,  0.88615594],
       [ 9.74032363,  3.49894385, -1.10939689],
       [ 0.68165369,  3.6643471 , -0.49826463],
       [10.85365032,  4.61035172,  0.08490606],
       [11.45964918,  4.39215878, -0.56305117],
       [ 1.17915945, -4.51137085, -6.1589521 ],
       [-0.74783868,  6.75098066, -1.50287771],
       [ 2.31008365, -5.39117772, -5.91041165],
       [ 3.10175474,  4.51713709,  0.26201865],
       [ 4.39502847, -3.55229808, -3.9671444 ],
       [ 9.19302331,  4.70831521, -0.44811939],
       [ 1.30204279,  4.76023172, -0.36356365],
       [11.72312394,  3.46050532, -0.613

## Pre-processing: Standardization (zero mean, unit variance)

In [None]:
from sklearn.preprocessing import StandardScaler
# Fit the scaler on the raw data first, then apply the learned
# per-feature scaling to the very same array.
scaler = StandardScaler().fit(data)
data_scaled = scaler.transform(data)
data_scaled

array([[-0.94576577,  0.6844099 , -0.21643981],
       [ 0.95223768,  0.81644815,  1.05591252],
       [-0.4944782 , -0.75214339, -0.87997577],
       [ 0.00693482, -1.50104293, -1.61642411],
       [-0.36421917, -1.121268  , -1.03092581],
       [-1.30337656,  0.8837324 ,  0.55747061],
       [-1.3841358 ,  0.84182525,  0.14149724],
       [ 0.23261712, -1.2842756 , -1.31502799],
       [ 1.28634453,  0.74956566,  1.33975221],
       [ 1.2468027 ,  0.50840646,  0.57948921],
       [-0.97096132,  0.5458475 ,  0.81231755],
       [ 1.51936985,  0.75998718,  1.03449312],
       [ 1.66773186,  0.71059655,  0.78763526],
       [-0.84916084, -1.30482591, -1.34428344],
       [-1.32093287,  1.24454463,  0.42958142],
       [-0.57228543, -1.50398092, -1.24959484],
       [-0.37846672,  0.73888691,  1.10196923],
       [-0.06184453, -1.0877281 , -0.50925154],
       [ 1.1128114 ,  0.78216241,  0.83142181],
       [-0.81907627,  0.79391434,  0.86363574],
       [ 1.73223635,  0.49970543,  0.768

## Creating PCA object with 2 components

In [None]:
from sklearn.decomposition import PCA

In [None]:
# Keep only 2 components, so the 3-feature data is later projected onto 2 dimensions.
pca_object = PCA(n_components=2)

### Fitting and transforming onto the principal component axes


In [None]:
# Fit the PCA on the scaled data and project that same data onto the
# 2 retained components in a single call.
pca_tran_data = pca_object.fit_transform(data_scaled)
pca_tran_data

array([[ 0.01807482,  1.10608589],
       [-1.56822185, -0.47742175],
       [ 1.25469279,  0.09490961],
       [ 2.08019503, -0.71579817],
       [ 1.55428532, -0.17625411],
       [-0.52517541,  1.5846139 ],
       [-0.18535702,  1.60382139],
       [ 1.65830181, -0.81544482],
       [-1.83034446, -0.78367908],
       [-1.13846716, -0.90920657],
       [-0.59153237,  1.17710707],
       [-1.70397914, -1.02458114],
       [-1.55117984, -1.20368352],
       [ 2.04841446,  0.17416174],
       [-0.66478221,  1.72172719],
       [ 2.02079789, -0.14619136],
       [-1.11095389,  0.72848976],
       [ 1.07410782, -0.39325395],
       [-1.44460117, -0.66008781],
       [-0.83734326,  1.13319685],
       [-1.42285382, -1.34299683],
       [ 2.01043412, -0.34369892],
       [ 2.32826422, -0.67780235],
       [-1.21198426, -0.76682762],
       [-0.26079242,  1.1128146 ]])

## Reconstructing the standardized input from 2 principal components

In [None]:
# Map the 2-component projection back into the 3-feature space.
# The result approximates data_scaled (the array the PCA was fitted on),
# not the unscaled `data`; scaler.inverse_transform would be needed to
# return to the original units.
reconstructed_data = pca_object.inverse_transform(pca_tran_data)
reconstructed_data

array([[-1.02843374,  0.39656563,  0.09387453],
       [ 0.95843181,  0.8380157 ,  1.03266133],
       [-0.50145291, -0.77642885, -0.85379451],
       [-0.02422682, -1.60954562, -1.49945134],
       [-0.34957373, -1.07027359, -1.08590102],
       [-1.29165918,  0.92453151,  0.51348658],
       [-1.42146519,  0.71184687,  0.28162217],
       [ 0.20700079, -1.37346994, -1.21887085],
       [ 1.32796966,  0.89450154,  1.18350215],
       [ 1.21587112,  0.40070479,  0.69559843],
       [-0.89307661,  0.81703679,  0.51995832],
       [ 1.50899404,  0.7238593 ,  1.07344126],
       [ 1.6241741 ,  0.55893137,  0.95113992],
       [-0.836433  , -1.26050843, -1.3920605 ],
       [-1.37237426,  1.06542919,  0.62267921],
       [-0.53119009, -1.36088974, -1.40385619],
       [-0.30709938,  0.98738316,  0.83407455],
       [ 0.00935445, -0.83981808, -0.77651423],
       [ 1.08652742,  0.69064339,  0.93008512],
       [-0.77143291,  0.95980528,  0.68479481],
       [ 1.71064258,  0.42451738,  0.849

In [None]:
# Show the reconstruction rounded to 4 decimal places for easier reading.
np.round(reconstructed_data, decimals=4)

array([[-1.0284,  0.3966,  0.0939],
       [ 0.9584,  0.838 ,  1.0327],
       [-0.5015, -0.7764, -0.8538],
       [-0.0242, -1.6095, -1.4995],
       [-0.3496, -1.0703, -1.0859],
       [-1.2917,  0.9245,  0.5135],
       [-1.4215,  0.7118,  0.2816],
       [ 0.207 , -1.3735, -1.2189],
       [ 1.328 ,  0.8945,  1.1835],
       [ 1.2159,  0.4007,  0.6956],
       [-0.8931,  0.817 ,  0.52  ],
       [ 1.509 ,  0.7239,  1.0734],
       [ 1.6242,  0.5589,  0.9511],
       [-0.8364, -1.2605, -1.3921],
       [-1.3724,  1.0654,  0.6227],
       [-0.5312, -1.3609, -1.4039],
       [-0.3071,  0.9874,  0.8341],
       [ 0.0094, -0.8398, -0.7765],
       [ 1.0865,  0.6906,  0.9301],
       [-0.7714,  0.9598,  0.6848],
       [ 1.7106,  0.4245,  0.8495],
       [-0.3452, -1.4271, -1.4157],
       [-0.1411, -1.756 , -1.6664],
       [ 1.1085,  0.5008,  0.7598],
       [-0.9427,  0.5794,  0.2863]])