# Feature Extraction

## Reducing Features using Principal Component Analysis

In [1]:
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn import datasets


In [2]:
# Load the digits dataset that ships with scikit-learn; the feature matrix
# is exposed as `digits.data`.
digits = datasets.load_digits()

In [3]:
# Inspect the raw (unscaled) feature matrix; NumPy abbreviates the repr of
# large arrays with "...".
digits.data

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [4]:
# (n_samples, n_features) of the raw feature matrix.
digits.data.shape

(1797, 64)

In [5]:
# Standardize every feature column before PCA (StandardScaler removes the
# mean and scales to unit variance), so no single feature dominates the
# principal components merely because of its scale.
scaler = StandardScaler()
features = scaler.fit_transform(digits.data)

In [6]:
# Inspect the standardized feature matrix.
features

array([[ 0.        , -0.33501649, -0.04308102, ..., -1.14664746,
        -0.5056698 , -0.19600752],
       [ 0.        , -0.33501649, -1.09493684, ...,  0.54856067,
        -0.5056698 , -0.19600752],
       [ 0.        , -0.33501649, -1.09493684, ...,  1.56568555,
         1.6951369 , -0.19600752],
       ...,
       [ 0.        , -0.33501649, -0.88456568, ..., -0.12952258,
        -0.5056698 , -0.19600752],
       [ 0.        , -0.33501649, -0.67419451, ...,  0.8876023 ,
        -0.5056698 , -0.19600752],
       [ 0.        , -0.33501649,  1.00877481, ...,  0.8876023 ,
        -0.26113572, -0.19600752]])

In [7]:
# A float n_components in (0, 1) tells PCA to keep however many components
# are needed to explain that fraction of the variance (here 99%).
# whiten=True rescales the retained components to unit variance.
pca = PCA(
    n_components=0.99,
    whiten=True,
)

In [8]:
# Fit PCA on the standardized features and project them onto the retained
# components in a single step.
features_pca = pca.fit_transform(features)

In [9]:
# Inspect the PCA-transformed feature matrix.
features_pca

array([[ 0.70631939, -0.39512814, -1.73816236, ...,  0.36526417,
        -0.31369006,  0.05355504],
       [ 0.21732591,  0.38276482,  1.72878893, ..., -0.17818068,
        -0.14031747,  1.18179755],
       [ 0.4804351 , -0.13130437,  1.33172761, ..., -0.01924571,
        -0.23580029,  0.92966158],
       ...,
       [ 0.37732433, -0.0612296 ,  1.0879821 , ..., -1.05526847,
         1.75559618, -0.87894699],
       [ 0.39705007, -0.15768102, -1.08160094, ...,  0.10442881,
         0.65907949,  1.1292155 ],
       [-0.46407544, -0.92213976,  0.12493334, ..., -1.10593026,
         0.54434185, -0.26573597]])

In [11]:
# Compare dimensionality before and after PCA: the column count of each
# matrix is its number of features.
n_features_original = features.shape[1]
n_features_reduced = features_pca.shape[1]

print("Original number of features:", n_features_original)
print("Reduced number of features:", n_features_reduced)

Original number of features: 64
Reduced number of features: 54
