In [11]:
import numpy as np
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

In [8]:
# Load scikit-learn's bundled breast-cancer dataset. The returned object
# exposes the feature matrix as `.data` and the labels as `.target`
# (both are read in the next cell).
dataset=datasets.load_breast_cancer()

In [9]:
# Feature matrix: 569 samples x 30 features (see the X.shape output below).
X=dataset.data
# Labels, passed alongside the features to train_test_split later on.
Y=dataset.target

In [12]:
# Standardise every feature column with StandardScaler.
# NOTE(review): the scaler is fitted on the full dataset before the
# train/test split, so test rows contribute to the scaling statistics.
# Also, the covariance cell further down uses the raw X rather than x_std.
# Confirm both are intended.
std=StandardScaler()
x_std=std.fit_transform(X)

In [13]:
# Split the standardised features and labels into train/test parts using the
# default split size; random_state=0 makes the shuffle reproducible.
# NOTE(review): none of these four variables is used in the visible cells
# below -- the PCA steps operate on X directly.
x_train,x_test,y_train,y_test=train_test_split(x_std,Y,random_state=0)

## STEP 1 : FIND THE COVARIANCE MATRIX

In [14]:
# Sanity check of the data layout: (number of samples, number of features).
X.shape

(569, 30)

In [16]:
# Demonstration of the wrong orientation: np.cov treats each ROW as a variable
# by default, so passing the (569, 30) sample matrix as-is produces a
# 569 x 569 sample-by-sample matrix instead of the 30 x 30 feature covariance.
# `cov` is overwritten with the correct orientation in the next code cell.
cov=np.cov(X)
cov.shape

(569, 569)

##### The covariance matrix must be n x n, not m x m, where m is the number of data points and n is the number of features. `np.cov` treats each row as a variable by default, so the data has to be transposed first.

In [19]:
# Feature covariance matrix. rowvar=False tells np.cov that every COLUMN of X
# is a variable (feature) and every row an observation, which yields an
# n_features x n_features result -- the same matrix as np.cov(X.T).
# NOTE(review): this is computed on the raw X, not on the standardised x_std
# produced earlier; confirm that is intended.
cov = np.cov(X, rowvar=False)
cov.shape

(30, 30)

## STEP 2 : FIND THE EIGENVALUES AND EIGENVECTORS

In [23]:
# Eigen-decomposition of the covariance matrix. eig_values[i] belongs to the
# COLUMN eig_vectors[:, i]; np.linalg.eig does not guarantee any ordering of
# the eigenvalues, so they are sorted explicitly in the next cell.
# NOTE(review): cov is a symmetric covariance matrix, so np.linalg.eigh would
# presumably also apply here -- consider it if complex results ever appear.
eig_values,eig_vectors = np.linalg.eig(cov)

In [26]:
# Pair every eigenvalue with its eigenvector. np.linalg.eig returns the
# eigenvectors as the COLUMNS of eig_vectors, so column i goes with
# eig_values[i]. (The previous version iterated over `eig_val`, a name that is
# never defined in this notebook and only existed as leftover kernel state.)
eig_val_vector_pair = [
    (eig_values[i], eig_vectors[:, i]) for i in range(len(eig_values))
]

# Sort by eigenvalue only, largest first. Sorting the bare tuples would fall
# back to comparing the ndarray members whenever two eigenvalues are equal,
# which raises "truth value of an array is ambiguous".
eig_val_vector_pair.sort(key=lambda pair: pair[0], reverse=True)
eig_val_vector_pair

[(443782.6051465967,
  array([ 5.08623202e-03,  2.19657026e-03,  3.50763298e-02,  5.16826469e-01,
          4.23694535e-06,  4.05260047e-05,  8.19399539e-05,  4.77807775e-05,
          7.07804332e-06, -2.62155251e-06,  3.13742507e-04, -6.50984008e-05,
          2.23634150e-03,  5.57271669e-02, -8.05646029e-07,  5.51918197e-06,
          8.87094462e-06,  3.27915009e-06, -1.24101836e-06, -8.54530832e-08,
          7.15473257e-03,  3.06736622e-03,  4.94576447e-02,  8.52063392e-01,
          6.42005481e-06,  1.01275937e-04,  1.68928625e-04,  7.36658178e-05,
          1.78986262e-05,  1.61356159e-06])),
 (7310.100061653261,
  array([ 9.28705650e-03, -2.88160658e-03,  6.27480827e-02,  8.51823720e-01,
         -1.48194356e-05, -2.68862249e-06,  7.51419574e-05,  4.63501038e-05,
         -2.52430431e-05, -1.61197148e-05, -5.38692831e-05,  3.48370414e-04,
          8.19640791e-04,  7.51112451e-03,  1.49438131e-06,  1.27357957e-05,
          2.86921009e-05,  9.36007477e-06,  1.22647432e-05,  2.89

In [36]:
# Keep only the eigenvectors, in descending-eigenvalue order. Iterating over
# the sorted pairs directly avoids the undefined `eig_val` the loop bound used
# to depend on, so the cell works on a fresh kernel.
n_eigen_vectors = [eig_vec for _, eig_vec in eig_val_vector_pair]

In [45]:
k = 15  # number of principal components (leading eigenvectors) to keep

# Take the top-k eigenvectors straight from the sorted (eigenvalue, eigenvector)
# pairs instead of truncating n_eigen_vectors in place. The cell can then be
# re-run with a different (including larger) k and still give the right result.
n_eigen_vectors = [eig_vec for _, eig_vec in eig_val_vector_pair[:k]]

In [52]:
# Replace each of the first k eigenvectors with its transpose.
# NOTE(review): every entry is a 1-D array (see the output below), and .T on a
# 1-D NumPy array returns the same 1-D data unchanged, so this loop does not
# alter any values. If row/column vectors or a projection matrix were intended,
# an explicit reshape or np.array(n_eigen_vectors) is presumably what is needed.
for i in range(k):
    curr=n_eigen_vectors[i]
    n_eigen_vectors[i]=curr.T
n_eigen_vectors

[array([ 5.08623202e-03,  2.19657026e-03,  3.50763298e-02,  5.16826469e-01,
         4.23694535e-06,  4.05260047e-05,  8.19399539e-05,  4.77807775e-05,
         7.07804332e-06, -2.62155251e-06,  3.13742507e-04, -6.50984008e-05,
         2.23634150e-03,  5.57271669e-02, -8.05646029e-07,  5.51918197e-06,
         8.87094462e-06,  3.27915009e-06, -1.24101836e-06, -8.54530832e-08,
         7.15473257e-03,  3.06736622e-03,  4.94576447e-02,  8.52063392e-01,
         6.42005481e-06,  1.01275937e-04,  1.68928625e-04,  7.36658178e-05,
         1.78986262e-05,  1.61356159e-06]),
 array([ 9.28705650e-03, -2.88160658e-03,  6.27480827e-02,  8.51823720e-01,
        -1.48194356e-05, -2.68862249e-06,  7.51419574e-05,  4.63501038e-05,
        -2.52430431e-05, -1.61197148e-05, -5.38692831e-05,  3.48370414e-04,
         8.19640791e-04,  7.51112451e-03,  1.49438131e-06,  1.27357957e-05,
         2.86921009e-05,  9.36007477e-06,  1.22647432e-05,  2.89683790e-07,
        -5.68673345e-04, -1.32152605e-02, -1