In [2]:
import numpy as np
import time
from sklearn import decomposition, linear_model, datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [3]:
breast_cancer  = datasets.load_breast_cancer()

In [4]:
X = breast_cancer.data
y = breast_cancer.target

In [5]:
X.shape  #30 - no of features

(569, 30)

In [7]:
#Applying feature scaling
sc = StandardScaler()
X_std = sc.fit_transform(X) 

In [9]:
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# We fit pca only on training data, and apply the same pca object on test data

In [10]:
pca = decomposition.PCA(n_components = 15)
X_train_pca = pca.fit_transform(X_train)  #fit_transform in case of training data
X_test_pca = pca.transform(X_test)  #transform in case of test data

In [11]:
lr = linear_model.LogisticRegression()

# Applying Logistic Regression on data without applying PCA

In [12]:
start = time.time()
lr.fit(X_train, y_train)
end = time.time()
print('Time' , end-start)
print('Score', lr.score(X_test, y_test))

Time 0.029983043670654297
Score 0.9574468085106383




In [13]:
start1 = time.time()
lr.fit(X_train_pca, y_train)
end1 = time.time()
print('Time' , end1-start1)
print('Score', lr.score(X_test_pca, y_test))

Time 0.009991168975830078
Score 0.9680851063829787




### So the time of fitting the model after applying PCA on data is very less as compared to without PCA

# Eigen Vectors and Eigen values

In [14]:
pca.explained_variance_  #these are the eigen values 

array([4.64358352e+05, 7.83940833e+03, 9.09369363e+02, 5.89143297e+01,
       3.72408842e+01, 2.79720614e+00, 1.78562684e+00, 3.87244346e-01,
       1.73561351e-01, 7.23506369e-02, 2.95910246e-02, 6.52862853e-03,
       3.21936552e-03, 2.05979930e-03, 1.34598698e-03])

In [15]:
pca.components_  #these are the eigen vectors

array([[ 5.06050279e-03,  1.98898203e-03,  3.47932830e-02,
         5.23458410e-01,  3.66198874e-06,  3.55975454e-05,
         7.73961028e-05,  4.57239855e-05,  7.19775632e-06,
        -3.12872164e-06,  3.21013140e-04, -4.40966563e-05,
         2.25429730e-03,  5.81698884e-02, -7.30100339e-07,
         4.24602930e-06,  7.96382032e-06,  2.93191472e-06,
        -8.02652800e-07, -2.05830046e-07,  7.02946973e-03,
         2.82751163e-03,  4.82333866e-02,  8.47925893e-01,
         5.68931058e-06,  8.47305434e-05,  1.55719477e-04,
         6.93292171e-05,  1.82828216e-05,  1.06335963e-07],
       [ 8.84812140e-03, -3.41677824e-03,  5.96105313e-02,
         8.46985237e-01, -1.65378680e-05, -1.11174511e-05,
         6.80497952e-05,  3.80756245e-05, -2.00137279e-05,
        -1.57859846e-05,  5.37779093e-05,  4.26160257e-04,
         1.62374105e-03,  3.01422381e-02,  2.52140745e-06,
         1.11198687e-05,  3.14741462e-05,  7.56306966e-06,
         1.57967585e-05,  2.36061160e-07, -1.28776188e-