In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
from sklearn.metrics import accuracy_score, classification_report
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split

In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression

In [3]:
from sklearn.datasets import  load_breast_cancer

In [4]:
# Load the bundled breast-cancer dataset.
# NOTE: despite the name `df`, this is a dict-like container (its keys are
# listed in the next cell), not a pandas DataFrame.
df = load_breast_cancer()

In [5]:
# Inspect which fields the loaded dataset object exposes.
df.keys()

dict_keys(['data', 'target', 'target_names', 'DESCR', 'feature_names'])

In [6]:
# Feature matrix: one row per sample, one column per measured feature.
X = df.data

In [7]:
# Target vector: one class label per row of X.
y = df.target

In [8]:
# Sanity check: X and y must have the same number of samples.
(X.shape, y.shape)

((569, 30), (569,))

In [9]:
# Standardise every feature (zero mean, unit variance) so that PCA is not
# dominated by the features with the largest raw scale.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

In [10]:
# Confirm that scaling left the shape of the feature matrix unchanged.
X_scaled.shape


(569, 30)

In [11]:
# Fit PCA on the standardised features. n_components is left at its default
# (None, as the repr below shows), so no components are dropped at this point.
pca = PCA()
pca.fit(X_scaled)

PCA(copy=True, iterated_power='auto', n_components=None, random_state=None,
  svd_solver='auto', tol=0.0, whiten=False)

In [12]:
# Variance captured by each principal component (one value per component,
# listed in decreasing order).
pca.explained_variance_

array([1.33049908e+01, 5.70137460e+00, 2.82291016e+00, 1.98412752e+00,
       1.65163324e+00, 1.20948224e+00, 6.76408882e-01, 4.77456255e-01,
       4.17628782e-01, 3.51310875e-01, 2.94433153e-01, 2.61621161e-01,
       2.41782421e-01, 1.57286149e-01, 9.43006956e-02, 8.00034045e-02,
       5.95036135e-02, 5.27114222e-02, 4.95647002e-02, 3.12142606e-02,
       3.00256631e-02, 2.74877113e-02, 2.43836914e-02, 1.80867940e-02,
       1.55085271e-02, 8.19203712e-03, 6.91261258e-03, 1.59213600e-03,
       7.50121413e-04, 1.33279057e-04])

In [13]:
# The same quantities expressed as a fraction of the total variance; the
# first component alone accounts for roughly 44% of it.
pca.explained_variance_ratio_

array([4.42720256e-01, 1.89711820e-01, 9.39316326e-02, 6.60213492e-02,
       5.49576849e-02, 4.02452204e-02, 2.25073371e-02, 1.58872380e-02,
       1.38964937e-02, 1.16897819e-02, 9.79718988e-03, 8.70537901e-03,
       8.04524987e-03, 5.23365745e-03, 3.13783217e-03, 2.66209337e-03,
       1.97996793e-03, 1.75395945e-03, 1.64925306e-03, 1.03864675e-03,
       9.99096464e-04, 9.14646751e-04, 8.11361259e-04, 6.01833567e-04,
       5.16042379e-04, 2.72587995e-04, 2.30015463e-04, 5.29779290e-05,
       2.49601032e-05, 4.43482743e-06])

In [14]:
# With every component retained the ratios should add up to 1
# (up to floating-point rounding).
pca.explained_variance_ratio_.sum()

1.0000000000000002

In [15]:
# Project the standardised data onto the principal axes. Because all
# components were kept, this re-expresses the features in a new basis but
# does not reduce dimensionality.
X_pc = pca.transform(X_scaled)

In [16]:
# Hold out a test set; the fixed random_state makes the split repeatable.
# NOTE(review): the scaler and PCA above were fitted on *all* rows before this
# split, so statistics from the test rows leak into the features. For an
# unbiased estimate, split first and fit the transforms on the training
# portion only (e.g. inside a sklearn Pipeline).
X_train, X_test, y_train, y_test = train_test_split(
    X_pc,
    y,
    random_state=100
)

In [21]:
# Compare several off-the-shelf classifiers on the same train/test split.
#
# For each model we fit on the training portion, predict the held-out
# portion, print a labelled classification report, and collect the accuracy
# and the report text into a summary table (`algo_test`).

# Seed for the estimators that use randomness internally (tree / forest);
# without it their scores change from run to run.
RANDOM_STATE = 100

algos = [
    KNeighborsClassifier(n_neighbors=5),
    DecisionTreeClassifier(random_state=RANDOM_STATE),
    RandomForestClassifier(n_estimators=10, random_state=RANDOM_STATE),
    GaussianNB(),
    LogisticRegression(),
    SVC(gamma='auto', kernel='rbf'),
]
# Derive the display names from the estimators themselves so the two lists
# can never drift out of sync.
algo_names = [type(algo).__name__ for algo in algos]

ac = {}  # model name -> [accuracy]
cl = {}  # model name -> [classification report text]

# Use a distinct loop variable (`algo_name`): reusing `algo_names` here would
# overwrite the list with the last model's name once the loop finishes.
for algo_name, algo in zip(algo_names, algos):
    algo.fit(X_train, y_train)
    y_pred = algo.predict(X_test)

    # Build the report once and reuse it for both storage and display.
    report = classification_report(y_test, y_pred)
    ac[algo_name] = [accuracy_score(y_test, y_pred)]
    cl[algo_name] = [report]

    # Label each report so the printed output can be matched to its model.
    print(algo_name)
    print(report)

algo_ac = pd.DataFrame.from_dict(ac, orient='index', columns=['accuracy_score'])
algo_cl = pd.DataFrame.from_dict(cl, orient='index', columns=['classification_report'])

# Index-aligned join keeps `accuracy_score` numeric (the previous
# stack-and-transpose approach turned the column into dtype=object).
algo_test = algo_ac.join(algo_cl).reset_index()

algo_test


             precision    recall  f1-score   support

          0       0.98      0.91      0.94        56
          1       0.95      0.99      0.97        87

avg / total       0.96      0.96      0.96       143

             precision    recall  f1-score   support

          0       0.98      0.86      0.91        56
          1       0.91      0.99      0.95        87

avg / total       0.94      0.94      0.94       143

             precision    recall  f1-score   support

          0       0.96      0.91      0.94        56
          1       0.94      0.98      0.96        87

avg / total       0.95      0.95      0.95       143

             precision    recall  f1-score   support

          0       0.87      0.82      0.84        56
          1       0.89      0.92      0.90        87

avg / total       0.88      0.88      0.88       143

             precision    recall  f1-score   support

          0       1.00      0.91      0.95        56
          1       0.95      1.00 

Unnamed: 0,index,accuracy_score,classification_report
0,KNeighborsClassifier,0.958042,precision recall f1-score s...
1,DecisionTreeClassifier,0.937063,precision recall f1-score s...
2,RandomForestClassifier,0.951049,precision recall f1-score s...
3,GaussianNB,0.881119,precision recall f1-score s...
4,LogisticRegression,0.965035,precision recall f1-score s...
5,SVC,0.972028,precision recall f1-score s...
