## SVM classification with PCA dimensionality reduction on the Breast Cancer dataset

In [1]:
%matplotlib inline
import pandas as pd
from sklearn import datasets
from sklearn.decomposition import PCA
from sklearn import svm,decomposition

# Load scikit-learn's bundled breast cancer dataset; later cells read
# `dataset.data` (features) and `dataset.target` (labels) from this object.
dataset = datasets.load_breast_cancer()
# Bare last expression so the notebook displays the class names.
dataset.target_names

array(['malignant', 'benign'], dtype='<U9')

In [2]:
from sklearn.model_selection import train_test_split

# Split features and labels into train/test partitions using the default
# split size; the fixed random_state keeps the partition identical across
# re-runs of the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    dataset.data,
    dataset.target,
    random_state=126,
)

# Sanity check: both partitions should keep the same number of feature columns.
print(X_train.shape, X_test.shape)

(426, 30) (143, 30)


In [3]:
# Learn a 20-component, whitened PCA projection from the training split only,
# so nothing about the test split leaks into the projection.
# `fit` returns the estimator itself, so it can be chained onto the constructor.
pca = decomposition.PCA(whiten=True, n_components=20).fit(X_train)
# Display the fitted estimator's parameters as the cell output.
pca

PCA(copy=True, iterated_power='auto', n_components=20, random_state=None,
    svd_solver='auto', tol=0.0, whiten=True)

In [4]:
# Project both splits with the PCA fitted on the training data
# (train first, then test — same order as the transforms are needed below).
X_train_pca, X_test_pca = (pca.transform(split) for split in (X_train, X_test))
print(X_train_pca.shape)

# Train a support vector classifier on the reduced training features.
# `fit` returns the estimator, so the bare `clf` below shows the same repr.
clf = svm.SVC(gamma=0.001, C=2.0).fit(X_train_pca, y_train)
clf

(426, 20)


SVC(C=2.0, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma=0.001, kernel='rbf',
    max_iter=-1, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

In [5]:
from sklearn import metrics

# Predict on the PCA-projected test split; `y_pred` is kept as a notebook-level
# name because it is the model's output for the held-out data.
y_pred = clf.predict(X_test_pca)

# Per-class precision / recall / F1 of the predictions against the true labels.
report = metrics.classification_report(y_test, y_pred)
print(report)

              precision    recall  f1-score   support

           0       1.00      0.58      0.73        52
           1       0.81      1.00      0.89        91

    accuracy                           0.85       143
   macro avg       0.90      0.79      0.81       143
weighted avg       0.88      0.85      0.83       143



In [6]:
from sklearn.pipeline import Pipeline
clf = Pipeline([('pca', decomposition.PCA(n_components=2, whiten=True)), ('svm', svm.LinearSVC(C=2.0))])

clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
print(metrics.confusion_matrix(y_pred, y_test))

[[41  4]
 [11 87]]
