In [1]:
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.preprocessing import StandardScaler

# Load the breast cancer dataset
data = load_breast_cancer()

# Hold out 25% of the samples for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(data.data, data.target, test_size=0.25, random_state=42)

# Standardize the features (zero mean, unit variance). The scaler is fitted on
# the training split only so no test-set statistics leak into preprocessing.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Fit PCA on the scaled training data, keeping 10 components
pca = PCA(n_components=10)
pca.fit(X_train_scaled)

# Project both splits using the PCA fitted on the training data
X_train_pca = pca.transform(X_train_scaled)
X_test_pca = pca.transform(X_test_scaled)


def fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` and score its predictions on a held-out set.

    Parameters
    ----------
    model : estimator exposing ``fit(X, y)`` and ``predict(X)``
        Fitted in place on ``X_fit`` / ``y_fit``.
    X_fit, y_fit : training features and labels.
    X_eval, y_eval : evaluation features and true labels.

    Returns
    -------
    y_pred : predictions of the fitted model for ``X_eval``.
    scores : tuple ``(accuracy, precision, recall, f1)`` computed from
        ``y_eval`` and ``y_pred``.

    Notes
    -----
    Precision, recall and F1 are called with scikit-learn's defaults, i.e.
    the label ``1`` is treated as the positive class. In ``load_breast_cancer``
    label ``1`` is documented as "benign" -- NOTE(review): confirm that this is
    the class these scores are meant to describe.
    """
    model.fit(X_fit, y_fit)
    y_pred = model.predict(X_eval)
    scores = (
        accuracy_score(y_eval, y_pred),
        precision_score(y_eval, y_pred),
        recall_score(y_eval, y_pred),
        f1_score(y_eval, y_pred),
    )
    return y_pred, scores


# Every classifier is trained twice: once on all scaled features ("orig") and
# once on the PCA projection ("pca"). The individual metric variables are kept
# as module-level names so that other cells can keep referring to them.

# SVM (RBF kernel)
svm_model_orig = SVC(kernel='rbf', random_state=42)
y_pred_svm_orig, scores_orig_svm = fit_and_score(
    svm_model_orig, X_train_scaled, y_train, X_test_scaled, y_test)
accuracy_orig_svm, precision_orig_svm, recall_orig_svm, f1_orig_svm = scores_orig_svm

svm_model_pca = SVC(kernel='rbf', random_state=42)
y_pred_svm_pca, scores_pca_svm = fit_and_score(
    svm_model_pca, X_train_pca, y_train, X_test_pca, y_test)
accuracy_pca_svm, precision_pca_svm, recall_pca_svm, f1_pca_svm = scores_pca_svm

# Logistic regression
lr_model_orig = LogisticRegression(random_state=42)
y_pred_lr_orig, scores_orig_lr = fit_and_score(
    lr_model_orig, X_train_scaled, y_train, X_test_scaled, y_test)
accuracy_orig_lr, precision_orig_lr, recall_orig_lr, f1_orig_lr = scores_orig_lr

lr_model_pca = LogisticRegression(random_state=42)
y_pred_lr_pca, scores_pca_lr = fit_and_score(
    lr_model_pca, X_train_pca, y_train, X_test_pca, y_test)
accuracy_pca_lr, precision_pca_lr, recall_pca_lr, f1_pca_lr = scores_pca_lr

# K-nearest neighbours (library default settings)
knn_model_orig = KNeighborsClassifier()
y_pred_knn_orig, scores_orig_knn = fit_and_score(
    knn_model_orig, X_train_scaled, y_train, X_test_scaled, y_test)
accuracy_orig_knn, precision_orig_knn, recall_orig_knn, f1_orig_knn = scores_orig_knn

knn_model_pca = KNeighborsClassifier()
y_pred_knn_pca, scores_pca_knn = fit_and_score(
    knn_model_pca, X_train_pca, y_train, X_test_pca, y_test)
accuracy_pca_knn, precision_pca_knn, recall_pca_knn, f1_pca_knn = scores_pca_knn

# Print the results: for each model, every metric before and then after PCA.
# The labels are listed in the same order as the tuple returned by fit_and_score.
metric_labels = ('classification accuracy', 'precision', 'recall', 'F1 score')
comparisons = (
    ('SVM', scores_orig_svm, scores_pca_svm),
    ('Logistic regression', scores_orig_lr, scores_pca_lr),
    ('KNN', scores_orig_knn, scores_pca_knn),
)
for model_label, scores_before, scores_after in comparisons:
    for metric_label, before, after in zip(metric_labels, scores_before, scores_after):
        print('{} {} before PCA: {:.3f}'.format(model_label, metric_label, before))
        print('{} {} after PCA: {:.3f}'.format(model_label, metric_label, after))

SVM classification accuracy before PCA: 0.972
SVM classification accuracy after PCA: 0.958
SVM precision before PCA: 0.978
SVM precision after PCA: 0.977
SVM recall before PCA: 0.978
SVM recall after PCA: 0.955
SVM F1 score before PCA: 0.978
SVM F1 score after PCA: 0.966
Logistic regression classification accuracy before PCA: 0.979
Logistic regression classification accuracy after PCA: 0.986
Logistic regression precision before PCA: 0.989
Logistic regression precision after PCA: 0.989
Logistic regression recall before PCA: 0.978
Logistic regression recall after PCA: 0.989
Logistic regression F1 score before PCA: 0.983
Logistic regression F1 score after PCA: 0.989
KNN classification accuracy before PCA: 0.958
KNN classification accuracy after PCA: 0.965
KNN precision before PCA: 0.966
KNN precision after PCA: 0.967
KNN recall before PCA: 0.966
KNN recall after PCA: 0.978
KNN F1 score before PCA: 0.966
KNN F1 score after PCA: 0.972
