In [1]:
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Load the breast cancer dataset
breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target

In [5]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [7]:
# Train an SVM classifier using the original features
svm = SVC(kernel='linear', C=1)
svm.fit(X_train, y_train)
y_pred = svm.predict(X_test)
accuracy_original = accuracy_score(y_test, y_pred)
print("Accuracy without PCA:", accuracy_original)
print("Classification Report without PCA:")
print(classification_report(y_test, y_pred))


Accuracy without PCA: 0.956140350877193
Classification Report without PCA:
              precision    recall  f1-score   support

           0       0.97      0.91      0.94        43
           1       0.95      0.99      0.97        71

    accuracy                           0.96       114
   macro avg       0.96      0.95      0.95       114
weighted avg       0.96      0.96      0.96       114



In [9]:
# Apply PCA to reduce dimensionality
pca = PCA(n_components=0.95)  # Retain 95% of the variance
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
print("Number of features after PCA:", pca.n_components_)


Number of features after PCA: 1


In [11]:
# Retrain the SVM classifier using the PCA-transformed features
svm.fit(X_train_pca, y_train)
y_pred_pca = svm.predict(X_test_pca)
accuracy_pca = accuracy_score(y_test, y_pred_pca)
print("Accuracy with PCA:", accuracy_pca)
print("Classification Report with PCA:")
print(classification_report(y_test, y_pred_pca))

Accuracy with PCA: 0.9473684210526315
Classification Report with PCA:
              precision    recall  f1-score   support

           0       1.00      0.86      0.93        43
           1       0.92      1.00      0.96        71

    accuracy                           0.95       114
   macro avg       0.96      0.93      0.94       114
weighted avg       0.95      0.95      0.95       114

