In [1]:
# import the necessary libraries
from sklearn.datasets import load_breast_cancer  #Loads the breast cancer dataset from sklearn.datasets
from sklearn.preprocessing import StandardScaler #Standardizes features by removing the mean and scaling to unit variance.
from sklearn.decomposition import PCA #Performs Principal Component Analysis for dimensionality reduction.
from sklearn.linear_model import LogisticRegression #A classification algorithm used to predict binary outcomes.
from sklearn.model_selection import train_test_split #Splits the dataset into training and testing sets.
from sklearn.metrics import accuracy_score #Measures how well the classifier performs
import matplotlib.pyplot as plt #Used for plotting the PCA components visually.


# Load scikit-learn's bundled breast cancer dataset.
# Note: despite its name, `df` is the Bunch container returned by the loader,
# not a pandas DataFrame.
df = load_breast_cancer()
X, y = df.data, df.target  # feature matrix and class labels

In [2]:
# Ensures each feature contributes equally to the PCA.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [3]:
# Computes the PCA and applies the transformation.
pca = PCA(n_components=2)
X_pca = pca.fit_transform(X_scaled)

In [5]:
# Splitting the Data for Training and Testing
X_train, X_test, y_train, y_test = train_test_split(X_pca, y, test_size=0.2, random_state=42)

In [6]:
# Create a logistic regression classifier with default settings and fit it on
# the PCA-reduced training split. The bare `fit` call is left as the last
# expression so the cell displays the fitted estimator.
clf = LogisticRegression()
clf.fit(X=X_train, y=y_train)

LogisticRegression()

In [7]:
# Predict labels for the held-out split and report the fraction of
# predictions that match the true labels.
y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Logistic Regression Accuracy on PCA-reduced data: {accuracy:.2f}")

Logistic Regression Accuracy on PCA-reduced data: 0.99
