In [1]:

import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler # <-- MUST-HAVE
from sklearn.svm import SVC # "C" stands for Classifier
from sklearn.metrics import classification_report, confusion_matrix



In [2]:

# --- 1. Load the Data ---
# Fetch scikit-learn's bundled breast-cancer dataset, then unpack the returned
# object into the feature matrix and the label vector in one step.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target  # X: the 30 features; y: the 2 classes (0 or 1)



In [3]:

# --- 2. Explore the Data (EDA) ---
# It's a clean dataset, so we only report its dimensions and then verify
# (rather than assume) that no feature value is missing.
print(f"Number of features: {X.shape[1]}")
print(f"Number of samples: {X.shape[0]}")

# Fail fast if the "no missing values" assumption is ever violated, e.g. if
# this notebook is re-pointed at a different data source. Silent on success,
# so the cell's printed output is unchanged.
assert not np.isnan(X).any(), "Feature matrix contains missing (NaN) values"



Number of features: 30
Number of samples: 569


In [4]:

# --- 3. Prepare Data ---
# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# --- Scale the Data ---
# Vijay: "Do I *really* have to scale?"
# AK: "For SVM? YES. 1000%. It is *not optional*. The 'margin'
# is a distance, so all features MUST be on the same scale."
#
# The scaler is fitted on the training split only and the same fitted
# transform is then applied to both splits, so the test data never
# influences the scaling parameters.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)



In [5]:

# --- 4. Create & Train the Model ---
# Step 1: import -- already done in the first cell.
# Step 2: build the classifier instance.
#         kernel='rbf' is the non-linear ("magic") kernel and
#         C=1.0 is used as a sensible starting value.
# NOTE(review): scikit-learn documents SVC's random_state as applying only to
# probability estimates (probability=True), so it presumably has no effect
# here -- confirm before relying on it for reproducibility.
model = SVC(
    C=1.0,
    kernel='rbf',
    random_state=42,
)

# Step 3: train on the *scaled* training features. This is the step that
# searches for the maximum-margin decision boundary.
print("\n...Training the SVM... (this can take a moment)")
model.fit(X_train_scaled, y_train)
print("...Model is Trained!...")




...Training the SVM... (this can take a moment)
...Model is Trained!...


In [6]:

# --- 5. Predict & Evaluate ---
y_pred = model.predict(X_test_scaled)

# Pin the label order explicitly so the confusion-matrix rows/columns and the
# report's display names are guaranteed to line up with the class ids, instead
# of depending on whichever labels happen to appear in y_test / y_pred.
# '0' is 'Malignant' (dangerous), '1' is 'Benign' (harmless)
class_labels = [0, 1]
class_names = ['Malignant (0)', 'Benign (1)']

print("\n--- Model Evaluation ---")
# Rows are the true classes and columns the predicted classes, both in
# `class_labels` order.
print(confusion_matrix(y_test, y_pred, labels=class_labels))

print(classification_report(
    y_test,
    y_pred,
    labels=class_labels,
    target_names=class_names,
))




--- Model Evaluation ---
[[41  2]
 [ 0 71]]
               precision    recall  f1-score   support

Malignant (0)       1.00      0.95      0.98        43
   Benign (1)       0.97      1.00      0.99        71

     accuracy                           0.98       114
    macro avg       0.99      0.98      0.98       114
 weighted avg       0.98      0.98      0.98       114

