In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# SVM hyperparameter tuning walkthrough: train a default SVC as a baseline,
# tune C / kernel / gamma with 5-fold GridSearchCV, then compare both models
# on the same held-out test split and plot the tuned model's confusion matrix.

# 1. Load Dataset (via scikit-learn's built-in loader)
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target

print("--- DATASET LOADED ---")
print(f"Features: {X.shape[1]}")
print(f"Samples: {X.shape[0]}")

# 2. Preprocessing
# Hold out 20% of the samples for the final evaluation; the fixed seed keeps
# the split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scaling (crucial for SVM). The scaler is fit on the training split only and
# then applied to the test split, so no test statistics leak into training.
# NOTE: the scaler is fit on the whole training split *before* the grid
# search below, so each CV validation fold has contributed to the scaling
# statistics. The reported cross-validation score may therefore be slightly
# optimistic; wrapping scaler + SVC in a sklearn Pipeline would avoid this.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# 3. Train Default Model (Baseline)
# A standard SVM with default parameters gives the "before" performance.
default_model = SVC(random_state=42)
default_model.fit(X_train_scaled, y_train)
default_acc = accuracy_score(y_test, default_model.predict(X_test_scaled))

print(f"\nDefault Model Accuracy: {default_acc:.4f}")

# 4. Define Parameter Grid for Tuning
# We tune 'C', 'kernel', and 'gamma'. The grid is given as one dict per kernel
# because 'gamma' has no effect on the linear kernel: crossing it with
# 'linear' would refit identical models once per gamma value and report a
# meaningless gamma in best_params_ whenever the linear kernel wins.
C_values = [0.1, 1, 10, 100]             # Regularization parameter
param_grid = [
    {
        'kernel': ['rbf'],
        'C': C_values,
        'gamma': [1, 0.1, 0.01, 0.001],  # Kernel coefficient (RBF only)
    },
    {
        'kernel': ['linear'],
        'C': C_values,
    },
]

print("\n--- STARTING GRID SEARCH (This may take a moment) ---")

# 5. Apply GridSearchCV
# cv=5 means 5-fold cross-validation; refit=True retrains the best
# configuration on the full training split so it can be used directly.
grid = GridSearchCV(SVC(random_state=42), param_grid, refit=True, verbose=2, cv=5)
grid.fit(X_train_scaled, y_train)

# 6. Extract Best Results
print("\n--- TUNING COMPLETE ---")
print(f"Best Parameters Found: {grid.best_params_}")
print(f"Best Cross-Validation Score: {grid.best_score_:.4f}")

# 7. Evaluate Tuned Model
# Predict once and reuse the predictions for both the accuracy and the
# confusion matrix below.
best_model = grid.best_estimator_
y_pred = best_model.predict(X_test_scaled)
tuned_acc = accuracy_score(y_test, y_pred)

print(f"Tuned Model Accuracy: {tuned_acc:.4f}")

# 8. Comparison Table
comparison = pd.DataFrame({
    'Model': ['Default SVM', 'Tuned SVM (GridSearch)'],
    'Accuracy': [default_acc, tuned_acc],
    'Improvement': [0, tuned_acc - default_acc]
})
print("\n--- PERFORMANCE COMPARISON ---")
try:
    # `display` is injected by IPython/Jupyter and renders a rich table.
    display(comparison)
except NameError:
    # Running as a plain script: fall back to a text table.
    print(comparison.to_string(index=False))

# 9. Confusion Matrix for Best Model
# The title lists every tuned parameter, not just C, so the plot is
# self-describing.
best_params_label = ", ".join(f"{name}={value}" for name, value in grid.best_params_.items())
plt.figure(figsize=(6, 4))
sns.heatmap(confusion_matrix(y_test, y_pred), annot=True, fmt='d', cmap='Purples')
plt.title(f"Confusion Matrix\n(Best Params: {best_params_label})")
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()