In [5]:
# day_4_cleaned.py: SVM Training on Breast Cancer Dataset (Accuracy Only)

import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV, train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# === 1. Load and Preprocess Breast Cancer Dataset ===

# Fetch the bundled dataset and wrap the feature matrix in a labelled DataFrame.
data = load_breast_cancer()
X = pd.DataFrame(data=data.data, columns=data.feature_names)
y = data.target  # label encoding: 0 = malignant, 1 = benign

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Standardize features: fit the scaler on the training split only, then apply
# the same fitted transform to both splits so no test information is used.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# === 2. Train and Tune Linear SVC ===

# Candidate values for the regularization strength C.
param_grid_linear = {'C': [0.001, 0.01, 0.1, 1, 10, 100]}
svc_linear = SVC(kernel='linear', random_state=42)

# Run the search over a scaler+SVC pipeline on the *raw* training features.
# Searching over the pre-scaled matrix would standardize every validation fold
# with statistics computed from the full training set (data leakage), biasing
# the cross-validation scores used to pick C. Inside a pipeline the scaler is
# re-fitted on the training portion of each fold.
pipe_linear = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', svc_linear),
])
# Pipeline parameters are addressed as '<step>__<param>'.
pipe_grid_linear = {f'svc__{name}': values for name, values in param_grid_linear.items()}
grid_linear = GridSearchCV(pipe_linear, pipe_grid_linear, cv=5, scoring='accuracy', n_jobs=-1)
grid_linear.fit(X_train, y_train)

# With the default refit=True the winning pipeline is retrained on all of
# X_train, so its scaler is fitted on the same data as `scaler`. The SVC step
# is therefore a model trained on scaled features and takes X_test_scaled directly.
best_linear = grid_linear.best_estimator_.named_steps['svc']
y_pred_linear = best_linear.predict(X_test_scaled)
linear_accuracy = accuracy_score(y_test, y_pred_linear)
print(f"Linear SVC Accuracy: {linear_accuracy:.4f}")

# === 3. Train and Tune RBF SVC ===

# Candidate values for the regularization strength C and the kernel width gamma.
param_grid_rbf = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100],
    'gamma': ['scale', 'auto', 0.001, 0.01, 0.1]
}
svc_rbf = SVC(kernel='rbf', random_state=42)

# Run the search over a scaler+SVC pipeline on the *raw* training features.
# Searching over the pre-scaled matrix would standardize every validation fold
# with statistics computed from the full training set (data leakage), biasing
# the cross-validation scores used to pick C and gamma. Inside a pipeline the
# scaler is re-fitted on the training portion of each fold.
pipe_rbf = Pipeline([
    ('scaler', StandardScaler()),
    ('svc', svc_rbf),
])
# Pipeline parameters are addressed as '<step>__<param>'.
pipe_grid_rbf = {f'svc__{name}': values for name, values in param_grid_rbf.items()}
grid_rbf = GridSearchCV(pipe_rbf, pipe_grid_rbf, cv=5, scoring='accuracy', n_jobs=-1)
grid_rbf.fit(X_train, y_train)

# With the default refit=True the winning pipeline is retrained on all of
# X_train, so its scaler is fitted on the same data as `scaler`. The SVC step
# is therefore a model trained on scaled features and takes X_test_scaled directly.
best_rbf = grid_rbf.best_estimator_.named_steps['svc']
y_pred_rbf = best_rbf.predict(X_test_scaled)
rbf_accuracy = accuracy_score(y_test, y_pred_rbf)
print(f"RBF SVC Accuracy: {rbf_accuracy:.4f}")


Linear SVC Accuracy: 0.9825
RBF SVC Accuracy: 0.9825
