In [1]:
import pandas as pd
import numpy as np

In [2]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [3]:
data = load_breast_cancer()
X = data.data
y = data.target

In [4]:
df = pd.DataFrame(X, columns=data.feature_names)
df['target'] = y

In [5]:
print("Dataset Shape:", df.shape)
print(df.head())


Dataset Shape: (569, 31)
   mean radius  mean texture  mean perimeter  mean area  mean smoothness  \
0        17.99         10.38          122.80     1001.0          0.11840   
1        20.57         17.77          132.90     1326.0          0.08474   
2        19.69         21.25          130.00     1203.0          0.10960   
3        11.42         20.38           77.58      386.1          0.14250   
4        20.29         14.34          135.10     1297.0          0.10030   

   mean compactness  mean concavity  mean concave points  mean symmetry  \
0           0.27760          0.3001              0.14710         0.2419   
1           0.07864          0.0869              0.07017         0.1812   
2           0.15990          0.1974              0.12790         0.2069   
3           0.28390          0.2414              0.10520         0.2597   
4           0.13280          0.1980              0.10430         0.1809   

   mean fractal dimension  ...  worst texture  worst perimeter  wor

In [6]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

In [7]:
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)


In [8]:
default_model = SVC()
default_model.fit(X_train, y_train)

default_pred = default_model.predict(X_test)
default_accuracy = accuracy_score(y_test, default_pred)


In [9]:
print("\nDefault Model Accuracy:", default_accuracy)


Default Model Accuracy: 0.9824561403508771


In [10]:
param_grid = {
    'C': [0.1, 1, 10, 100],
    'gamma': [1, 0.1, 0.01, 0.001],
    'kernel': ['rbf', 'linear']
}


In [11]:
grid = GridSearchCV(
    SVC(),
    param_grid,
    cv=5,
    verbose=1,
    n_jobs=-1
)

In [12]:
grid.fit(X_train, y_train)
print("\nBest Parameters Found:")
print(grid.best_params_)

Fitting 5 folds for each of 32 candidates, totalling 160 fits

Best Parameters Found:
{'C': 0.1, 'gamma': 1, 'kernel': 'linear'}


In [13]:
best_model = grid.best_estimator_
tuned_pred = best_model.predict(X_test)
tuned_accuracy = accuracy_score(y_test, tuned_pred)

print("\nTuned Model Accuracy:", tuned_accuracy)


Tuned Model Accuracy: 0.9824561403508771


In [14]:
comparison = pd.DataFrame({
    "Model": ["Default SVM", "Tuned SVM (GridSearchCV)"],
    "Accuracy": [default_accuracy, tuned_accuracy]
})

In [15]:
print("\nPerformance Comparison:")
print(comparison)

print("\nClassification Report (Tuned Model):")
print(classification_report(y_test, tuned_pred))



Performance Comparison:
                      Model  Accuracy
0               Default SVM  0.982456
1  Tuned SVM (GridSearchCV)  0.982456

Classification Report (Tuned Model):
              precision    recall  f1-score   support

           0       0.98      0.98      0.98        42
           1       0.99      0.99      0.99        72

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114



In [16]:
print("\nConfusion Matrix:")
print(confusion_matrix(y_test, tuned_pred))


Confusion Matrix:
[[41  1]
 [ 1 71]]
