In [2]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report
)
import numpy as np

# a) Load the Iris dataset and perform an 80:20 train-test split
iris = load_iris()
X, y = iris.data, iris.target

# A fixed random_state makes the split reproducible; stratify=y asks
# train_test_split to preserve the class proportions in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42, stratify=y
)

print("Train shape:", X_train.shape, "Test shape:", X_test.shape)
print("-" * 50)

# b) Train one SVM per kernel: Linear, Polynomial (degree=3) and RBF.
#    Fitted models, metric dictionaries and confusion matrices are all
#    stored keyed by kernel name so they stay available after the loop.
kernels = ['linear', 'poly', 'rbf']
models, results, conf_matrices = {}, {}, {}

for kernel in kernels:
    # Only the polynomial kernel is constructed with an explicit degree.
    clf = (
        SVC(kernel=kernel, degree=3, random_state=42)
        if kernel == 'poly'
        else SVC(kernel=kernel, random_state=42)
    )
    clf.fit(X_train, y_train)
    models[kernel] = clf

    # Predictions on the held-out test split
    y_pred = clf.predict(X_test)

    # c) Evaluate the model: plain accuracy plus weighted-average
    #    precision, recall and F1 (computed in that order).
    acc = accuracy_score(y_test, y_pred)
    prec, rec, f1 = (
        scorer(y_test, y_pred, average='weighted')
        for scorer in (precision_score, recall_score, f1_score)
    )
    results[kernel] = dict(
        zip(
            ("Accuracy", "Precision", "Recall", "F1-Score"),
            (acc, prec, rec, f1),
        )
    )

    # d) Confusion matrix of true vs. predicted test labels
    cm = confusion_matrix(y_test, y_pred)
    conf_matrices[kernel] = cm

    # Report for this kernel: one item per line, then the matrix and
    # the per-class classification report.
    print(
        f"=== Kernel: {kernel.upper()} ===",
        f"Accuracy : {acc:.4f}",
        f"Precision: {prec:.4f}",
        f"Recall   : {rec:.4f}",
        f"F1-Score : {f1:.4f}",
        "Confusion Matrix:",
        sep="\n",
    )
    print(cm)
    print("\nClassification Report:")
    print(classification_report(y_test, y_pred, target_names=iris.target_names))
    print("-" * 50)

# e) Identify the best kernel: the one with the highest test accuracy.
#    max() returns the first maximal entry, so on a tie the kernel that
#    was evaluated first is reported.
accuracy_by_kernel = {name: scores["Accuracy"] for name, scores in results.items()}
best_kernel = max(accuracy_by_kernel, key=accuracy_by_kernel.get)

print("Best Kernel based on Accuracy:", best_kernel.upper())
print("Metrics:", results[best_kernel])


Train shape: (120, 4) Test shape: (30, 4)
--------------------------------------------------
=== Kernel: LINEAR ===
Accuracy : 1.0000
Precision: 1.0000
Recall   : 1.0000
F1-Score : 1.0000
Confusion Matrix:
[[10  0  0]
 [ 0 10  0]
 [ 0  0 10]]

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1.00      1.00      1.00        10
   virginica       1.00      1.00      1.00        10

    accuracy                           1.00        30
   macro avg       1.00      1.00      1.00        30
weighted avg       1.00      1.00      1.00        30

--------------------------------------------------
=== Kernel: POLY ===
Accuracy : 0.9667
Precision: 0.9697
Recall   : 0.9667
F1-Score : 0.9666
Confusion Matrix:
[[10  0  0]
 [ 0  9  1]
 [ 0  0 10]]

Classification Report:
              precision    recall  f1-score   support

      setosa       1.00      1.00      1.00        10
  versicolor       1

In [4]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# A) Load the Breast Cancer dataset
cancer = load_breast_cancer()
X_bc, y_bc = cancer.data, cancer.target

# 80-20 train-test split, seeded for reproducibility and stratified on the
# target labels.
X_train_bc, X_test_bc, y_train_bc, y_test_bc = train_test_split(
    X_bc, y_bc, test_size=0.2, random_state=42, stratify=y_bc
)

print("Breast Cancer Train shape:", X_train_bc.shape, "Test shape:", X_test_bc.shape)
print("-" * 50)

# B1) Baseline: RBF-kernel SVM trained on the raw, unscaled features
svm_no_scale = SVC(kernel='rbf', random_state=42)
svm_no_scale.fit(X_train_bc, y_train_bc)

# Predict on the training split first, then on the test split, so both
# accuracies can be reported side by side.
y_train_pred_no, y_test_pred_no = (
    svm_no_scale.predict(features) for features in (X_train_bc, X_test_bc)
)

train_acc_no = accuracy_score(y_train_bc, y_train_pred_no)
test_acc_no = accuracy_score(y_test_bc, y_test_pred_no)

print(
    "=== SVM with RBF kernel WITHOUT Scaling ===",
    f"Training Accuracy: {train_acc_no:.4f}",
    f"Testing Accuracy : {test_acc_no:.4f}",
    "-" * 50,
    sep="\n",
)

# B2) Same RBF-kernel SVM, this time on features standardized with
#     StandardScaler.
scaler = StandardScaler()

# The scaler is fitted on the training split only; the test split is
# transformed with those already-fitted statistics.
X_train_bc_scaled = scaler.fit_transform(X_train_bc)
X_test_bc_scaled = scaler.transform(X_test_bc)

svm_scaled = SVC(kernel='rbf', random_state=42)
svm_scaled.fit(X_train_bc_scaled, y_train_bc)

# Predict on the scaled training split first, then the scaled test split.
y_train_pred_scaled, y_test_pred_scaled = (
    svm_scaled.predict(features)
    for features in (X_train_bc_scaled, X_test_bc_scaled)
)

train_acc_scaled = accuracy_score(y_train_bc, y_train_pred_scaled)
test_acc_scaled = accuracy_score(y_test_bc, y_test_pred_scaled)

print(
    "=== SVM with RBF kernel WITH Scaling (StandardScaler) ===",
    f"Training Accuracy: {train_acc_scaled:.4f}",
    f"Testing Accuracy : {test_acc_scaled:.4f}",
    "-" * 50,
    sep="\n",
)

# C) Side-by-side comparison of the unscaled and scaled runs
print(
    " COMPARISON: Effect of Feature Scaling",
    f"Without Scaling - Train Acc: {train_acc_no:.4f}, Test Acc: {test_acc_no:.4f}",
    f"With Scaling    - Train Acc: {train_acc_scaled:.4f}, Test Acc: {test_acc_scaled:.4f}",
    sep="\n",
)

# The conclusion is decided on test accuracy alone: scaling counts as an
# improvement only if it is strictly higher than the unscaled run.
scaling_helped = test_acc_scaled > test_acc_no
print(
    "\nConclusion: Feature scaling improved SVM performance on the Breast Cancer dataset."
    if scaling_helped
    else "\nConclusion: Feature scaling did not significantly improve performance in this run (but is generally recommended for SVM)."
)


Breast Cancer Train shape: (455, 30) Test shape: (114, 30)
--------------------------------------------------
=== SVM with RBF kernel WITHOUT Scaling ===
Training Accuracy: 0.9187
Testing Accuracy : 0.9298
--------------------------------------------------
=== SVM with RBF kernel WITH Scaling (StandardScaler) ===
Training Accuracy: 0.9824
Testing Accuracy : 0.9825
--------------------------------------------------
 COMPARISON: Effect of Feature Scaling
Without Scaling - Train Acc: 0.9187, Test Acc: 0.9298
With Scaling    - Train Acc: 0.9824, Test Acc: 0.9825

Conclusion: Feature scaling improved SVM performance on the Breast Cancer dataset.
