In [1]:
# Q1 - Iris dataset: Compare SVM kernels (Linear, Poly, RBF)
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

RND = 42  # single seed shared by the split and every model so the run is repeatable

# Load dataset
iris = datasets.load_iris()
X, y = iris.data, iris.target
# Hold out 20% of the samples for testing; stratify=y splits each class in the same ratio.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RND, stratify=y)

# Models: one SVC per kernel; every other hyperparameter is left at its default.
kernels = {
    "linear": SVC(kernel="linear", random_state=RND),
    "poly_deg3": SVC(kernel="poly", degree=3, random_state=RND),
    "rbf": SVC(kernel="rbf", random_state=RND)
}

# Train & Evaluate
# The macro-F1 of each kernel is recorded while it is being reported, so the
# best-kernel step below ranks on exactly the numbers that were printed
# instead of running predict() and f1_score() a second time per model.
f1_by_kernel = {}
for name, model in kernels.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Macro averaging: each class contributes equally to precision/recall/F1.
    acc = accuracy_score(y_test, y_pred)
    prec = precision_score(y_test, y_pred, average='macro')
    rec = recall_score(y_test, y_pred, average='macro')
    f1 = f1_score(y_test, y_pred, average='macro')
    cm = confusion_matrix(y_test, y_pred)
    f1_by_kernel[name] = f1

    print(f"\nKernel: {name}")
    print(f"Accuracy: {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall: {rec:.4f}")
    print(f"F1-score: {f1:.4f}")
    print("Confusion Matrix:\n", cm)

# Best kernel (by macro F1)
# max() returns the first maximal key in insertion order, so ties resolve in
# favour of the kernel listed earliest in `kernels`.
# NOTE(review): the ranking uses the held-out test split itself, so the winning
# score is an optimistic estimate; consider cross-validation on the training
# split if this choice feeds further work.
best_kernel = max(f1_by_kernel, key=f1_by_kernel.get)
print(f"\nQ1 Answer: Best Kernel = {best_kernel}")


Kernel: linear
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1-score: 1.0000
Confusion Matrix:
 [[10  0  0]
 [ 0 10  0]
 [ 0  0 10]]

Kernel: poly_deg3
Accuracy: 0.9667
Precision: 0.9697
Recall: 0.9667
F1-score: 0.9666
Confusion Matrix:
 [[10  0  0]
 [ 0  9  1]
 [ 0  0 10]]

Kernel: rbf
Accuracy: 0.9667
Precision: 0.9697
Recall: 0.9667
F1-score: 0.9666
Confusion Matrix:
 [[10  0  0]
 [ 0  9  1]
 [ 0  0 10]]

Q1 Answer: Best Kernel = linear


In [2]:
# Q2 - Breast Cancer dataset: Effect of feature scaling on SVM (RBF)
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

RND = 42  # single seed shared by the split and both models so the run is repeatable

# Load dataset
# NOTE: X, y and the train/test names below rebind the variables of the same
# name created for the Iris experiment in the previous cell.
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target
# Hold out 20% of the samples for testing; stratify=y splits each class in the same ratio.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=RND, stratify=y)


def _fit_rbf_and_score(X_tr, X_te, y_tr, y_te):
    """Fit a default RBF-kernel SVC and measure its accuracy.

    Parameters
    ----------
    X_tr, X_te : feature matrices used for fitting and for held-out evaluation.
    y_tr, y_te : label vectors matching ``X_tr`` and ``X_te``.

    Returns
    -------
    tuple
        ``(fitted_model, train_accuracy, test_accuracy)``.
    """
    model = SVC(kernel="rbf", random_state=RND)
    model.fit(X_tr, y_tr)
    train_acc = accuracy_score(y_tr, model.predict(X_tr))
    test_acc = accuracy_score(y_te, model.predict(X_te))
    return model, train_acc, test_acc


# Without Scaling
model_ns, train_acc_ns, test_acc_ns = _fit_rbf_and_score(X_train, X_test, y_train, y_test)

print("WITHOUT Scaling:")
print(f"Train Accuracy: {train_acc_ns:.4f}")
print(f"Test Accuracy: {test_acc_ns:.4f}\n")

# With StandardScaler
# The scaler is fitted on the training split only and then applied to the test
# split, so no test-set statistics leak into the preprocessing step.
scaler = StandardScaler()
X_train_s = scaler.fit_transform(X_train)
X_test_s = scaler.transform(X_test)

model_s, train_acc_s, test_acc_s = _fit_rbf_and_score(X_train_s, X_test_s, y_train, y_test)

print("WITH Scaling:")
print(f"Train Accuracy: {train_acc_s:.4f}")
print(f"Test Accuracy: {test_acc_s:.4f}\n")

# State the conclusion only when the measured test accuracies support it,
# rather than printing a fixed claim regardless of the numbers above.
if test_acc_s > test_acc_ns:
    print("Q2 Answer: Scaling improves SVM (RBF) performance because it normalizes feature ranges.")
else:
    print("Q2 Answer: Scaling did not improve SVM (RBF) test accuracy on this split.")


WITHOUT Scaling:
Train Accuracy: 0.9187
Test Accuracy: 0.9298

WITH Scaling:
Train Accuracy: 0.9824
Test Accuracy: 0.9825

Q2 Answer: Scaling improves SVM (RBF) performance because it normalizes feature ranges.
