In [1]:
# Q1 — SVM on Iris dataset

from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix, classification_report
from sklearn.svm import SVC
import pandas as pd

# a) Load the Iris data and hold out 20% of it for testing.
#    The split is stratified on the label and uses a fixed seed.
iris = load_iris()
X, y = iris.data, iris.target

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# Standardise the features (important for SVM). The scaler is fitted on the
# training split only and then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Function to train + evaluate a model
def evaluate_model(model, X_train, X_test, y_train, y_test):
    """Fit ``model`` on the training split and score it on the test split.

    Parameters
    ----------
    model : object
        Any object exposing ``fit(X, y)`` and ``predict(X)``; ``fit`` is
        called on the object that is passed in.
    X_train, y_train
        Features and labels used for fitting.
    X_test, y_test
        Features and labels used for scoring.

    Returns
    -------
    dict
        Keys ``"Accuracy"``, ``"Precision"``, ``"Recall"``, ``"F1-Score"``
        (the last three macro-averaged) and ``"Confusion Matrix"``.
    """
    model.fit(X_train, y_train)
    predicted = model.predict(X_test)

    # Scorers that share the same macro-averaged call signature.
    macro_scorers = (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1-Score", f1_score),
    )

    results = {"Accuracy": accuracy_score(y_test, predicted)}
    for label, scorer in macro_scorers:
        results[label] = scorer(y_test, predicted, average='macro')
    results["Confusion Matrix"] = confusion_matrix(y_test, predicted)
    return results

# b) One SVM per kernel, keyed by the label used in the printed report
#    - Linear kernel: straight-line (hyperplane) separation
#    - Polynomial kernel (degree 3): curved boundaries
#    - RBF kernel: most flexible boundary, based on a Gaussian function
models = {
    "Linear Kernel": SVC(kernel="linear"),
    "Polynomial Kernel (degree=3)": SVC(kernel="poly", degree=3),
    "RBF Kernel": SVC(kernel="rbf"),
}

# Fit and score every model on the same scaled train/test split
all_results = {}

for name, model in models.items():
    all_results[name] = evaluate_model(
        model,
        X_train=X_train,
        X_test=X_test,
        y_train=y_train,
        y_test=y_test,
    )

# Print one report per model: the four scalar metrics (4 decimal places)
# followed by the confusion matrix.
for model_name, metrics in all_results.items():
    print(
        f"\n==== {model_name} ====",
        f"Accuracy: {metrics['Accuracy']:.4f}",
        f"Precision: {metrics['Precision']:.4f}",
        f"Recall: {metrics['Recall']:.4f}",
        f"F1-Score: {metrics['F1-Score']:.4f}",
        "Confusion Matrix:",
        metrics["Confusion Matrix"],
        sep="\n",
    )



==== Linear Kernel ====
Accuracy: 1.0000
Precision: 1.0000
Recall: 1.0000
F1-Score: 1.0000
Confusion Matrix:
[[10  0  0]
 [ 0 10  0]
 [ 0  0 10]]

==== Polynomial Kernel (degree=3) ====
Accuracy: 0.9000
Precision: 0.9231
Recall: 0.9000
F1-Score: 0.8977
Confusion Matrix:
[[10  0  0]
 [ 0 10  0]
 [ 0  3  7]]

==== RBF Kernel ====
Accuracy: 0.9667
Precision: 0.9697
Recall: 0.9667
F1-Score: 0.9666
Confusion Matrix:
[[10  0  0]
 [ 0  9  1]
 [ 0  0 10]]


In [2]:
# Q2 — SVM on Breast Cancer Dataset (With vs Without Feature Scaling)

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# a) Load the breast cancer dataset
data = load_breast_cancer()
X, y = data.data, data.target

# Train-test split (80:20), stratified on the label.
# random_state=42 gives the same random split on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

# --------------------------
# Model 1: RBF SVM fitted on the raw (unscaled) features
# --------------------------
svm_no_scale = SVC(kernel="rbf").fit(X_train, y_train)

# Score the fitted model on the training and test splits
train_acc_no_scale, test_acc_no_scale = (
    svm_no_scale.score(X_train, y_train),
    svm_no_scale.score(X_test, y_test),
)

# --------------------------
# Model 2: RBF SVM fitted on standardised features
# --------------------------
# The scaler is fitted on the training split only, then applied to both splits.
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

svm_scaled = SVC(kernel="rbf").fit(X_train_scaled, y_train)

# Score the fitted model on the scaled training and test splits
train_acc_scaled, test_acc_scaled = (
    svm_scaled.score(X_train_scaled, y_train),
    svm_scaled.score(X_test_scaled, y_test),
)

# --------------------------
# Report train/test accuracy for both models (4 decimal places)
# --------------------------
print(
    "===== SVM (RBF) WITHOUT Scaling =====",
    f"Training Accuracy: {train_acc_no_scale:.4f}",
    f"Testing Accuracy : {test_acc_no_scale:.4f}",
    sep="\n",
)

print(
    "\n===== SVM (RBF) WITH Scaling =====",
    f"Training Accuracy: {train_acc_scaled:.4f}",
    f"Testing Accuracy : {test_acc_scaled:.4f}",
    sep="\n",
)


===== SVM (RBF) WITHOUT Scaling =====
Training Accuracy: 0.9187
Testing Accuracy : 0.9298

===== SVM (RBF) WITH Scaling =====
Training Accuracy: 0.9824
Testing Accuracy : 0.9825
