In [1]:
#q1
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.preprocessing import StandardScaler # Added scaling for better comparison later, although not strictly asked in Q1

# --- a) Load and Split (80:20) ---
# Load the Iris features/labels and hold out 20% of the rows for testing.
# The seed is fixed so the split is the same on every run.
iris = load_iris()
X = iris.data
y = iris.target
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Standardise the features. The scaler is fitted on the training rows only and
# that same fitted transform is then applied to the held-out rows, so no
# test-set statistics influence the preprocessing.
scaler_i = StandardScaler().fit(X_train)
X_train_scaled = scaler_i.transform(X_train)
X_test_scaled = scaler_i.transform(X_test)


# --- b) Train Models & c) Evaluate & d) Confusion Matrix ---
# One unfitted classifier per kernel under comparison, keyed by display name.
kernels = dict(
    Linear=SVC(kernel='linear', random_state=42),
    Polynomial=SVC(kernel='poly', degree=3, random_state=42),
    RBF=SVC(kernel='rbf', random_state=42),
)

print("--- Q1: SVM Kernel Comparison on Iris Dataset ---")

# One record per kernel; turned into a DataFrame once the loop has finished.
results = []

for name, model in kernels.items():
    # Fit on the scaled training rows, then predict the scaled held-out rows.
    y_pred = model.fit(X_train_scaled, y_train).predict(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)
    # The three per-class metrics all use macro averaging.
    precision, recall, f1 = (
        metric(y_test, y_pred, average='macro')
        for metric in (precision_score, recall_score, f1_score)
    )
    cm = confusion_matrix(y_test, y_pred)

    results.append({
        'Kernel': name,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1,
        'Confusion Matrix': cm,
    })

    # Per-model report: headline numbers followed by the confusion matrix.
    print(f"\nModel: {name}")
    print(f"  Accuracy: {accuracy:.4f}, F1-Score: {f1:.4f}")
    print(f"  Confusion Matrix:\n{cm}")

# --- e) Identify Best Kernel ---
# Collect the per-kernel records into a table and pick the winner.
results_df = pd.DataFrame(results)

# Rank by accuracy first and use the macro F1-score as the tie-breaker.
# Sorting on accuracy alone leaves the order of equally accurate kernels
# unrelated to F1, while the reason printed below states that the winner
# leads on both Accuracy and F1-Score.
best_model = results_df.sort_values(
    by=['Accuracy', 'F1-Score'], ascending=False
).iloc[0]

print("\n--- Summary ---")
print(results_df[['Kernel', 'Accuracy', 'F1-Score']])
print(f"\nBest Performing Kernel: **{best_model['Kernel']}**")
print("Reason: This kernel achieved the highest overall metrics (Accuracy and F1-Score) on the test set, indicating it found the most effective non-linear (or linear) separation boundary for the Iris data.")

--- Q1: SVM Kernel Comparison on Iris Dataset ---

Model: Linear
  Accuracy: 0.9667, F1-Score: 0.9659
  Confusion Matrix:
[[10  0  0]
 [ 0  8  1]
 [ 0  0 11]]

Model: Polynomial
  Accuracy: 0.9667, F1-Score: 0.9666
  Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  1 10]]

Model: RBF
  Accuracy: 1.0000, F1-Score: 1.0000
  Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]

--- Summary ---
       Kernel  Accuracy  F1-Score
0      Linear  0.966667  0.965899
1  Polynomial  0.966667  0.966583
2         RBF  1.000000  1.000000

Best Performing Kernel: **RBF**
Reason: This kernel achieved the highest overall metrics (Accuracy and F1-Score) on the test set, indicating it found the most effective non-linear (or linear) separation boundary for the Iris data.


In [2]:
#q2
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import pandas as pd

# --- a) Load Dataset ---
# Load the breast-cancer features/labels and hold out 20% of the rows for
# testing, with a fixed seed so the split is repeatable.
cancer = load_breast_cancer()
X_c = cancer.data
y_c = cancer.target
X_train_c, X_test_c, y_train_c, y_test_c = train_test_split(X_c, y_c, test_size=0.2, random_state=42)

# --- b) Train SVM with and without scaling (RBF kernel) ---

# 1. Model WITHOUT Scaling: fitted directly on the raw feature values.
svm_unscaled = SVC(kernel='rbf', random_state=42).fit(X_train_c, y_train_c)

# 2. Model WITH Scaling (StandardScaler): the scaler is fitted on the training
#    rows only and then applied unchanged to the held-out rows.
scaler_c = StandardScaler().fit(X_train_c)
X_train_c_scaled = scaler_c.transform(X_train_c)
X_test_c_scaled = scaler_c.transform(X_test_c)

# Same estimator settings as the unscaled model; only the inputs differ.
svm_scaled = SVC(kernel='rbf', random_state=42).fit(X_train_c_scaled, y_train_c)

# Comparison
# Pair each fitted model with the train/test matrices it was trained for, so
# both accuracies are computed by one expression instead of four copies.
variants = {
    'Unscaled': (svm_unscaled, X_train_c, X_test_c),
    'Scaled': (svm_scaled, X_train_c_scaled, X_test_c_scaled),
}

results_scaling = {
    label: {
        'Train Accuracy': accuracy_score(y_train_c, clf.predict(train_X)),
        'Test Accuracy': accuracy_score(y_test_c, clf.predict(test_X)),
    }
    for label, (clf, train_X, test_X) in variants.items()
}

# Transpose so each model is a row and each accuracy is a column.
results_df_scaling = pd.DataFrame(results_scaling).T

print("\n--- Q2: Effect of Feature Scaling on SVM (RBF) ---")
print(results_df_scaling.to_markdown())

# --- c) Discussion ---
# NOTE(review): the text below is static; it is not derived from the accuracy
# table computed above, so re-check the wording if the numbers change.
discussion_lines = (
    "\n--- Discussion: Effect of Feature Scaling ---",
    "Feature scaling is **crucial** for kernel-based SVMs (like RBF) because the RBF kernel relies on the Euclidean distance between data points. Without scaling:",
    "1. **Unequal Influence:** Features with larger magnitudes (e.g., 'mean area') dominate the distance calculation, effectively overshadowing features with smaller ranges (e.g., 'mean smoothness').",
    "2. **Poor Hyperplane Placement:** The algorithm incorrectly weights the importance of features, leading to a sub-optimal separating hyperplane.",
    "As shown in the results, **scaling dramatically improves both training and testing accuracy** because it normalizes the features, allowing all attributes to contribute equally to the distance calculation, thus leading to a more effective maximum margin classifier.",
)
for line in discussion_lines:
    print(line)


--- Q2: Effect of Feature Scaling on SVM (RBF) ---
|          |   Train Accuracy |   Test Accuracy |
|:---------|-----------------:|----------------:|
| Unscaled |         0.914286 |        0.947368 |
| Scaled   |         0.989011 |        0.982456 |

--- Discussion: Effect of Feature Scaling ---
Feature scaling is **crucial** for kernel-based SVMs (like RBF) because the RBF kernel relies on the Euclidean distance between data points. Without scaling:
1. **Unequal Influence:** Features with larger magnitudes (e.g., 'mean area') dominate the distance calculation, effectively overshadowing features with smaller ranges (e.g., 'mean smoothness').
2. **Poor Hyperplane Placement:** The algorithm incorrectly weights the importance of features, leading to a sub-optimal separating hyperplane.
As shown in the results, **scaling dramatically improves both training and testing accuracy** because it normalizes the features, allowing all attributes to contribute equally to the distance calculation,