# Cancer Classification - SVM (4 Kernels)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, recall_score, precision_score, f1_score, jaccard_score, confusion_matrix, roc_curve, auc

In [None]:
# Load and prepare data
df = pd.read_csv('samples_cancer.csv')
# Features are every numeric column other than the label.
# NOTE(review): an identifier-like or all-empty numeric column in the CSV would
# also be kept as a feature here -- confirm against the file's actual schema.
X = df.drop('diagnosis', axis=1).select_dtypes(include=[np.number])
y = (df['diagnosis'] == 'M').astype(int)  # Convert to binary: 1=Malignant, 0=Benign

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# held-out rows influence the mean/variance used for preprocessing (data
# leakage) and bias the test metrics. Split parameters are unchanged.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale features: learn the statistics from the training rows only, then apply
# that same transformation to the test rows.
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Kept so later cells that reference X_scaled still work; it is the full feature
# matrix transformed with the train-fitted scaler.
X_scaled = scaler.transform(X)

In [None]:
# Fit one SVC per kernel and record its test-set predictions and scores
kernels = ['linear', 'poly', 'rbf', 'sigmoid']
results = {}

# Scores that share the (y_true, y_pred) call signature, listed in the order
# they appear in the summary table.
scorers = {
    'Recall': recall_score,
    'Precision': precision_score,
    'F1-Score': f1_score,
    'Jaccard': jaccard_score,
}

for kernel in kernels:
    svm = SVC(kernel=kernel, probability=True, random_state=42)
    svm.fit(X_train, y_train)
    y_pred = svm.predict(X_test)
    # Second predict_proba column, i.e. the class listed second in svm.classes_
    y_prob = svm.predict_proba(X_test)[:, 1]

    acc = accuracy_score(y_test, y_pred)
    entry = {'y_pred': y_pred, 'y_prob': y_prob, 'Accuracy': acc}
    for metric_name, score_fn in scorers.items():
        entry[metric_name] = score_fn(y_test, y_pred)
    entry['Error Rate'] = 1 - acc  # complement of accuracy
    results[kernel] = entry

# Display metrics table: one row per kernel, one column per scalar metric.
# The raw prediction arrays are stored for the plots and left out of the table.
non_scalar = ('y_pred', 'y_prob')
summary = {}
for kernel_name, entry in results.items():
    summary[kernel_name] = {
        metric: value for metric, value in entry.items() if metric not in non_scalar
    }
metrics_df = pd.DataFrame(summary).T
print(metrics_df.round(4))

In [None]:
# Confusion matrices: one annotated heatmap per kernel on a 2x2 grid
fig, axes = plt.subplots(2, 2, figsize=(10, 8))
# ravel() walks the grid row by row, so kernels fill left-to-right, top-to-bottom
for kernel, ax in zip(kernels, axes.ravel()):
    cm = confusion_matrix(y_test, results[kernel]['y_pred'])
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', ax=ax)
    ax.set_title(kernel.upper())
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Actual')
plt.tight_layout()
plt.show()

In [None]:
# Overlay the ROC curve of every kernel in a single figure, with AUC in the legend
plt.figure(figsize=(8, 6))
for kernel in kernels:
    scores = results[kernel]['y_prob']
    fpr, tpr, _ = roc_curve(y_test, scores)
    roc_auc = auc(fpr, tpr)
    plt.plot(fpr, tpr, label=f'{kernel.upper()} (AUC={roc_auc:.3f})')
# Dashed diagonal drawn as the 'Random' reference line
plt.plot([0, 1], [0, 1], 'k--', label='Random')
plt.title('ROC Curves - SVM Kernels')
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.grid(alpha=0.3)
plt.legend()
plt.show()