In [None]:
# UCI Breast Cancer Dataset - SVM Model Training

## Table of Contents
1. Load Preprocessed Data
2. Train SVM Model
3. Evaluate Performance
4. Confusion Matrix & Classification Report
5. ROC Curve Analysis
6. Hyperparameter Tuning with Grid Search
7. Save Best Model
8. Make Predictions

In [None]:
## 1. Import Libraries


In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import joblib
import os
from time import time

# SVM and model selection
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV, cross_val_score

# Evaluation metrics
from sklearn.metrics import (
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score,
    confusion_matrix, 
    classification_report,
    roc_auc_score,
    roc_curve
)

import warnings
# Suppress every warning for the rest of the session so library notices do not
# clutter the cell outputs. Note this is a blanket filter: it hides all
# categories, not just a specific one.
warnings.filterwarnings(action='ignore')

# Seed NumPy's global random generator so NumPy-based randomness is repeatable
np.random.seed(seed=42)

print("imported successfully")


In [None]:
## 2. Load Preprocessed Data


In [None]:
# Load the scaled feature matrices and the label vectors from data/processed/
# (paths are relative to the notebook's working directory), validate that the
# pieces fit together, and print a short summary.
print("Loading preprocessed data...")

X_train = pd.read_csv('data/processed/X_train_scaled.csv')
X_test = pd.read_csv('data/processed/X_test_scaled.csv')
y_train = pd.read_csv('data/processed/y_train.csv')
y_test = pd.read_csv('data/processed/y_test.csv')

# Each label file must hold exactly one column. With extra columns (for example
# an index column written alongside the target) ravel() would interleave the
# values into a wrong-length label vector without raising.
assert y_train.shape[1] == 1, f"y_train.csv: expected 1 column, found {y_train.shape[1]}"
assert y_test.shape[1] == 1, f"y_test.csv: expected 1 column, found {y_test.shape[1]}"

# Flatten the single-column label frames to 1-D NumPy arrays
y_train = y_train.to_numpy().ravel()
y_test = y_test.to_numpy().ravel()

# Sanity checks: fail here with a clear message rather than later inside fit()
# or predict().
assert len(X_train) == len(y_train), (
    f"Training features/labels row mismatch: {len(X_train)} vs {len(y_train)}"
)
assert len(X_test) == len(y_test), (
    f"Testing features/labels row mismatch: {len(X_test)} vs {len(y_test)}"
)
assert list(X_train.columns) == list(X_test.columns), (
    "Train and test feature columns differ"
)

# The summary below counts labels 0 and 1 only; any other encoding would be
# reported as zero samples, so reject it explicitly.
observed_labels = set(np.unique(y_train)) | set(np.unique(y_test))
assert observed_labels <= {0, 1}, f"Expected labels 0/1, found {sorted(observed_labels, key=str)}"

print("✓ Data loaded successfully!")
print("\nDataset Information:")
print(f"  Training samples: {X_train.shape[0]}")
print(f"  Testing samples: {X_test.shape[0]}")
print(f"  Number of features: {X_train.shape[1]}")

# NOTE(review): the labels below assume 0 = benign and 1 = malignant. That
# mapping is decided by the preprocessing step, which is not visible here —
# confirm it (scikit-learn's bundled copy of this dataset uses the opposite
# encoding).
print("\nClass Distribution:")
print(f"  Training - Benign: {np.sum(y_train == 0)}, Malignant: {np.sum(y_train == 1)}")
print(f"  Testing - Benign: {np.sum(y_test == 0)}, Malignant: {np.sum(y_test == 1)}")


In [None]:
## 3. Train Basic SVM Model
Train a baseline SVM with an RBF kernel, leaving `C` and `gamma` at their scikit-learn defaults.

In [None]:
# Fit a baseline RBF-kernel SVM on the training split and report how long the
# fit took along with the hyperparameters in effect.
print("Training SVM model with RBF kernel...")
print("=" * 60)

start_time = time()

# C and gamma are left at their defaults. probability=True enables
# predict_proba on the fitted model. fit() returns the estimator itself, so
# construction and training can be chained in one expression.
svm_model = SVC(
    kernel='rbf',
    probability=True,
    random_state=42,
).fit(X_train, y_train)

training_time = time() - start_time

print(f"Model trained successfully in {training_time:.2f} seconds!")
print(f"\nModel Parameters:")
print(f"  Kernel: {svm_model.kernel}")
print(f"  C (regularization): {svm_model.C}")
print(f"  Gamma: {svm_model.gamma}")
