In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import pickle
import os
import warnings
import time

warnings.filterwarnings("ignore")

from sklearn.svm import SVC
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV, StratifiedKFold
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
    classification_report,
    ConfusionMatrixDisplay,
)

In [4]:
# loading data
X_train_balanced = np.load(
    "/home/samir-dahal/Diabetes_Classification/data/processed/X_train_balanced.npy"
)
y_train_balanced = np.load(
    "/home/samir-dahal/Diabetes_Classification/data/processed/y_train_balanced.npy"
)
X_test = pd.read_csv(
    "/home/samir-dahal/Diabetes_Classification/data/processed/X_test.csv"
)
y_test = pd.read_csv(
    "/home/samir-dahal/Diabetes_Classification/data/processed/y_test.csv"
)

## SVM with Hyperparameter Tuning

In [5]:
param_grid = {
    "C": [0.1, 1, 10, 100],  # Regularization
    "kernel": ["linear", "rbf", "poly"],  # Kernel type
    "gamma": ["scale", "auto", 0.001, 0.01, 0.1, 1],  # Kernel coefficient
    "degree": [2, 3, 4],  # For polynomial kernel
    "class_weight": [None, "balanced"],  # Handle imbalance
}

In [6]:
svm_base = SVC(
    random_state=42,
    probability=True,  # Enable probability estimates
    verbose=False,
)

In [7]:
cv = StratifiedKFold(n_splits=5, shuffle=True, random_state=42)


In [8]:
# RandomizedSearchCV
svm_random = RandomizedSearchCV(
    estimator=svm_base,
    param_distributions=param_grid,
    n_iter=50,
    cv=cv,
    scoring="f1_macro",  # Optimize for F1-score (macro)
    n_jobs=-1,
    verbose=2,
    random_state=42,
    return_train_score=True,
)

In [9]:
start_time = time.time()
svm_random.fit(X_train_balanced, y_train_balanced)
search_time = time.time() - start_time

Fitting 5 folds for each of 50 candidates, totalling 250 fits
[CV] END C=0.1, class_weight=balanced, degree=3, gamma=auto, kernel=linear; total time=   0.2s
[CV] END C=0.1, class_weight=balanced, degree=3, gamma=auto, kernel=linear; total time=   0.2s
[CV] END C=100, class_weight=balanced, degree=4, gamma=0.01, kernel=rbf; total time=   0.2s
[CV] END C=1, class_weight=balanced, degree=3, gamma=scale, kernel=linear; total time=   0.2s
[CV] END C=100, class_weight=balanced, degree=4, gamma=0.01, kernel=rbf; total time=   0.2s
[CV] END C=100, class_weight=balanced, degree=4, gamma=0.01, kernel=rbf; total time=   0.2s
[CV] END C=1, class_weight=balanced, degree=3, gamma=scale, kernel=linear; total time=   0.2s
[CV] END C=100, class_weight=balanced, degree=4, gamma=0.01, kernel=rbf; total time=   0.2s
[CV] END C=1, class_weight=balanced, degree=3, gamma=scale, kernel=linear; total time=   0.2s
[CV] END C=0.1, class_weight=balanced, degree=3, gamma=auto, kernel=linear; total time=   0.3s
[CV

In [15]:
svm_best = svm_random.best_estimator_
print(svm_best)


SVC(C=10, class_weight='balanced', degree=2, gamma=1, probability=True,
    random_state=42)
