In [None]:
import numpy as np
import pandas as pd
from sklearn.svm import SVC
from sklearn.model_selection import StratifiedKFold, GridSearchCV
from sklearn.metrics import roc_auc_score, make_scorer, confusion_matrix
import warnings
warnings.filterwarnings('ignore')
# Remote CSV holding the diabetes data. Column names are supplied explicitly
# through `names=` instead of being taken from the file.
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/pima-indians-diabetes.data.csv'
columns = [
    "Pregnancies", "Glucose", "BloodPressure", "SkinThickness",
    "Insulin", "BMI", "DiabetesPedigreeFunction", "Age", "Outcome",
]
data = pd.read_csv(url, names=columns)

# Feature matrix: every column except the trailing "Outcome" label.
X = data.drop(columns="Outcome").to_numpy()
# Target vector: the "Outcome" column on its own.
y = data["Outcome"].to_numpy()
def gmean_score(y_true, y_pred):
    """Return the geometric mean of sensitivity and specificity.

    The confusion matrix is built with scikit-learn's ``confusion_matrix``
    (rows are true labels, columns are predicted labels, labels in sorted
    order), so the larger of the two labels is treated as the positive class.

    Args:
        y_true: Ground-truth class labels.
        y_pred: Predicted class labels.

    Returns:
        ``sqrt(sensitivity * specificity)``. If the confusion matrix is not
        2x2 (the labels seen across both inputs do not form exactly two
        classes) the result is ``0``. A rate whose denominator is zero is
        counted as ``0``.
    """
    matrix = confusion_matrix(y_true, y_pred)
    if matrix.shape != (2, 2):
        return 0

    # Row 0 holds the actual negatives, row 1 the actual positives.
    (true_neg, false_pos), (false_neg, true_pos) = matrix
    actual_pos = true_pos + false_neg
    actual_neg = true_neg + false_pos

    recall_pos = true_pos / actual_pos if actual_pos > 0 else 0
    recall_neg = true_neg / actual_neg if actual_neg > 0 else 0
    return np.sqrt(recall_pos * recall_neg)

# Wrap the G-mean metric so it can be passed as GridSearchCV's `scoring`.
gmean_scorer = make_scorer(gmean_score)

# Search grids: C and sigma each sweep 2**-20 ... 2**20 (41 values apiece).
C_range = 2.0 ** np.arange(-20, 21)
sigma_range = 2.0 ** np.arange(-20, 21)
# Convert each RBF width sigma to SVC's `gamma`: gamma = 1 / (2 * sigma**2).
gamma_range = [1 / (2 * (width ** 2)) for width in sigma_range]

# One shared 5-fold stratified splitter; the fixed seed gives every grid
# search below the same shuffled folds.
cv = StratifiedKFold(shuffle=True, n_splits=5, random_state=42)

print("Linear Kernel SVM: ")
# `probability=True` is deliberately NOT set. It makes every fit run an extra
# internal 5-fold cross-validation (Platt scaling), yet neither scorer below
# needs it: 'roc_auc' ranks samples with SVC.decision_function and the G-mean
# scorer uses SVC.predict. Dropping it leaves the scores unchanged and removes
# most of the fitting cost.
# NOTE(review): as a consequence `best_estimator_.predict_proba` is no longer
# available; re-enable `probability=True` on a final model if calibrated
# probabilities are needed downstream.
# NOTE(review): X is passed to the SVM unscaled; with C as large as 2**20 the
# linear-kernel solver may take very long to converge. Consider standardising
# features inside a Pipeline if runtime is still a problem.
svc_linear = SVC(kernel='linear')

# Only C is tuned for the linear kernel.
param_grid_linear = {'C': C_range}

# Search 1: choose C by mean cross-validated ROC AUC.
grid_linear_auc = GridSearchCV(svc_linear, param_grid_linear,
                               scoring='roc_auc', cv=cv, n_jobs=-1)
grid_linear_auc.fit(X, y)
best_linear_auc = grid_linear_auc.best_score_
best_linear_auc_params = grid_linear_auc.best_params_

# Search 2: same estimator, grid and folds, but ranked by G-mean.
grid_linear_gmean = GridSearchCV(svc_linear, param_grid_linear,
                                 scoring=gmean_scorer, cv=cv, n_jobs=-1)
grid_linear_gmean.fit(X, y)
best_linear_gmean = grid_linear_gmean.best_score_
best_linear_gmean_params = grid_linear_gmean.best_params_

print("Best ROC AUC: {:.4f} ".format(best_linear_auc))
print("Best Gmean:   {:.4f} ".format(best_linear_gmean))

print("\nRBF Kernel SVM: ")
# `probability=True` is deliberately NOT set. It makes every fit run an extra
# internal 5-fold cross-validation (Platt scaling), yet neither scorer below
# needs it: 'roc_auc' ranks samples with SVC.decision_function and the G-mean
# scorer uses SVC.predict. With a 41 x 41 grid evaluated on 5 folds, twice,
# that overhead dominated the runtime without affecting any score.
# NOTE(review): as a consequence `best_estimator_.predict_proba` is no longer
# available; re-enable `probability=True` on a final model if calibrated
# probabilities are needed downstream.
svc_rbf = SVC(kernel='rbf')

# Full Cartesian grid over the penalty C and the kernel coefficient gamma.
param_grid_rbf = {'C': C_range, 'gamma': gamma_range}

# Search 1: choose (C, gamma) by mean cross-validated ROC AUC.
grid_rbf_auc = GridSearchCV(svc_rbf, param_grid_rbf,
                            scoring='roc_auc', cv=cv, n_jobs=-1)
grid_rbf_auc.fit(X, y)
best_rbf_auc = grid_rbf_auc.best_score_
best_rbf_auc_params = grid_rbf_auc.best_params_

# Search 2: same estimator, grid and folds, but ranked by G-mean.
grid_rbf_gmean = GridSearchCV(svc_rbf, param_grid_rbf,
                              scoring=gmean_scorer, cv=cv, n_jobs=-1)
grid_rbf_gmean.fit(X, y)
best_rbf_gmean = grid_rbf_gmean.best_score_
best_rbf_gmean_params = grid_rbf_gmean.best_params_

print("Best ROC AUC: {:.4f} ".format(best_rbf_auc))
print("Best Gmean:   {:.4f} ".format(best_rbf_gmean))


----- Linear Kernel SVM -----


KeyboardInterrupt: 