In [149]:

from sklearn.model_selection import train_test_split
from scipy.io import loadmat

from sklearn.svm import OneClassSVM

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import balanced_accuracy_score

In [135]:
dataset = loadmat('cardio.mat')

In [137]:
dataset

{'__header__': b'MATLAB 5.0 MAT-file, written by Octave 3.8.0, 2014-12-18 10:48:09 UTC',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[ 0.00491231,  0.69319077, -0.20364049, ...,  0.23149795,
         -0.28978574, -0.49329397],
        [ 0.11072935, -0.07990259, -0.20364049, ...,  0.09356344,
         -0.25638541, -0.49329397],
        [ 0.21654639, -0.27244466, -0.20364049, ...,  0.02459619,
         -0.25638541,  1.14001753],
        ...,
        [-0.41835583, -0.91998844, -0.16463485, ..., -1.49268341,
          0.24461959, -0.49329397],
        [-0.41835583, -0.91998844, -0.15093411, ..., -1.42371616,
          0.14441859, -0.49329397],
        [-0.41835583, -0.91998844, -0.20364049, ..., -1.28578165,
          3.58465295, -0.49329397]]),
 'y': array([[0.],
        [0.],
        [0.],
        ...,
        [1.],
        [1.],
        [1.]])}

In [138]:
X_train, X_test, y_train, y_test = train_test_split(dataset['X'], dataset['y'], train_size=0.4)

In [139]:
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((732, 21), (1099, 21), (732, 1), (1099, 1))

In [140]:
model = OneClassSVM(kernel='rbf', gamma=0.1, nu=0.1)
model.fit(X_train)

In [141]:
# y_train = y_train.squeeze()
# y_test = y_test.squeeze()

In [142]:
y_train.shape, y_test.shape

((732, 1), (1099, 1))

In [143]:
# convert y_train from 0 and 1 to -1 and 1
y_train = 1 - 2*y_train
y_test = 1 - 2*y_test

In [144]:
import numpy as np
np.unique(y_train), np.unique(y_test)

(array([-1.,  1.]), array([-1.,  1.]))

In [145]:
param_grid = {'kernel': ['rbf', 'linear', 'poly', 'sigmoid'], 'gamma': [1, 0.1, 0.01, 0.001], 'nu': [0.1, 0.2, 0.3, 0.4, 0.5]}

# create a pipeline with standard scaler over OneClassSVM
model = OneClassSVM()
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=5, scoring='balanced_accuracy')
pipeline = Pipeline([('scaler', StandardScaler()), ('model', grid_search)])

grid_search.fit(X_train, y_train)


In [146]:
best_params = grid_search.best_params_
best_model = grid_search.best_estimator_
best_score = grid_search.best_score_

In [147]:
best_params, best_model, best_score

({'gamma': 0.001, 'kernel': 'rbf', 'nu': 0.3},
 OneClassSVM(gamma=0.001, nu=0.3),
 0.8769256584080332)

In [148]:
# show all balanced accuracy scores from grid search
grid_search.cv_results_['mean_test_score']

array([0.50751098, 0.50603478, 0.50678105, 0.50900352, 0.5075399 ,
       0.37577677, 0.54337148, 0.52725486, 0.50714853, 0.36970428,
       0.42233197, 0.3941513 , 0.34753155, 0.30921988, 0.27590833,
       0.63969388, 0.75074588, 0.81580028, 0.78251175, 0.76788754,
       0.70220785, 0.7240632 , 0.76975163, 0.77996555, 0.76893712,
       0.37577677, 0.54337148, 0.52725486, 0.50714853, 0.36970428,
       0.4230619 , 0.3941513 , 0.34753155, 0.30921988, 0.27590833,
       0.65075325, 0.78589727, 0.82603348, 0.79480897, 0.76980095,
       0.69048215, 0.84511036, 0.86386935, 0.81959229, 0.7765112 ,
       0.37577677, 0.54337148, 0.52725486, 0.50714853, 0.36970428,
       0.37462645, 0.37442545, 0.34272629, 0.29628266, 0.27670024,
       0.53867398, 0.58313284, 0.59618886, 0.62380008, 0.64678984,
       0.69113706, 0.86098929, 0.87692566, 0.82048619, 0.77794663,
       0.37577677, 0.54337148, 0.52725486, 0.50714853, 0.36970428,
       0.20705713, 0.24801912, 0.25455473, 0.28061311, 0.27040

In [150]:
model = OneClassSVM(kernel=best_params['kernel'], gamma=best_params['gamma'], nu=best_params['nu'])
model.fit(X_train, y_train)

y_pred = model.predict(X_test)

balanced_accuracy_score(y_test, y_pred)

0.8808908908908909