# Model: Support Vector Machine

In [1]:
# Import relevant packages
import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np
import pandas as pd
import seaborn as sns
import pickle


from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import cross_val_score, train_test_split, GridSearchCV, StratifiedKFold
from sklearn.metrics import make_scorer, accuracy_score, recall_score, roc_auc_score, precision_score, balanced_accuracy_score
from sklearn.metrics import plot_confusion_matrix, auc, roc_curve, plot_roc_curve, plot_precision_recall_curve, precision_recall_curve, confusion_matrix

from sklearn.svm import SVC

from utils import process_data, split_train, grid_search

In [2]:
# Run the project's data-processing helper; the output logged below shows it
# reads batched data, splits train/test and scales. The exact meaning of
# type_='normal' lives in utils.process_data — TODO confirm there.
data = process_data(type_='normal')

Beginning data processing ...
Reading in batched data ...


100%|██████████████████████████████████████████████████████████████████████████████████| 19/19 [00:21<00:00,  1.13s/it]


Splitting data into train and test ...
Scaling data ...
Completed normal data processing.


In [3]:
# Pull the scaled feature sets and their labels out of the object returned by
# process_data, one train pair and one test pair.
X_train, y_train = data['X_train_scaled'], data['y_train']
X_test, y_test = data['X_test_scaled'], data['y_test']

## SVM Model

In [7]:
# Reduce the training set before the grid search below consumes it.
# NOTE(review): keep_percent=0.1 presumably keeps 10% of the rows — confirm
# against utils.split_train (including whether the subsample is stratified).
X_input, y_input = split_train(
    X_train,
    y_train,
    keep_percent=0.1,
)

Splitting train ...


In [2]:
# Scorer used to rank hyperparameter candidates: ROC AUC computed from
# predicted probabilities (needs_proba=True) rather than hard class labels.
criterion = make_scorer(roc_auc_score, needs_proba=True)

# probability=True enables SVC.predict_proba, which the scorer above relies on.
clf_svm = SVC(probability=True)

# Candidate regularisation strengths, shared by every kernel family in the grid.
C = [0.1, 1, 10]
param_grid = [
    {'kernel': ['linear'], 'C': C},
    {'kernel': ['poly'], 'degree': [2, 3, 4], 'C': C},
    {'kernel': ['rbf'], 'gamma': [0.001, 0.01, 0.1], 'C': C}
]

# Project helper from utils; presumably wraps a k-fold (k=5) grid search —
# TODO confirm. Its result must expose `best_estimator_`, which is read below.
grid_result_svm = grid_search(clf_svm, criterion, param_grid, k=5, X=X_input, y=y_input)

# Choose best parameters from hyperparameter tuning
clf_svm = grid_result_svm.best_estimator_

# Save the full search result and the best SVM model. Context managers make
# sure each file is flushed and closed as soon as the dump finishes, instead of
# leaving an open handle behind for the garbage collector.
with open('models/grid_svm.sav', 'wb') as grid_file:
    pickle.dump(grid_result_svm, grid_file)
with open('models/model_svm.sav', 'wb') as model_file:
    pickle.dump(clf_svm, model_file)

In [None]:
# svm_output = svm_model(X_input, y_input, X_test, y_test, standardize = False, scoring = scoring_metric,
#                        test = test_mode, folds = cv_folds)

In [None]:
# svm_output