# **Support Vector Machines (SVM) Tutorial**

In [16]:
import pandas as pd
import numpy as np
from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV
import joblib
from sklearn.utils.testing import ignore_warnings
from sklearn.exceptions import ConvergenceWarning



### **Read the Titanic training data stored previously:**

In [10]:
# Load the previously saved training features (Xtr) and labels (ytr) from the working directory.
Xtr, ytr = (pd.read_csv(csv_name) for csv_name in ('Xtr.csv', 'ytr.csv'))

In [11]:
# Sanity check: features and labels should have the same number of rows.
print(Xtr.shape, ytr.shape, sep='\n')

(534, 6)
(534, 1)


#### **Function to print the grid-search CV accuracy for each parameter combination (kernel and C):**

In [12]:
def print_results(results):
    """Print the best parameters and a per-candidate score summary of a fitted search.

    Args:
        results: Object exposing ``best_params_`` and a ``cv_results_`` mapping
            with ``'mean_test_score'``, ``'std_test_score'`` and ``'params'``
            entries (for example the GridSearchCV fitted in this notebook).

    Returns:
        None. Everything is written to stdout.
    """
    print('BEST PARAMS: {}\n'.format(results.best_params_))

    summary = results.cv_results_
    columns = ('mean_test_score', 'std_test_score', 'params')
    for avg, spread, candidate in zip(*(summary[key] for key in columns)):
        # The +/- figure is twice the reported std_test_score, rounded to 3 decimals.
        line = '{} (+/-{}) for {}'.format(round(avg, 3), round(spread * 2, 3), candidate)
        print(line)

In [23]:
# Search space: every kernel is paired with every C value (3 x 3 = 9 candidates).
parameters = dict(
    kernel=['linear', 'rbf', 'poly'],
    C=[0.1, 1, 10],
)

# Base estimator; max_iter caps the solver iterations so each fit terminates.
# NOTE(review): hitting this cap presumably raises ConvergenceWarning - confirm
# whether the features should be scaled before fitting.
svc = SVC(max_iter=10000)

# 5-fold cross-validated grid search over the candidates above.
cv = GridSearchCV(estimator=svc, param_grid=parameters, cv=5)
cv.fit(Xtr, np.ravel(ytr['Survived'].values))

print_results(cv)



BEST PARAMS: {'C': 0.1, 'kernel': 'linear'}

0.792 (+/-0.122) for {'C': 0.1, 'kernel': 'linear'}
0.654 (+/-0.06) for {'C': 0.1, 'kernel': 'rbf'}
0.644 (+/-0.043) for {'C': 0.1, 'kernel': 'poly'}
0.764 (+/-0.054) for {'C': 1, 'kernel': 'linear'}
0.661 (+/-0.048) for {'C': 1, 'kernel': 'rbf'}
0.64 (+/-0.027) for {'C': 1, 'kernel': 'poly'}
0.573 (+/-0.248) for {'C': 10, 'kernel': 'linear'}
0.684 (+/-0.07) for {'C': 10, 'kernel': 'rbf'}
0.669 (+/-0.054) for {'C': 10, 'kernel': 'poly'}




In [25]:
# Keep the estimator chosen by the grid search. GridSearchCV was built with its
# default refit setting, so best_estimator_ carries the winning parameters.
model = cv.best_estimator_
# Bare last expression: the notebook displays the estimator's repr as the cell output.
model

SVC(C=0.1, break_ties=False, cache_size=200, class_weight=None, coef0=0.0,
    decision_function_shape='ovr', degree=3, gamma='scale', kernel='linear',
    max_iter=10000, probability=False, random_state=None, shrinking=True,
    tol=0.001, verbose=False)

#### **Pickle the model:**

In [26]:
# Persist the selected estimator to disk; the call's return value is shown as the cell output.
joblib.dump(value=model, filename='SVM_model.pkl')

['SVM_model.pkl']