# Support Vector Machines

<img src='img/svm.png'>

[Image Source](https://towardsdatascience.com/support-vector-machine-vs-logistic-regression-94cc2975433f)

In [1]:
import pickle as pkl

# Deserialize the object stored in the Titanic pickle file and bind it to
# `df_data`, which the cells below use as a pandas DataFrame.
# Caution: unpickling can execute code embedded in the file, so only load
# pickles that come from a trusted source.
with open('../data/titanic_tansformed.pkl', mode='rb') as pickle_file:
    df_data = pkl.load(pickle_file)

In [2]:
# Preview the first five rows of the loaded data.
df_data.head(n=5)

Unnamed: 0,Survived,Age,SibSp,Parch,Fare,2,3,male,Q,S
0,0,22.0,1,0,7.25,0,1,1,0,1
1,1,38.0,1,0,71.2833,0,0,0,0,0
2,1,26.0,0,0,7.925,0,1,0,0,1
3,1,35.0,1,0,53.1,0,0,0,0,1
4,0,35.0,0,0,8.05,0,1,1,0,1


In [4]:
# Dimensions of the loaded data as a (rows, columns) tuple.
df_data.shape

(889, 10)

In [5]:
# Separate the prediction target ("Survived") from the remaining feature columns.
label = df_data["Survived"]
data = df_data.drop(labels=["Survived"], axis="columns")

In [6]:
from sklearn.model_selection import train_test_split  
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
data_train, data_test, label_train, label_test = train_test_split(
    data,
    label,
    test_size=0.2,
    random_state=101,
)

In [7]:
from sklearn.svm import SVC
import time

# Train a linear-kernel SVM classifier, time the fit, and report test metrics.
# NOTE(review): the features are passed in unscaled; scikit-learn's SVM guide
# recommends standardising inputs (e.g. StandardScaler), which usually also
# shortens training -- consider adding it and re-recording the outputs.
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# perf_counter is a monotonic, high-resolution clock, so the measured duration
# is not distorted by system clock adjustments the way time.time() can be.
tic = time.perf_counter()
svm_cla = SVC(kernel='linear')
svm_cla.fit(data_train, label_train)
print('Time taken for training SVM ', (time.perf_counter()-tic), 'secs')

# Predict once and reuse the result for every metric; svm_cla.score() would
# run a second prediction pass over the test set to compute the same accuracy.
predictions = svm_cla.predict(data_test)
print('Accuracy', accuracy_score(label_test, predictions))

print(confusion_matrix(label_test, predictions))
print(classification_report(label_test, predictions))

Time taken for training SVM  6.868283748626709 secs
Accuracy 0.7865168539325843
[[95 12]
 [26 45]]
             precision    recall  f1-score   support

          0       0.79      0.89      0.83       107
          1       0.79      0.63      0.70        71

avg / total       0.79      0.79      0.78       178



### SVM Parameters

In [8]:
# Per-feature weights of the fitted model. SVC exposes coef_ only when it was
# trained with kernel='linear', so this needs the linear model in svm_cla.
print(svm_cla.coef_)

[[-1.30282551e-04 -2.28468122e-03 -1.20117001e-03  2.96674936e-05
  -2.94167785e-03 -4.64304156e-03 -2.00130510e+00  3.20774925e-04
  -1.11681822e-03]]


## Hyperparameters

### a. Changing kernel

In [9]:
from sklearn.svm import SVC
import time

# Train an SVM with a degree-3 polynomial kernel, time the fit, and report
# test metrics for comparison with the linear kernel.
# This rebinds svm_cla: re-running the earlier `svm_cla.coef_` cell afterwards
# will fail, because coef_ is only available for linear kernels.
# NOTE(review): the recorded run of this cell took ~484 s; the unscaled
# features are a likely cause -- confirm by standardising the inputs first.
from sklearn.metrics import classification_report, confusion_matrix

# perf_counter is a monotonic, high-resolution clock, so the measured duration
# is not distorted by system clock adjustments the way time.time() can be.
tic = time.perf_counter()
svm_cla = SVC(kernel='poly', degree=3)
svm_cla.fit(data_train, label_train)
print('Time taken for training SVM ', (time.perf_counter()-tic), 'secs')

predictions = svm_cla.predict(data_test)

print(confusion_matrix(label_test, predictions))
print(classification_report(label_test, predictions))

Time taken for training SVM  483.8420338630676 secs
[[91 16]
 [26 45]]
             precision    recall  f1-score   support

          0       0.78      0.85      0.81       107
          1       0.74      0.63      0.68        71

avg / total       0.76      0.76      0.76       178



### b. C value

<img src='img/svm_hyperparameter.gif'>