In [1]:
from datasets import titanic_data

from sklearn.model_selection import cross_val_score
from sklearn.metrics import confusion_matrix
import pandas as pd
import numpy as np

from sklearn.svm import SVC

from tools import roc

import plotly.express as px
from plotly.offline import init_notebook_mode
# Enable Plotly rendering inside the notebook. Per the Plotly docs,
# connected=True loads plotly.js from an online CDN instead of embedding it,
# so displaying figures needs network access.
init_notebook_mode(connected=True)

In [2]:
# Load the dataset through the project helper, which returns six values.
# NOTE(review): presumably the full feature/target pair followed by a
# train/test split of it -- confirm against `datasets.titanic_data`.
(
    original_X, original_y,
    train_X, train_y,
    test_X, test_y,
) = titanic_data()

## Linear kernel

### Estimate test error by 5-fold cross validation

In [4]:
# Linear-kernel SVM scored by cross-validation; the cell displays the mean
# accuracy over the folds.
# cv=5 is passed explicitly so the fold count matches the "5-fold" claim in
# the section title; scikit-learn releases before 0.22 defaulted to 3 folds.
model = SVC(kernel='linear', shrinking=False)
cross_val_score(model, original_X, original_y, cv=5).mean()

0.7878601468834348

## Polynomial kernel

### Estimate the polynomial degree

In [5]:
# Sweep the polynomial degree (1 through 4) and record the mean
# cross-validated accuracy for each value, then plot accuracy against degree.
degree = list(range(1, 5))
accuracy = []

for d in degree:
    model = SVC(kernel='poly', shrinking=False, degree=d)
    # cv=5 is explicit so the fold count does not depend on the installed
    # scikit-learn default (3 folds before version 0.22, 5 afterwards).
    accuracy.append(cross_val_score(model, original_X, original_y, cv=5).mean())

# One row per degree; the column names double as the axis labels in the plot.
result = pd.DataFrame({'degree': degree, 'accuracy (mean)': accuracy})
px.line(
    result,
    x='degree',
    y='accuracy (mean)',
    title='Mean cross-validated accuracy by polynomial degree',
)

### Estimate test error by 5-fold cross validation

In [7]:
# Degree-3 polynomial-kernel SVM scored by cross-validation; the cell
# displays the mean accuracy over the folds.
# cv=5 is passed explicitly so the fold count matches the "5-fold" claim in
# the section title; scikit-learn releases before 0.22 defaulted to 3 folds.
model = SVC(kernel='poly', shrinking=False, degree=3)
cross_val_score(model, original_X, original_y, cv=5).mean()

0.8136840123030569

## RBF kernel
Radial basis function kernel

### Estimate test error by 5-fold cross validation

In [9]:
# RBF-kernel SVM scored by cross-validation; the cell displays the mean
# accuracy over the folds.
# cv=5 is passed explicitly so the fold count matches the "5-fold" claim in
# the section title; scikit-learn releases before 0.22 defaulted to 3 folds.
model = SVC(kernel='rbf', shrinking=False)
cross_val_score(model, original_X, original_y, cv=5).mean()

0.8237775406440274

## Classification with RBF kernel

In [14]:
# Fit the final RBF-kernel SVM on the full dataset.
# probability=True makes scikit-learn calibrate probability estimates with an
# internal, randomly shuffled cross-validation; fixing random_state keeps
# those estimates identical across re-runs of the notebook.
# fit() returns the estimator itself, so construction and fitting are chained.
model = SVC(
    kernel='rbf',
    shrinking=False,
    probability=True,
    random_state=0,
).fit(original_X, original_y)

## Confusion matrix and ROC (evaluated on the training data)

In [15]:
# Predict on original_X -- the same data the model was fitted on in the
# preceding cell, so these counts are in-sample -- and flatten the confusion
# matrix into its four cells.
# NOTE(review): unpacking into exactly four values assumes a binary target.
predicted = model.predict(original_X).ravel()
tn, fp, fn, tp = confusion_matrix(original_y, predicted).ravel()

f"TP: {tp}, TN: {tn}, FP: {fp}, FN: {fn}"

'TP: 230, TN: 522, FP: 27, FN: 112'

In [16]:
# Build the ROC plot with the project's `roc` helper and render it.
# NOTE(review): the helper's return type is not visible here; it is only
# known to expose a .show() method (presumably a Plotly figure) -- confirm.
roc_figure = roc(model, original_X, original_y)
roc_figure.show()