<a href="https://colab.research.google.com/github/jw9603/machine_learning/blob/main/Support_Vector_Machine.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Download a data file

In [None]:
# Fetch the dataset from Google Drive by file ID; the recorded run saved it as Default.csv.
# NOTE(review): recent gdown releases flag `--id` as deprecated and accept the file ID
# as a plain positional argument — confirm against the installed gdown version.
!gdown --id 1qcQOGCaw7_DBxytT4hEulojctuLTgZaI

Downloading...
From: https://drive.google.com/uc?id=1qcQOGCaw7_DBxytT4hEulojctuLTgZaI
To: /content/Default.csv
100% 21.9k/21.9k [00:00<00:00, 21.4MB/s]


### Import libraries

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn import svm, metrics
import numpy as np

In [None]:
# Read the CSV downloaded above; its first row supplies the column names.
data = pd.read_csv("./Default.csv", header=0)
print(data)

# Features: the two numeric columns used by every model below.
x_mat = data[['balance', 'income']].to_numpy()
# Target: the 'default' column as a 1-D array aligned with x_mat.
y_vec = data['default'].to_numpy()

     Unnamed: 0  default  student      balance       income
0             1        0        0   729.526495  44361.62507
1             2        0        1   817.180407  12106.13470
2             3        0        0  1073.549164  31767.13895
3             4        0        0   529.250605  35704.49394
4             5        0        0   785.655883  38463.49588
..          ...      ...      ...          ...          ...
661        9913        1        0  2148.898454  44309.91717
662        9922        1        1  1627.898323  17546.99702
663        9950        1        0  1750.253150  51578.94016
664        9952        1        0  1515.606239  48688.51209
665        9979        1        0  2202.462395  47287.25711

[666 rows x 5 columns]


In [None]:
# Sanity check: the label vector and the two-column feature matrix must have the same row count.
print(y_vec.shape, x_mat.shape)

(666,) (666, 2)


### Basic SVM


In [None]:
def basic_svm(x_mat, y_vec, rs=108):
    """Train a default ``svm.SVC`` on an 80/20 split and print test metrics.

    Args:
        x_mat: Feature matrix handed to ``train_test_split``.
        y_vec: Label vector aligned with ``x_mat``; precision, recall and F1
            are computed with ``average='binary'``.
        rs: ``random_state`` for the split, so the same rows end up in the
            test set on every run.

    Returns:
        None. Accuracy, precision, recall and F1 on the held-out 20% are
        printed to stdout.

    References:
        https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
        https://scikit-learn.org/stable/modules/generated/sklearn.metrics.precision_recall_fscore_support.html
    """
    train_x, test_x, train_y, test_y = train_test_split(
        x_mat, y_vec, test_size=0.2, random_state=rs
    )

    # Fit with the library defaults, then score the held-out rows only.
    model = svm.SVC().fit(train_x, train_y)
    test_pred = model.predict(test_x)

    acc = metrics.accuracy_score(test_y, test_pred)
    prec, rec, f_score, _support = metrics.precision_recall_fscore_support(
        test_y, test_pred, average='binary'
    )

    # Each row pairs an output template with the value it reports.
    report = (
        ("Accuracy: {:.3f}", acc),
        ("Precision: {:.3f}", prec),
        ("Recall: {:.3f}", rec),
        ("F1 score: {:.3f}", f_score),
    )
    print("Basic SVM")
    for template, value in report:
        print(template.format(value))
    print()

In [None]:
# Baseline run: default SVC settings and the default split seed.
basic_svm(x_mat, y_vec)

Basic SVM
Accuracy: 0.575
Precision: 0.596
Recall: 0.500
F1 score: 0.544



### Kernels

In [None]:
def basic_svm_kernel(x_mat, y_vec, kernel='rbf', rs=108):
    """Train an ``svm.SVC`` with the chosen kernel and print test metrics.

    Args:
        x_mat: Feature matrix handed to ``train_test_split``.
        y_vec: Label vector aligned with ``x_mat``; precision, recall and F1
            are computed with ``average='binary'``.
        kernel: Kernel name forwarded to ``svm.SVC(kernel=...)``.
        rs: ``random_state`` for the 80/20 split.

    Returns:
        None. The metrics for the held-out 20% are printed to stdout.

    NOTE(review): the features reach the SVC exactly as passed in; kernel
    SVMs are sensitive to feature scale, so consider standardizing upstream.
    """
    split = train_test_split(x_mat, y_vec, test_size=0.2, random_state=rs)
    train_x, test_x, train_y, test_y = split

    classifier = svm.SVC(kernel=kernel)
    classifier.fit(train_x, train_y)
    test_pred = classifier.predict(test_x)

    prec, rec, f_score, _ = metrics.precision_recall_fscore_support(
        test_y, test_pred, average='binary'
    )
    acc = metrics.accuracy_score(test_y, test_pred)

    # Header first, then one line per metric, then a blank separator line.
    lines = [
        "Basic SVM with kernel - {}".format(kernel),
        "Accuracy: {:.3f}".format(acc),
        "Precision: {:.3f}".format(prec),
        "Recall: {:.3f}".format(rec),
        "F1 score: {:.3f}".format(f_score),
    ]
    for line in lines:
        print(line)
    print()

In [None]:
# Compare the kernels on the same split (default seed), in the original order.
for kernel_name in ('linear', 'poly', 'rbf'):
    basic_svm_kernel(x_mat, y_vec, kernel=kernel_name)

Basic SVM with kernel - linear
Accuracy: 0.858
Precision: 0.845
Recall: 0.882
F1 score: 0.863

Basic SVM with kernel - poly
Accuracy: 0.843
Precision: 0.813
Recall: 0.897
F1 score: 0.853

Basic SVM with kernel - rbf
Accuracy: 0.575
Precision: 0.596
Recall: 0.500
F1 score: 0.544




### Penalty Parameter

In [None]:
def basic_svm_kernel_penalty(x_mat, y_vec, kernel='rbf', penalty=1.0, rs=108):
    """Train an ``svm.SVC`` with a given kernel and ``C`` and print test metrics.

    Args:
        x_mat: Feature matrix handed to ``train_test_split``.
        y_vec: Label vector aligned with ``x_mat``; precision, recall and F1
            are computed with ``average='binary'``.
        kernel: Kernel name forwarded to ``svm.SVC(kernel=...)``.
        penalty: Value forwarded to ``svm.SVC(C=...)``.
        rs: ``random_state`` for the 80/20 split.

    Returns:
        None. The metrics for the held-out 20% are printed to stdout.
    """
    train_x, test_x, train_y, test_y = train_test_split(
        x_mat, y_vec, random_state=rs, test_size=0.2
    )

    test_pred = svm.SVC(C=penalty, kernel=kernel).fit(train_x, train_y).predict(test_x)

    acc = metrics.accuracy_score(test_y, test_pred)
    scores = metrics.precision_recall_fscore_support(test_y, test_pred, average='binary')
    # The fourth element (support) is not reported.
    prec, rec, f_score = scores[0], scores[1], scores[2]

    print("Basic SVM with kernel - {} with C - {}".format(kernel, penalty))
    for template, value in zip(
        ("Accuracy: {:.3f}", "Precision: {:.3f}", "Recall: {:.3f}", "F1 score: {:.3f}"),
        (acc, prec, rec, f_score),
    ):
        print(template.format(value))
    print()

In [None]:
# Sweep the penalty C for the rbf kernel on the same split, smallest value first.
for c_value in (0.1, 1.0, 5.0, 10.0, 100.0):
    basic_svm_kernel_penalty(x_mat, y_vec, kernel='rbf', penalty=c_value)

Basic SVM with kernel - rbf with C - 0.1
Accuracy: 0.537
Precision: 0.556
Recall: 0.441
F1 score: 0.492

Basic SVM with kernel - rbf with C - 1.0
Accuracy: 0.575
Precision: 0.596
Recall: 0.500
F1 score: 0.544

Basic SVM with kernel - rbf with C - 5.0
Accuracy: 0.843
Precision: 0.813
Recall: 0.897
F1 score: 0.853

Basic SVM with kernel - rbf with C - 10.0
Accuracy: 0.843
Precision: 0.813
Recall: 0.897
F1 score: 0.853

Basic SVM with kernel - rbf with C - 100.0
Accuracy: 0.851
Precision: 0.816
Recall: 0.912
F1 score: 0.861



### Cross Validation




In [None]:
def cross_validation_svm(x_mat, y_vec, kernel='rbf', penalty=1.0, cv=5, scale=False):
    """Print the mean and standard deviation of the cross-validated F1 score.

    Args:
        x_mat: Feature matrix passed to ``cross_val_score``.
        y_vec: Label vector aligned with ``x_mat``; F1 is computed with
            ``average='binary'``.
        kernel: Kernel name forwarded to ``svm.SVC(kernel=...)``.
        penalty: Value forwarded to ``svm.SVC(C=...)``.
        cv: Forwarded to ``cross_val_score(cv=...)``. The default of 5 keeps
            the original behaviour.
        scale: When True, the SVC is wrapped in a pipeline that standardizes
            the features first. ``cross_val_score`` fits the whole pipeline
            on each training fold, so the scaler never sees the validation
            rows. The default of False keeps the original, unscaled
            behaviour and output.

    Returns:
        None. The summary is printed to stdout.
    """
    clf = svm.SVC(kernel=kernel, C=penalty)
    header = "Basic SVM with kernel - {} with C - {}".format(kernel, penalty)

    if scale:
        # Imported here so the default path needs nothing beyond the
        # notebook's existing import cell.
        from sklearn.pipeline import make_pipeline
        from sklearn.preprocessing import StandardScaler

        clf = make_pipeline(StandardScaler(), clf)
        header += " (standardized features)"

    score_func = metrics.make_scorer(metrics.f1_score, average='binary')
    fold_scores = cross_val_score(clf, x_mat, y_vec, scoring=score_func, cv=cv)

    print(header)
    print("Avg F1 score: {:.3f}, Std F1 score:{:.3f}".format(np.mean(fold_scores),
                                                             np.std(fold_scores)))
    print()
    

In [None]:
# Repeat the rbf penalty sweep under cross-validation, smallest C first.
for c_value in (0.1, 1.0, 5.0, 10.0, 100.0):
    cross_validation_svm(x_mat, y_vec, kernel='rbf', penalty=c_value)

Basic SVM with kernel - rbf with C - 0.1
Avg F1 score: 0.518, Std F1 score:0.055

Basic SVM with kernel - rbf with C - 1.0
Avg F1 score: 0.572, Std F1 score:0.075

Basic SVM with kernel - rbf with C - 5.0
Avg F1 score: 0.875, Std F1 score:0.022

Basic SVM with kernel - rbf with C - 10.0
Avg F1 score: 0.883, Std F1 score:0.013

Basic SVM with kernel - rbf with C - 100.0
Avg F1 score: 0.883, Std F1 score:0.014

