# Support Vector Machines

Table of contents

✔ Chapter 1.  Support Vector Machines

Chapter 2. Hyperparameter tuning

To install further python libraries, type

`!pip install --target=$my_path [LIBRARY_NAME]`

# Chapter 1-1. Implement from scratch



Download the dataset


*   data source: https://www.kaggle.com/datasets/uciml/breast-cancer-wisconsin-data



In [1]:
!wget --no-check-certificate 'https://docs.google.com/uc?export=download&id=1CuV1B9jrXgUwm01zc1aNTDXu35lq3T_1' -O cancer.csv

--2022-10-12 11:23:22--  https://docs.google.com/uc?export=download&id=1CuV1B9jrXgUwm01zc1aNTDXu35lq3T_1
Resolving docs.google.com (docs.google.com)... 173.194.214.139, 173.194.214.100, 173.194.214.102, ...
Connecting to docs.google.com (docs.google.com)|173.194.214.139|:443... connected.
HTTP request sent, awaiting response... 303 See Other
Location: https://doc-0o-7g-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/r9o7pcp6te9ehmt9c4dhkfock7p74ipa/1665573750000/12385986347045621890/*/1CuV1B9jrXgUwm01zc1aNTDXu35lq3T_1?e=download&uuid=cc947225-4c4b-4ae3-bfa8-d8719f3bcffc [following]
--2022-10-12 11:23:22--  https://doc-0o-7g-docs.googleusercontent.com/docs/securesc/ha0ro937gcuc7l7deffksulhg5h7mbp1/r9o7pcp6te9ehmt9c4dhkfock7p74ipa/1665573750000/12385986347045621890/*/1CuV1B9jrXgUwm01zc1aNTDXu35lq3T_1?e=download&uuid=cc947225-4c4b-4ae3-bfa8-d8719f3bcffc
Resolving doc-0o-7g-docs.googleusercontent.com (doc-0o-7g-docs.googleusercontent.com)... 108.177.13.132, 2607:f

In [2]:
import pandas as pd
df = pd.read_csv('cancer.csv') # or use scikit-learn (from sklearn.datasets import load_breast_cancer)
df.head()

Unnamed: 0,id,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,...,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst,Unnamed: 32
0,842302,M,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,
1,842517,M,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,
2,84300903,M,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,
3,84348301,M,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,
4,84358402,M,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,


In [3]:
print(df.shape)

(569, 33)


Preprocess the data

In [4]:
# transform the categories M and B into 1 and -1, respectively.
diagnosis_map = {'M':1, 'B':-1} # malignant or benign
df['diagnosis'] = df['diagnosis'].map(diagnosis_map)
# drop last column (extra column added by pd) and  first column (id)
df.drop(df.columns[[-1, 0]], axis=1, inplace=True)

In [6]:
df.head()

Unnamed: 0,diagnosis,radius_mean,texture_mean,perimeter_mean,area_mean,smoothness_mean,compactness_mean,concavity_mean,concave points_mean,symmetry_mean,...,radius_worst,texture_worst,perimeter_worst,area_worst,smoothness_worst,compactness_worst,concavity_worst,concave points_worst,symmetry_worst,fractal_dimension_worst
0,1,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,...,25.38,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189
1,1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,...,24.99,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902
2,1,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,...,23.57,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758
3,1,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,...,14.91,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173
4,1,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,...,22.54,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678


In [7]:
from sklearn.preprocessing import StandardScaler

# create X and Y
Y = df.loc[:, 'diagnosis'].astype(float)
X = df.iloc[:, 1:]
# normalize the features using StandardScaler from sklearn.preprocessing
X_normalized = StandardScaler().fit_transform(X.values)
X = pd.DataFrame(X_normalized)

Split into training and testing sets

In [8]:
from sklearn.model_selection import train_test_split
# insert 1 in every row for intercept b
X.insert(loc=len(X.columns), column='intercept', value=1)

# split data into train and test set
print("splitting dataset into train and test sets...")
X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

splitting dataset into train and test sets...


Define the cost function


*   Note that we are using the hinge loss (refer to Lab2 slides)

In [9]:
import numpy as np
def cost_function(W, X, Y, C=10000):
    # calculate hinge loss
    N = X.shape[0]
    distances = 1 - Y * (np.dot(X, W))
    distances[distances < 0] = 0  # equivalent to max(0, distance)
    hinge_loss = C * (np.sum(distances) / N)

    # calculate cost
    cost = 1 / 2 * np.dot(W, W) + hinge_loss
    return cost

Define the gradient of the cost function

In [10]:
def get_cost_gradient(W, X_batch, Y_batch, C=10000):
    # for stochastic gradient descent
    if type(Y_batch) == np.float64:
        
        Y_batch = np.array([Y_batch])
        X_batch = np.array([X_batch])  # gives multidimensional array
    
    distance = 1 - (Y_batch * np.dot(X_batch, W))
    dw = np.zeros(len(W))
    for ind, d in enumerate(distance):
        if max(0, d) == 0:
            di = W
        else:
            di = W - (C * Y_batch[ind] * X_batch[ind])
        dw += di

    dw = dw/len(Y_batch)  # average
    return dw

Use stochastic gradient descent

In [11]:
from sklearn.utils import shuffle

def stochastic_gradient_descent(features, outputs, learning_rate=0.000001):
    max_epochs = 5000
    weights = np.zeros(features.shape[1])
    nth = 0
    prev_cost = float("inf")
    cost_threshold = 0.01  # in percent
    # stochastic gradient descent
    for epoch in range(1, max_epochs):
        
        X, Y = shuffle(features, outputs)
        for ind, x in enumerate(X):
            
            ascent = get_cost_gradient(weights, x, Y[ind])
            weights = weights - (learning_rate * ascent)

        # convergence check on 2^nth epoch
        if epoch == 2 ** nth or epoch == max_epochs - 1:
            cost = cost_function(weights, features, outputs)
            print("Epoch is: {} and Cost is: {}".format(epoch, cost))
            # stopping criteria to assume the covergnece
            if abs(prev_cost - cost) < cost_threshold * prev_cost:
                return weights
            prev_cost = cost
            nth += 1
    return weights

Training

In [12]:
# train the model
print("training started...")

W = stochastic_gradient_descent(X_train.to_numpy(), y_train.to_numpy())
print("training finished.")
print("weights are: {}".format(W))

training started...
Epoch is: 1 and Cost is: 913.4857671744835
Epoch is: 2 and Cost is: 696.332661221101
Epoch is: 4 and Cost is: 595.5691376106556
Epoch is: 8 and Cost is: 534.9322502557336
Epoch is: 16 and Cost is: 454.57487693450923
Epoch is: 32 and Cost is: 431.0080356120104
Epoch is: 64 and Cost is: 376.3260925775941
Epoch is: 128 and Cost is: 334.02763040930466
Epoch is: 256 and Cost is: 295.54056503756004
Epoch is: 512 and Cost is: 266.5604366371275
Epoch is: 1024 and Cost is: 252.95288464519226
Epoch is: 2048 and Cost is: 285.8364631397084
Epoch is: 4096 and Cost is: 253.04963282707345
Epoch is: 4999 and Cost is: 237.6942525785037
training finished.
weights are: [ 0.19538209 -0.08424297 -0.53463872  0.10829251 -0.27312193 -3.71747394
  2.36701926  3.9444207  -0.24225862  1.65897068  3.62780032 -0.89248757
 -1.79218664  1.69986622  0.771239    1.23075813 -2.4274218   1.62861801
 -1.05293775 -2.72328657  2.15773973  2.19619045 -1.15918776  2.09086268
 -0.34884782 -0.65025052  2.5

In [13]:
# testing the model
from sklearn.metrics import accuracy_score, recall_score, precision_score
print("testing the model...")
y_train_predicted = np.array([])
for i in range(X_train.shape[0]):
    yp = np.sign(np.dot(X_train.to_numpy()[i], W))
    y_train_predicted = np.append(y_train_predicted, yp)

y_test_predicted = np.array([])
for i in range(X_test.shape[0]):
    yp = np.sign(np.dot(X_test.to_numpy()[i], W))
    y_test_predicted = np.append(y_test_predicted, yp)

print("accuracy on test dataset: {}".format(accuracy_score(y_test, y_test_predicted)))
print("recall on test dataset: {}".format(recall_score(y_test, y_test_predicted)))
print("precision on test dataset: {}".format(recall_score(y_test, y_test_predicted)))

testing the model...
accuracy on test dataset: 0.9649122807017544
recall on test dataset: 0.9767441860465116
precision on test dataset: 0.9767441860465116


# Chapter 1-2. Use scikit-learn library


Use linear kernel

In [14]:
from sklearn.svm import LinearSVC, SVC
classifier = LinearSVC(C=10000, loss='hinge')
classifier.fit(X_train, y_train)



LinearSVC(C=10000, loss='hinge')

In [15]:
y_pred = classifier.predict(X_test)




In [16]:
print("accuracy on test dataset: {}".format(accuracy_score(y_test, y_pred)))
print("recall on test dataset: {}".format(recall_score(y_test, y_pred)))
print("precision on test dataset: {}".format(recall_score(y_test, y_pred)))

accuracy on test dataset: 0.9122807017543859
recall on test dataset: 0.9534883720930233
precision on test dataset: 0.9534883720930233


Use rbf kernel

In [17]:
classifier_w_rbf = SVC(kernel = 'rbf', random_state = 0, C=10000)
classifier_w_rbf.fit(X_train, y_train)



SVC(C=10000, random_state=0)

In [18]:
y_pred = classifier_w_rbf.predict(X_test)




In [19]:
print("accuracy on test dataset: {}".format(accuracy_score(y_test, y_pred)))
print("recall on test dataset: {}".format(recall_score(y_test, y_pred)))
print("precision on test dataset: {}".format(recall_score(y_test, y_pred)))

accuracy on test dataset: 0.9385964912280702
recall on test dataset: 0.9534883720930233
precision on test dataset: 0.9534883720930233


Use cross-validation

In [20]:
from sklearn.model_selection import cross_val_score

scores = cross_val_score(classifier_w_rbf, X_test, y_test, cv=5) 



In [23]:
print('cross-val-score: {}'.format(scores))
print('cross-val-score.mean: {}'.format(scores.mean()))

cross-val-score: [0.95652174 0.91304348 1.         0.91304348 0.90909091]
cross-val-score.mean: 0.9383399209486166
