Few notes about support vector machines include:
- It is a supervised learning model
- It can be used for both classification and regression problems(SVM Classifier and Regressor)
- It is mostly used for classification, especially binary classification problems
- It works best on smaller datasets with high dimensions and more complex relationship between features(Non-linear)
- It doesn't work well with larger datasets because of the extremely long training period
- It involves dividing datapoints into classes using a line(2D) or a hyperplane(multidimension)
- Important terminologies include support vectors(data points from each class closest to the plane), and margin(distance between support vectors)
- Loss function is the hinge loss function and the goal is for the model to predict correctly and for the plotted points to be as far as possible from the 
hyperplane and this helps to confirm its membership in the assigned class

Importing the dependencies

In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

Data Collection - chose the dataset for this project because of its high dimensionality

In [2]:
sonar_data = pd.read_csv('C:\\Users\\admin\\Documents\\My books\\Machine Learning Pathway\\Datasets\\sonar data.csv', header=None)

Data Pre-processing and analysis

In [3]:
# Printing the first 5 rows

sonar_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,R


In [4]:
# Checking the shape of the dataset

sonar_data.shape

(208, 61)

In [5]:
# Checking for missing values

sonar_data.isnull().sum()

0     0
1     0
2     0
3     0
4     0
     ..
56    0
57    0
58    0
59    0
60    0
Length: 61, dtype: int64

In [6]:
# Checking for dataset imbalance

sonar_data.value_counts(60)

60
M    111
R     97
Name: count, dtype: int64

In [7]:
# Encoding the labels of the dataset (M = 0 and R = 1)

# loading the label encoder
encoder = LabelEncoder()

# fit the data into the encoder and transforming it
labels = encoder.fit_transform(sonar_data[60])

# Appending the new labels to the data frame
sonar_data[61] = labels

# Dropping the previous labels
sonar_data.drop(columns=60, axis=1, inplace=True)

# Printing the first 5 rows again
sonar_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,61
0,0.02,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.018,0.0084,0.009,0.0032,1
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.014,0.0049,0.0052,0.0044,1
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.228,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.018,0.0244,0.0316,0.0164,0.0095,0.0078,1
3,0.01,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.015,0.0085,0.0073,0.005,0.0044,0.004,0.0117,1
4,0.0762,0.0666,0.0481,0.0394,0.059,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.011,0.0015,0.0072,0.0048,0.0107,0.0094,1


In [8]:
# Checking the standard deviation of the entire dataset - Need for standardization

sonar_data.std().sum()

9.034339292146369

In [9]:
# Checking for the mean statistics of each target

sonar_data.groupby(61).mean()

Unnamed: 0_level_0,0,1,2,3,4,5,6,7,8,9,...,50,51,52,53,54,55,56,57,58,59
61,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,0.034989,0.045544,0.05072,0.064768,0.086715,0.111864,0.128359,0.149832,0.213492,0.251022,...,0.019352,0.016014,0.011643,0.012185,0.009923,0.008914,0.007825,0.00906,0.008695,0.00693
1,0.022498,0.030303,0.035951,0.041447,0.062028,0.096224,0.11418,0.117596,0.137392,0.159325,...,0.012311,0.010453,0.00964,0.009518,0.008567,0.00743,0.007814,0.006677,0.007078,0.006024


In [10]:
# Checking the correlation of features in the dataset to the label

correlation = sonar_data.corr()
correlation

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,61
0,1.000000,0.735896,0.571537,0.491438,0.344797,0.238921,0.260815,0.355523,0.353420,0.318276,...,0.355299,0.311729,0.322299,0.312067,0.220642,0.313725,0.368132,0.357116,0.347078,-0.271694
1,0.735896,1.000000,0.779916,0.606684,0.419669,0.332329,0.279040,0.334615,0.316733,0.270782,...,0.434548,0.346076,0.383960,0.380165,0.262263,0.280341,0.353042,0.352200,0.358761,-0.231238
2,0.571537,0.779916,1.000000,0.781786,0.546141,0.346275,0.190434,0.237884,0.252691,0.219637,...,0.394076,0.332914,0.367186,0.289731,0.287661,0.380819,0.334108,0.425047,0.373948,-0.192195
3,0.491438,0.606684,0.781786,1.000000,0.726943,0.352805,0.246440,0.246742,0.247078,0.237769,...,0.374651,0.364772,0.334211,0.284955,0.280938,0.340254,0.344865,0.420266,0.400626,-0.250638
4,0.344797,0.419669,0.546141,0.726943,1.000000,0.597053,0.335422,0.204006,0.177906,0.183219,...,0.266617,0.314985,0.205306,0.196472,0.199323,0.219395,0.238793,0.290982,0.253710,-0.222232
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
56,0.313725,0.280341,0.380819,0.340254,0.219395,0.161333,0.186324,0.267212,0.193963,0.140327,...,0.191264,0.308197,0.361443,0.387204,0.515154,1.000000,0.509805,0.431295,0.287219,-0.000933
57,0.368132,0.353042,0.334108,0.344865,0.238793,0.203986,0.242646,0.287603,0.231745,0.212277,...,0.309673,0.370764,0.404117,0.503465,0.463659,0.509805,1.000000,0.550235,0.329827,-0.184191
58,0.357116,0.352200,0.425047,0.420266,0.290982,0.220573,0.183578,0.194400,0.097293,0.058273,...,0.298711,0.346095,0.447118,0.453658,0.430804,0.431295,0.550235,1.000000,0.642872,-0.130826
59,0.347078,0.358761,0.373948,0.400626,0.253710,0.178158,0.222493,0.146216,0.095243,0.097358,...,0.195379,0.280780,0.283471,0.264399,0.349449,0.287219,0.329827,0.642872,1.000000,-0.090055


Train Test Split

In [11]:
# Seperating dataset into data and target
data = sonar_data.iloc[:,:-1]
target = sonar_data[61]

# Splitting the dataset into training and test data
train_data, test_data, train_target, test_target = train_test_split(data, target, test_size=0.1, random_state=1)
print(data.shape, train_data.shape, test_data.shape)

(208, 60) (187, 60) (21, 60)


Model development

In [17]:
# Model building

class SVM_Classifier():


    def __init__(self, learning_rate, no_of_iterations, reg_parameter):
        
        # Initiating hyperparameters(learning rate, number of iterations) and regularization parameter
        self.learning_rate = learning_rate
        self.no_of_iterations = no_of_iterations
        self.reg_parameter = reg_parameter

    def fit(self, x, y):

        # Initializing the number of datapoints and features in the dataset
        self.nd, self.nf = x.shape

        # Initailizing the model parameters(weight and bias)
        self.w = np.zeros(self.nf)
        self.b = 0
        self.x = x
        self.y = y

        # Implementing gradient decent
        for j in range(self.no_of_iterations):
            self.update_parameters()

    def update_parameters(self):

        # Recall svm classifies datapoints based on a y value of -1(negative) or +1(positive)
        # Hence lets encode all labels with a value of 0 to -1 and those with a value of +1 as +1
        
        y_label = np.where(self.y <= 0, -1, 1)  # read as if the value of y is =< 0 then replace with -1 else leave it as 1

        # Calculating the derivative of the cost function with respect to w and b (dw and db)
        # Recall that for svm dw and db take on different values depending on if y_i * (wx-b) >= 1 or <1 so to implement it

        for i, x_i in enumerate(self.x):

            condition = (y_label[i] * (np.dot(x_i, self.w) - self.b)) >= 1
            
            if (condition is True):
                dw = 2 * self.reg_parameter * self.w
                db = 0
                
            else: 
                dw = (2 * self.reg_parameter * self.w) - (np.dot(x_i, y_label[i]))
                db = y_label[i]

        # Updating model parameters(weights and bias)
        self.w = self.w - (self.learning_rate * dw)
        self.b = self.b - (self.learning_rate * db)
                

    def predict(self, x):
        output = x.dot(self.w) - self.b
        prediction = np.sign(output)
        y_hat = np.where(prediction <= -1, -1, 1)
        return y_hat

Model training

In [18]:
# Loading an instance of the model

svm_classifier = SVM_Classifier(0.001, 1000, 0.01)

In [19]:
# fitting the model with the training data

svm_classifier.fit(train_data, train_target)

Model Evaluation

In [15]:
# Model evaluation on training data

train_data_prediction = svm_classifier.predict(train_data)
train_data_accuracy = accuracy_score(train_data_prediction, train_target)
train_data_accuracy

0.47058823529411764

In [16]:
# Model evaluation on test data

test_data_prediction = svm_classifier.predict(test_data)
test_data_accuracy = accuracy_score(test_data_prediction, test_target)
test_data_accuracy

0.42857142857142855