In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

In [3]:
df = pd.read_csv("D:/Stat/datasets/Heartdesease/heart.csv")
df.head()
train = df.drop('target', axis=1)
test = df['target']

In [4]:
x_train, x_test, y_train, y_test = train_test_split(train, test, test_size=.2)

In [5]:
x_train = x_train.to_numpy()
x_test = x_test.to_numpy()
y_train = y_train.to_numpy()
y_test = y_test.to_numpy()

In [6]:
class LinearSVMUsingSoftMargin:
    def __init__(self, C=1.0):
        self._support_vectors = None
        self.C = C
        self.beta = None
        self.b = None
        self.X = None
        self.y = None
 
        # n is the number of data points
        self.n = 0
 
        # d is the number of dimensions
        self.d = 0
    
    def __decision_function(self, X): #aka linear kernel
        return X.dot(self.beta) + self.b
    
    def __cost(self, margin): # function to be used with gradient descent
        return (1 / 2) * self.beta.dot(self.beta) + self.C * np.sum(np.maximum(0, 1 - margin))
    
    def __margin(self, X, y):
        return y * self.__decision_function(X)
    
    def fit(self, X, y, lr=0.001, epochs=500):
        # Initialize Beta and b
        self.n, self.d = X.shape
        self.beta = np.random.randn(self.d)
        self.b = 0
    
        # Required only for plotting
        self.X = X
        self.y = y
    
        loss_array = []
        for _ in range(epochs):
            margin = self.__margin(X, y)
            loss = self.__cost(margin)
            loss_array.append(loss)
            misclassified_pts_idx = np.where(margin < 1)[0]    
            d_beta = self.beta - self.C * y[misclassified_pts_idx].dot(X[misclassified_pts_idx])
            self.beta = self.beta - lr * d_beta
            d_b = - self.C * np.sum(y[misclassified_pts_idx])
            self.b = self.b - lr * d_b
        self._support_vectors = np.where(self.__margin(X, y) <= 1)[0]
    
    def predict(self, X):
        return np.sign(self.__decision_function(X))

In [7]:
model = LinearSVMUsingSoftMargin()

In [8]:
model.fit(x_train, y_train)

In [9]:
model.predict(x_test)

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1., 1.,
       1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [10]:
y_test

array([1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
       1, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 1], dtype=int64)

In [11]:
from sklearn.metrics import confusion_matrix, accuracy_score

In [12]:
print(confusion_matrix(model.predict(x_test), y_test))

[[ 0  0]
 [33 28]]


In [13]:
accuracy_score(model.predict(x_test), y_test)

0.45901639344262296

In [14]:
from sklearn.svm import SVC

model_sk = SVC(kernel='linear')

In [15]:
model_sk.fit(x_train, y_train)

SVC(kernel='linear')

In [16]:
model_sk.predict(x_test)

array([1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0,
       1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0,
       1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 1], dtype=int64)

In [17]:
confusion_matrix(model_sk.predict(x_test), y_test)

array([[25,  4],
       [ 8, 24]], dtype=int64)

In [20]:
accuracy_score(model_sk.predict(x_test), y_test)

0.8032786885245902