In [5]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

class SVM:
    """Linear soft-margin SVM trained with per-sample sub-gradient descent.

    The decision function is ``f(x) = w . x - b``. Each training step takes a
    gradient step on the L2-regularised hinge loss for a single sample:
    ``lambda_param * ||w||^2 + max(0, 1 - y * f(x))``.

    Labels given to ``fit`` may be any numeric values; values ``<= 0`` are
    treated as the negative class (-1) and everything else as the positive
    class (+1). ``predict`` reports classes as ``0`` / ``1``.

    Parameters
    ----------
    learning_rate : float, default 0.001
        Step size of every gradient update.
    lambda_param : float, default 0.01
        Weight of the L2 regularisation term.
    n_iters : int, default 1000
        Number of full passes over the training samples.

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector of shape ``(n_features,)``; ``None`` until ``fit`` runs.
    b : float or None
        Bias term; ``None`` until ``fit`` runs.
    """

    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training samples. Lists, NumPy arrays and DataFrames are accepted;
            the data is converted to a float array.
        y : array-like of shape (n_samples,)
            Class labels. Values ``<= 0`` map to -1, all others to +1.

        Raises
        ------
        ValueError
            If ``X`` is not two-dimensional or ``X`` and ``y`` disagree on the
            number of samples.
        """
        # Coerce up front: iterating a DataFrame yields column labels, not
        # rows, and comparing a plain list with ``<=`` raises TypeError.
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                "X must be 2-dimensional (n_samples, n_features); got ndim=%d" % X.ndim
            )
        y = np.asarray(y).ravel()
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                "X and y have inconsistent lengths: %d vs %d" % (n_samples, y.shape[0])
            )

        # Hinge loss needs labels in {-1, +1}.
        y_ = np.where(y <= 0, -1, 1)

        # Start from the zero hyperplane.
        self.w = np.zeros(n_features)
        self.b = 0

        for _ in range(self.n_iters):
            for x_i, y_i in zip(X, y_):
                # Sample lies on the correct side of the margin: hinge term is
                # zero, so only the regulariser contributes to the gradient.
                outside_margin = y_i * (np.dot(x_i, self.w) - self.b) >= 1
                if outside_margin:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    # Margin violated: add the hinge gradient (-y*x for w, and
                    # +y for b because the decision function subtracts b).
                    self.w -= self.lr * (2 * self.lambda_param * self.w - x_i * y_i)
                    self.b -= self.lr * y_i

    def predict(self, X):
        """Classify samples as ``1`` (positive side of the hyperplane) or ``0``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        numpy.ndarray
            Integer array of 0/1 labels, one per row of ``X``. Points exactly
            on the hyperplane (score 0) are labelled 0.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.w is None or self.b is None:
            raise RuntimeError("SVM must be fitted before calling predict().")
        approx = np.dot(np.asarray(X, dtype=float), self.w) - self.b
        # Strictly positive scores are class 1; everything else is class 0.
        return np.where(approx > 0, 1, 0)

In [6]:
# Helper: load a CSV into NumPy feature/target arrays so the custom SVM can be used with sklearn's utilities
def read_data_and_return_arrays(file_path, target_column):
    """Load a CSV file and split it into a feature array and a target array.

    Parameters
    ----------
    file_path : str or path-like
        Location of the CSV file, passed straight to ``pandas.read_csv``.
    target_column : str
        Name of the column holding the labels.

    Returns
    -------
    tuple of (numpy.ndarray, numpy.ndarray)
        ``(data, target)``: every column except ``target_column`` as a 2-D
        array, and ``target_column`` cast to ``int`` as a 1-D array.
    """
    frame = pd.read_csv(file_path)

    # Everything except the label column is a feature.
    feature_values = frame.drop(columns=[target_column]).values

    # Labels are forced to integers before being exported as an array.
    label_values = frame[target_column].astype(int).values

    return feature_values, label_values

In [7]:
# Load the unbalanced training CSV; 'Diabetes_binary' is the label column.
data, target = read_data_and_return_arrays(
    file_path="no_balancing/train.csv",
    target_column='Diabetes_binary',
)
# Conventional aliases used by the modelling cells: X = features, y = labels.
X, y = data, target

In [8]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    X, y, random_state=92, test_size=0.2
)

# Train the from-scratch SVM with its default hyper-parameters.
svm = SVM()
svm.fit(x_train, y_train)

# Score the model on the held-out rows only.
predictions = svm.predict(x_test)
score = accuracy_score(y_true=y_test, y_pred=predictions)

# NOTE(review): the recorded output shows only 0s at the visible ends of
# `predictions`; if the model predicts a single class, accuracy just mirrors
# the class balance -- confirm with a confusion matrix / per-class metrics.
print('predictions:', predictions)
print('accuracy:', score)

predictions: [0 0 0 ... 0 0 0]
accuracy: 0.8574666579068392


In [9]:
# Display the labels predicted for the held-out test rows (long arrays are abbreviated in the repr).
predictions

array([0, 0, 0, ..., 0, 0, 0])

In [10]:
# Display the label vector loaded from the CSV.
# NOTE(review): this is the full `y`, not `y_test`, so it does not line up
# row-for-row with `predictions`.
y

array([0, 0, 0, ..., 0, 0, 0])