In this part, you will be using the credit card fraud detection dataset from https://www.kaggle.com/datasets/mlg-ulb/creditcardfraud to train and test a Support Vector Machine (SVM) classifier. Your task
is to:

1. Download the data and split the dataset into training and testing sets (80-20 split) in a stratified manner to take care of the class imbalance. You need to code the stratified splitting function from scratch. *sklearn is not allowed for this part*
1. Implement the basic Pegasos Algorithm from the paper https://home.ttic.edu/~nati/Publications/PegasosMPB.pdf. This is in page 5, Fig 1.
1. Implement the mini-batch Pegasos algorithm from the paper https://home.ttic.edu/~nati/Publications/PegasosMPB.pdf. Do not forget the projection step. This is in page 6, Fig 2.
1. Implement the dual coordinate descent method for SVM’s from the paper https://icml.cc/Conferences/2008/papers/166.pdf. This is Algorithm 1 in the paper.
1. Report a final accuracy on the test set for all 3 approches.

In [7]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [8]:

data = pd.read_csv("/Users/nasibhuseynzade/Downloads/creditcard.csv", sep=',')  
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

y = np.where(y <= 0, -1, 1)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, shuffle=True)


In [9]:
class SVM:

    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # init weights
        self.w = np.zeros(n_features)
        self.b = 0

        for _ in range(self.n_iters):
            for idx, x_i in enumerate(X):
                condition = y[idx] * (np.dot(x_i, self.w) - self.b) >= 1
                if condition:
                    self.w -= self.lr * (2 * self.lambda_param * self.w)
                else:
                    self.w -= self.lr * (2 * self.lambda_param * self.w - np.dot(x_i, y[idx]))
                    self.b -= self.lr * y[idx]


    def predict(self, X):
        approx = np.dot(X, self.w) - self.b
        return np.sign(approx)

In [10]:
def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy   
    
clf = SVM()
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

print("SVM classification accuracy", accuracy(y_test, predictions))

SVM classification accuracy 0.998139110284049


In [11]:
class SVM_1:

    def __init__(self, lambda_param=0.01, n_iters=1000):
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.w = None
        self.b = None

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # Init weights
        self.w = np.zeros(n_features)
        self.b = 0

        for t in range(1, self.n_iters + 1):
            nu = 1 / (self.lambda_param * t)
            
            for idx, x_i in enumerate(X):
                condition = y[idx] * (np.dot(self.w, x_i) - self.b) < 1
                
                if condition:
                    self.w = (1 - nu * self.lambda_param) * self.w + nu * y[idx] * x_i
                else:
                    self.w = (1 - nu * self.lambda_param) * self.w

    def predict(self, X):
        approx = np.dot(X, self.w) - self.b
        return np.sign(approx)


In [12]:
clf_1 = SVM_1()
clf_1.fit(X_train, y_train)
predictions_1 = clf_1.predict(X_test)

print("SVM classification accuracy for pegasos algorithm", accuracy(y_test, predictions_1))

SVM classification accuracy for pegasos algorithm 0.9981742214107651


In [13]:
class SVM_2:

    def __init__(self, learning_rate=0.001, lambda_param=0.01, n_iters=1000, batch_size=32):
        self.lr = learning_rate
        self.lambda_param = lambda_param
        self.n_iters = n_iters
        self.batch_size = batch_size
        self.w = None
        self.b = None

    def mini_batch_pegasos(self, X, y):
        m, n = X.shape
        self.w = np.zeros(n)
        self.b = 0

        for epoch in range(1, self.n_iters + 1):
            # Choose A in [m] with size k uniformly at random
            A = np.random.choice(m, size=self.batch_size, replace=False)
            
            
            A_positive = [i for i in A if y[i] * np.dot(self.w, X[i]) < 1]

            nu = 1 / (self.lambda_param * epoch)

            # Compute the sub-gradient
            subgrad_w = self.lambda_param * self.w
            if A_positive:
                subgrad_w += np.sum(y[A_positive][:, np.newaxis] * X[A_positive], axis=0) / self.batch_size

            # Update w using the sub-gradient
            self.w = (1 - nu * self.lambda_param) * self.w + (nu / self.batch_size) * subgrad_w

            # Apply a step-size adjustment to ensure convergence
            w_norm = np.linalg.norm(self.w)
            
            if 1 / np.sqrt(self.lambda_param) < w_norm:
                self.w = self.w / np.sqrt(self.lambda_param) / w_norm

    def fit(self, X, y):
        self.mini_batch_pegasos(X, y)

    def predict(self, X):
        approx = np.dot(X, self.w) - self.b
        return np.sign(approx)


In [14]:
clf_2 = SVM_2()
clf_2.fit(X_train, y_train)
predictions_2 = clf_2.predict(X_test)

print("SVM classification accuracy for mini-batch pegasos algorithm", accuracy(y_test, predictions_2))

SVM classification accuracy for mini-batch pegasos algorithm 0.9981742214107651
