# SVM

## Classes

0 -> Not Fraud (remapped to -1 when the data is loaded, so the SVM works with labels -1 and 1)

1 -> Fraud

In [None]:
import pandas as pd
import numpy as np

In [None]:
# Load the transactions and re-encode the "Class" column for the SVM:
# every value <= 0 becomes -1, every other value becomes 1.
data: pd.DataFrame = pd.read_csv("../creditcard.csv")
data["Class"] = np.where(data["Class"].le(0), -1, 1)

# (rows, columns) of the loaded table.
data.shape

(284807, 31)

In [None]:
# Preview the first five rows of the table.
data.head(n=5)

Unnamed: 0,Time,V1,V2,V3,V4,V5,V6,V7,V8,V9,...,V21,V22,V23,V24,V25,V26,V27,V28,Amount,Class
0,0.0,-1.359807,-0.072781,2.536347,1.378155,-0.338321,0.462388,0.239599,0.098698,0.363787,...,-0.018307,0.277838,-0.110474,0.066928,0.128539,-0.189115,0.133558,-0.021053,149.62,-1
1,0.0,1.191857,0.266151,0.16648,0.448154,0.060018,-0.082361,-0.078803,0.085102,-0.255425,...,-0.225775,-0.638672,0.101288,-0.339846,0.16717,0.125895,-0.008983,0.014724,2.69,-1
2,1.0,-1.358354,-1.340163,1.773209,0.37978,-0.503198,1.800499,0.791461,0.247676,-1.514654,...,0.247998,0.771679,0.909412,-0.689281,-0.327642,-0.139097,-0.055353,-0.059752,378.66,-1
3,1.0,-0.966272,-0.185226,1.792993,-0.863291,-0.010309,1.247203,0.237609,0.377436,-1.387024,...,-0.1083,0.005274,-0.190321,-1.175575,0.647376,-0.221929,0.062723,0.061458,123.5,-1
4,2.0,-1.158233,0.877737,1.548718,0.403034,-0.407193,0.095921,0.592941,-0.270533,0.817739,...,-0.009431,0.798278,-0.137458,0.141267,-0.20601,0.502292,0.219422,0.215153,69.99,-1


## Split the Data for Training and Testing

In [None]:
# Split data into training/testing (each row lands in the training set with
# probability 0.8, so the split is roughly 80% / 20%).
# A fixed seed keeps the split -- and every number computed from it --
# reproducible across notebook runs.
SPLIT_SEED: int = 42
training_data_mask: np.ndarray = (
    np.random.default_rng(SPLIT_SEED).random(len(data)) < 0.8
)

train: pd.DataFrame = data[training_data_mask]
test: pd.DataFrame = data[~training_data_mask]

## Implement Soft-SVM

In [None]:
def hinge_loss(
    X: np.ndarray, Y: np.ndarray, weights: np.ndarray, bias: float, C: float
) -> float:
    """Evaluate the soft-margin SVM objective for a given model.

    The value is ``0.5 * weights . weights`` plus ``C`` times the sum, over
    all samples, of ``max(0, 1 - y * (x . weights + bias))``.

    Args:
        X (np.ndarray): Feature matrix, one row per sample.
        Y (np.ndarray): Labels (either -1 or 1), one per sample.
        weights (np.ndarray): Weight vector of the model.
        bias (float): Bias term of the model.
        C (float): Soft-SVM hyperparameter scaling the hinge penalty.

    Returns:
        float: Regularization term plus the C-scaled, summed hinge penalty.
    """
    # Raw decision value of every sample under the current model.
    scores = np.dot(X, weights) + bias

    # Per-sample hinge penalty: zero once a sample is at or beyond the margin.
    per_sample_penalty = np.maximum(1 - Y * scores, 0)

    regularizer = 0.5 * np.dot(weights, weights)
    return regularizer + C * per_sample_penalty.sum()



def fit(
    X: np.ndarray,
    Y: np.ndarray,
    learning_rate: float,
    C: float,
    epochs: int,
    log_every: int = 50,
) -> tuple[np.ndarray, float]:
    """Fit a linear soft-margin SVM with full-batch gradient descent.

    Weights and bias are initialized to zero.

    For each epoch the function:
    - Computes ``1 - Y * (X . weights + bias)`` for all samples.
    - Treats the samples where that value is positive as margin violators.
    - Updates the weights and the bias using only the margin violators.
    - Prints the current hinge loss every ``log_every`` epochs and after the
      final epoch.

    Note:
        The hinge part of the gradient is divided by the number of samples,
        so the updates descend ``0.5 * ||w||^2 + C * mean(hinge)``, whereas
        ``hinge_loss`` reports ``0.5 * ||w||^2 + C * sum(hinge)``. The printed
        value is therefore a progress indicator rather than the exact
        objective being descended.

    Args:
        X (np.ndarray): The features, shape (num_samples, num_features).
        Y (np.ndarray): The labels (either -1 or 1), one per sample.
        learning_rate (float): The gradient-descent step size.
        C (float): Soft-SVM hyperparameter for adjusting margin size.
        epochs (int): The number of epochs the model will run.
        log_every (int): Print the loss every this many epochs (and after the
            last epoch). A value <= 0 disables all printing. Defaults to 50.

    Returns:
        tuple[np.ndarray, float]: The final weights and bias.

    Raises:
        ValueError: If ``X`` has no samples, or ``X`` and ``Y`` disagree on
            the number of samples.
    """
    num_samples, num_features = X.shape

    # Without this guard an empty X divides by zero below and silently
    # produces NaN weights.
    if num_samples == 0:
        raise ValueError("X must contain at least one sample")
    if len(Y) != num_samples:
        raise ValueError(
            f"X has {num_samples} samples but Y has {len(Y)} labels"
        )

    weights = np.zeros(num_features)
    bias = 0.0

    for epoch in range(epochs):
        margins = 1 - Y * (np.dot(X, weights) + bias)

        # C * y for margin violators, 0 for every other sample; shared by the
        # weight and bias gradients so it is computed once per epoch.
        violator_coeffs = C * (margins > 0) * Y

        grad_weights = weights - np.dot(violator_coeffs, X) / num_samples
        grad_bias = -np.sum(violator_coeffs) / num_samples

        weights -= learning_rate * grad_weights
        bias -= learning_rate * grad_bias

        is_last_epoch = epoch == epochs - 1
        if log_every > 0 and (epoch % log_every == 0 or is_last_epoch):
            loss = hinge_loss(X, Y, weights, bias, C)
            print(f"Epoch {epoch}, Loss = {loss}")

    return weights, bias


def predict(X: np.ndarray, weights: np.ndarray, bias: float) -> np.ndarray:
    """Use input weights/bias to predict the label for the input data.

    The label is the sign of ``X . weights + bias``. A sample whose score is
    exactly zero lies on the decision boundary; it is assigned -1 so that
    every finite score maps to a valid label (-1 or 1) instead of 0.
    NaN scores are passed through unchanged.

    Args:
        X (np.ndarray): Data used for predictions.
        weights (np.ndarray): Weights used to make predictions.
        bias (float): Bias used to make predictions.

    Returns:
        np.ndarray: The predicted labels (-1 or 1) for the input data.
    """
    scores = np.dot(X, weights) + bias
    labels = np.sign(scores)

    # np.sign yields 0 for a zero score, which is not a valid class label.
    return np.where(labels == 0, -1, labels)

Epoch 0, Loss = 290766.36864532507
Epoch 50, Loss = 256302.45922091577
Epoch 100, Loss = 223520.20798385664
Epoch 150, Loss = 192337.5584211423
Epoch 200, Loss = 162676.45620760083
Epoch 250, Loss = 134462.66156568547
Epoch 300, Loss = 107625.55146206406
Epoch 350, Loss = 82097.94836259854
Epoch 400, Loss = 57815.955021977774
Epoch 450, Loss = 34718.79640801689
Epoch 499, Loss = 13177.509930230037


## Train the Model

In [None]:
# Separate the label column from the feature columns of the training split,
# then fit the SVM on the resulting arrays.
Y_train = train["Class"].to_numpy()
X_train = train.drop(columns=["Class"]).to_numpy()

w, b = fit(X_train, Y_train, learning_rate=0.001, C=0.01, epochs=500)

## Test the Model

In [None]:
# Separate the label column from the feature columns of the held-out split.
Y_test = test["Class"].to_numpy()
X_test = test.drop(columns=["Class"]).to_numpy()

# Fraction of held-out samples whose predicted label equals the true label.
# NOTE(review): if the two classes are heavily imbalanced, plain accuracy can
# look high even when the minority class is never predicted -- consider also
# reporting per-class recall.
(predict(X_test, w, b) == Y_test).mean()

np.float64(0.9983338829205644)