In [33]:
import numpy as np
import pandas as pd

In [34]:
# Load the raw data file; it has no header row, so pandas labels the
# columns with integers.
df = pd.read_csv(
    "data_banknote_authentication.txt",
    header=None,
)
df.head()

Unnamed: 0,0,1,2,3,4
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [35]:
# Replace the default integer column labels with descriptive names.
df.columns = [
    'variance',
    'skewness',
    'curtosis',
    'entropy',
    'class',
]
df.head()

Unnamed: 0,variance,skewness,curtosis,entropy,class
0,3.6216,8.6661,-2.8073,-0.44699,0
1,4.5459,8.1674,-2.4586,-1.4621,0
2,3.866,-2.6383,1.9242,0.10645,0
3,3.4566,9.5228,-4.0112,-3.5944,0
4,0.32924,-4.4552,4.5718,-0.9888,0


In [36]:
# Split the frame positionally: every column except the last is a feature (x),
# and the last column is the target (y).
x = df.iloc[:, :-1].to_numpy()

# The SVM below works with labels in {-1, +1}: map 0 -> -1, anything else -> +1.
y = np.where(df.iloc[:, -1].to_numpy() == 0, -1, 1)

In [37]:
# Standardize each feature column to zero mean and unit standard deviation.
# NOTE(review): the statistics are computed over the whole dataset, i.e. before
# the train/validation/test split done in a later cell, so held-out rows
# influence the scaling.
feature_mean = np.mean(x, axis=0)
feature_std = np.std(x, axis=0)
# A constant column has zero standard deviation; divide by 1 instead so the
# column becomes all zeros rather than NaN/inf.
feature_std = np.where(feature_std == 0, 1.0, feature_std)
x = (x - feature_mean) / feature_std

In [38]:
# Display the standardized feature matrix as the cell output.
x

array([[ 1.12180565,  1.14945512, -0.97597007,  0.35456135],
       [ 1.44706568,  1.06445293, -0.89503626, -0.12876744],
       [ 1.20780971, -0.77735215,  0.12221838,  0.61807317],
       ...,
       [-1.47235682, -2.62164576,  3.75901744, -0.75488418],
       [-1.40669251, -1.75647104,  2.552043  , -0.04315848],
       [-1.04712236, -0.43982168,  0.29861555,  1.1364645 ]])

In [39]:
# Shuffle with a fixed seed, then split into train (60%), validation (20%),
# and test (remaining ~20%).
np.random.seed(42)
indices = np.arange(x.shape[0])
np.random.shuffle(indices)

# NOTE: x and y are overwritten with their shuffled versions, so re-running
# this cell permutes the already-shuffled arrays again.
x, y = x[indices], y[indices]

n_total = len(x)
n_train = int(0.6 * n_total)
n_val = int(0.2 * n_total)

# Cut points: [0, n_train) -> train, [n_train, n_train + n_val) -> validation,
# everything after that -> test.
X_train, X_val, X_test = np.split(x, [n_train, n_train + n_val])
y_train, y_val, y_test = np.split(y, [n_train, n_train + n_val])

In [41]:
class SVMFromScratch:
    """Linear SVM trained with per-sample updates.

    Two update rules are available, selected by ``optimizer``:

    * ``'subgradient'`` - subgradient of the hinge loss: a sample contributes
      ``-y * x`` whenever its margin ``y * (w.x + b)`` is below 1.
    * ``'gradient'`` - gradient of a smoothed hinge loss: the contribution is
      ``-y * x`` for ``margin <= 0``, is scaled by ``(delta - margin) / delta``
      for ``0 < margin < delta``, and vanishes for ``margin >= delta``.

    Every update also adds the regularization term ``lambda_param * w`` to the
    weight gradient. Labels are expected to be in {-1, +1}.
    """

    # Values of ``optimizer`` that ``fit`` knows how to handle.
    _OPTIMIZERS = ('subgradient', 'gradient')

    def __init__(self, lambda_param=0.01, learning_rate=0.01, max_iter=1000, optimizer='subgradient', delta=1.0):
        """Store the hyperparameters; no training happens here.

        Args:
            lambda_param: Regularization strength multiplying ``w`` in each update.
            learning_rate: Step size applied to every per-sample update.
            max_iter: Number of full passes over the training data.
            optimizer: ``'subgradient'`` or ``'gradient'`` (checked in ``fit``).
            delta: Upper margin bound of the smoothed region; only used by
                the ``'gradient'`` optimizer.
        """
        self.lambda_param = lambda_param
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.optimizer = optimizer
        self.delta = delta
        self.w = None
        self.b = 0

    def _sample_gradients(self, x_i, y_i):
        """Return ``(dw, db)`` for one sample under the configured optimizer."""
        margin = y_i * (np.dot(x_i, self.w) + self.b)
        reg = self.lambda_param * self.w

        if self.optimizer == 'subgradient':
            if margin >= 1:
                return reg, 0
            return reg - y_i * x_i, -y_i

        # 'gradient': smoothed hinge
        if margin >= self.delta:
            return reg, 0
        if 0 < margin < self.delta:
            grad_loss = -y_i * x_i * (self.delta - margin) / self.delta
            return reg + grad_loss, -y_i * (self.delta - margin) / self.delta
        return reg - y_i * x_i, -y_i

    def fit(self, X, y):
        """Train on ``X`` (n_samples x n_features) with labels ``y`` in {-1, +1}.

        Weights and bias are reset to zero, then updated once per sample for
        ``max_iter`` passes in the given sample order.

        Raises:
            ValueError: If ``optimizer`` is not supported, ``X`` is not 2-D,
                or ``X`` and ``y`` disagree on the number of samples.
        """
        if self.optimizer not in self._OPTIMIZERS:
            # Fail up front instead of hitting an unbound ``dw`` inside the loop.
            raise ValueError(
                f"Unknown optimizer {self.optimizer!r}; expected one of {self._OPTIMIZERS}"
            )

        # Plain arrays make row iteration and positional label lookup behave
        # the same for lists, DataFrames and Series as for ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got shape {X.shape}")
        if y.shape[0] != X.shape[0]:
            raise ValueError(
                f"X and y have inconsistent lengths: {X.shape[0]} vs {y.shape[0]}"
            )

        self.w = np.zeros(X.shape[1])
        self.b = 0

        for _ in range(self.max_iter):
            for x_i, y_i in zip(X, y):
                dw, db = self._sample_gradients(x_i, y_i)
                self.w -= self.learning_rate * dw
                self.b -= self.learning_rate * db

    def predict(self, x):
        """Return labels in {-1.0, +1.0} for the rows of ``x``."""
        linear_output = np.dot(x, self.w) + self.b
        # np.sign would return 0 for points exactly on the hyperplane, which is
        # not a valid label; assign those to the positive class instead.
        return np.where(linear_output >= 0, 1.0, -1.0)

    def score(self, x, y):
        """Return the fraction of rows in ``x`` whose prediction equals ``y``."""
        preds = self.predict(x)
        return np.mean(preds == y)

In [44]:
# Grid Search Function
def find_best_hyperparameters(X_train, y_train, X_val, y_val, optimizer='subgradient',
                              lambda_candidates=None, learning_rates=None, max_iters=None):
    """Exhaustively search a hyperparameter grid and keep the best validation score.

    For every (lambda, learning rate, max_iter) combination an
    ``SVMFromScratch`` is fitted on the training split and scored on the
    validation split; one progress line is printed per combination.

    Args:
        X_train, y_train: Data the candidate models are fitted on.
        X_val, y_val: Data the candidate models are scored on.
        optimizer: Passed through to ``SVMFromScratch``.
        lambda_candidates: Regularization strengths to try
            (default ``(0.001, 0.01, 0.1)``).
        learning_rates: Learning rates to try (default ``(0.001, 0.01, 0.1)``).
        max_iters: Iteration counts to try (default ``(500, 1000)``).

    Returns:
        ``(best_config, best_score)`` where ``best_config`` is a dict of
        keyword arguments for ``SVMFromScratch``. On ties the combination
        evaluated first wins, because only a strictly higher score replaces
        the current best.

    Raises:
        ValueError: If any of the candidate sequences is empty.
    """
    # Materialize as tuples so one-shot iterables survive the nested iteration.
    lambda_candidates = (0.001, 0.01, 0.1) if lambda_candidates is None else tuple(lambda_candidates)
    learning_rates = (0.001, 0.01, 0.1) if learning_rates is None else tuple(learning_rates)
    max_iters = (500, 1000) if max_iters is None else tuple(max_iters)

    grid = [
        (lam, lr, max_iter)
        for lam in lambda_candidates
        for lr in learning_rates
        for max_iter in max_iters
    ]
    if not grid:
        raise ValueError("Hyperparameter grid is empty; every candidate list needs at least one value")

    best_score = -np.inf
    best_config = {}

    for lam, lr, max_iter in grid:
        svm = SVMFromScratch(
            lambda_param=lam,
            learning_rate=lr,
            max_iter=max_iter,
            optimizer=optimizer
        )
        svm.fit(X_train, y_train)
        val_score = svm.score(X_val, y_val)

        print(f"Optimizer: {optimizer}, λ={lam}, lr={lr}, iter={max_iter} ➜ Val Acc: {val_score:.4f}")

        if val_score > best_score:
            best_score = val_score
            best_config = {
                'lambda_param': lam,
                'learning_rate': lr,
                'max_iter': max_iter,
                'optimizer': optimizer
            }

    return best_config, best_score

In [47]:
# Tune each optimizer independently on the validation split
# (subgradient descent first, then the smoothed-hinge gradient descent).
best_config_subgrad, best_val_score_subgrad = find_best_hyperparameters(
    X_train, y_train, X_val, y_val, optimizer='subgradient'
)
best_config_grad, best_val_score_grad = find_best_hyperparameters(
    X_train, y_train, X_val, y_val, optimizer='gradient'
)

# Subgradient descent is chosen only on a strictly higher validation score;
# a tie goes to the smoothed-hinge variant.
best_config, best_optimizer = (
    (best_config_subgrad, 'Subgradient Descent')
    if best_val_score_subgrad > best_val_score_grad
    else (best_config_grad, 'Gradient Descent (Smoothed Hinge)')
)

# Refit on the training split with the winning configuration and measure
# accuracy on the held-out test split.
final_svm = SVMFromScratch(**best_config)
final_svm.fit(X_train, y_train)
test_accuracy = final_svm.score(X_test, y_test)

# Report the selected optimizer and its hyperparameters.
print("\n Best Optimizer Selected:", best_optimizer)
print(
    " Best Hyperparameters:",
    f"  Lambda: {best_config['lambda_param']}",
    f"  Learning Rate: {best_config['learning_rate']}",
    f"  Max Iterations: {best_config['max_iter']}",
    f"  Optimizer: {best_config['optimizer']}",
    sep="\n",
)

# When subgradient descent won, also show its raw config dict.
if best_optimizer == 'Subgradient Descent':
    print("\n Best Config for Subgradient Descent:", best_config_subgrad, sep="\n")

# Final test accuracy as a percentage.
print(f"\n Final Test Accuracy: {test_accuracy * 100:.2f}%")


Optimizer: subgradient, λ=0.001, lr=0.001, iter=500 ➜ Val Acc: 0.9854
Optimizer: subgradient, λ=0.001, lr=0.001, iter=1000 ➜ Val Acc: 0.9854
Optimizer: subgradient, λ=0.001, lr=0.01, iter=500 ➜ Val Acc: 0.9854
Optimizer: subgradient, λ=0.001, lr=0.01, iter=1000 ➜ Val Acc: 0.9854
Optimizer: subgradient, λ=0.001, lr=0.1, iter=500 ➜ Val Acc: 0.9891
Optimizer: subgradient, λ=0.001, lr=0.1, iter=1000 ➜ Val Acc: 0.9891
Optimizer: subgradient, λ=0.01, lr=0.001, iter=500 ➜ Val Acc: 0.9818
Optimizer: subgradient, λ=0.01, lr=0.001, iter=1000 ➜ Val Acc: 0.9818
Optimizer: subgradient, λ=0.01, lr=0.01, iter=500 ➜ Val Acc: 0.9818
Optimizer: subgradient, λ=0.01, lr=0.01, iter=1000 ➜ Val Acc: 0.9818
Optimizer: subgradient, λ=0.01, lr=0.1, iter=500 ➜ Val Acc: 0.9489
Optimizer: subgradient, λ=0.01, lr=0.1, iter=1000 ➜ Val Acc: 0.9562
Optimizer: subgradient, λ=0.1, lr=0.001, iter=500 ➜ Val Acc: 0.9708
Optimizer: subgradient, λ=0.1, lr=0.001, iter=1000 ➜ Val Acc: 0.9708
Optimizer: subgradient, λ=0.1, lr=0