In [1]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score
import time
import matplotlib.pyplot as plt
from sklearn.random_projection import GaussianRandomProjection
from sklearn.preprocessing import StandardScaler

class SVM:
    def __init__(self, C=1.0, eta=0.01, max_iter=1000, tol=1e-5):
        self.C = C
        self.eta = eta
        self.max_iter = max_iter
        self.tol = tol
        self.w = None
        self.b = None

    def hinge_loss(self, X, y):
        """Compute the hinge loss for the given data and current model parameters."""
        z = y * (np.dot(X, self.w) + self.b)
        return np.maximum(0, 1 - z).mean()

    def fit(self, X, y, method='sgd'):
        """Train the SVM model using the specified optimization method."""
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0

        if method == 'sgd':
            self.fit_sgd(X, y)
        elif method == 'adagrad':
            self.fit_adagrad(X, y)
        else:
            raise ValueError("Invalid optimization method. Use 'sgd' or 'adagrad'.")

    def fit_sgd(self, X, y):
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0

        for epoch in range(self.max_iter):
            for i in range(n_samples):
                sample = X[i]
                label = y[i]
                margin = label * (np.dot(self.w, sample) + self.b)
                if margin < 1:
                    grad_w = self.C * label * sample - self.w
                    grad_b = self.C * label
                    # Gradient clipping
                    grad_norm = np.linalg.norm(grad_w)
                    if grad_norm > 1:
                        grad_w = grad_w / grad_norm
                    self.w = self.w - self.eta * grad_w
                    self.b = self.b - self.eta * grad_b
            if self.hinge_loss(X, y) < self.tol:
                break

    def fit_adagrad(self, X, y):
        """Train the SVM model using Adagrad optimization."""
        n_samples, n_features = X.shape
        self.w = np.zeros(n_features)
        self.b = 0
        G_w = np.zeros(n_features)
        G_b = 0

        for epoch in range(self.max_iter):
            for i in range(n_samples):
                sample = X[i]
                label = y[i]
                margin = label * (np.dot(self.w, sample) + self.b)
                if margin < 1:
                    grad_w = self.C * label * sample - self.w
                    grad_b = self.C * label
                    G_w += grad_w ** 2
                    G_b += grad_b ** 2
                    self.w = self.w - self.eta / np.sqrt(G_w) * grad_w
                    self.b = self.b - self.eta / np.sqrt(G_b) * grad_b
            if self.hinge_loss(X, y) < self.tol:
                break

    def predict(self, X):
        """Predict the labels for the given data."""
        y_pred = np.where(np.isnan(np.dot(X, self.w) + self.b), 0, np.sign(np.dot(X, self.w) + self.b))
        return y_pred

# Load data
toydata = pd.read_csv('toydata_tiny.csv')
X_toy = toydata.iloc[:, :-1].values
y_toy = toydata.iloc[:, -1].values

toydata_large = pd.read_csv('toydata_large.csv')
X_toy_large = X_toy
y_toy_large = y_toy
imdb_data = np.load('imdb.npz', allow_pickle=True)



# Train linear SVMs using the original features
print("Training linear SVMs using the original features:")

# Toydata
svm_sgd_toy = SVM(C=1.0, eta=0.01, max_iter=1000, tol=1e-5)
start_time = time.time()
svm_sgd_toy.fit(X_toy, y_toy, method='sgd')
sgd_runtime_toy = time.time() - start_time
sgd_accuracy_toy = accuracy_score(y_toy, svm_sgd_toy.predict(X_toy))
print(f"Toydata SGD Training Time: {sgd_runtime_toy:.2f} seconds")
print(f"Toydata SGD Accuracy: {sgd_accuracy_toy:.2f}")

svm_adagrad_toy = SVM(C=1.0, eta=0.01, max_iter=1000, tol=1e-5)
start_time = time.time()
svm_adagrad_toy.fit(X_toy, y_toy, method='adagrad')
adagrad_runtime_toy = time.time() - start_time
adagrad_accuracy_toy = accuracy_score(y_toy, svm_adagrad_toy.predict(X_toy))
print(f"Toydata Adagrad Training Time: {adagrad_runtime_toy:.2f} seconds")
print(f"Toydata Adagrad Accuracy: {adagrad_accuracy_toy:.2f}")

# Toydata_large
svm_sgd_toy_large = SVM(C=1.0, eta=0.01, max_iter=1000, tol=1e-5)
start_time = time.time()
svm_sgd_toy_large.fit(X_toy_large, y_toy_large, method='sgd')
sgd_runtime_toy_large = time.time() - start_time
sgd_accuracy_toy_large = accuracy_score(y_toy_large, svm_sgd_toy_large.predict(X_toy_large))
print(f"Toydata_large SGD Training Time: {sgd_runtime_toy_large:.2f} seconds")
print(f"Toydata_large SGD Accuracy: {sgd_accuracy_toy_large:.2f}")

svm_adagrad_toy_large = SVM(C=1.0, eta=0.01, max_iter=1000, tol=1e-5)
start_time = time.time()
svm_adagrad_toy_large.fit(X_toy_large, y_toy_large, method='adagrad')
adagrad_runtime_toy_large = time.time() - start_time
adagrad_accuracy_toy_large = accuracy_score(y_toy_large, svm_adagrad_toy_large.predict(X_toy_large))
print(f"Toydata_large Adagrad Training Time: {adagrad_runtime_toy_large:.2f} seconds")
print(f"Toydata_large Adagrad Accuracy: {adagrad_accuracy_toy_large:.2f}")




# Train SVMs using Gaussian RFFs
print("\nTraining SVMs using Gaussian RFFs:")

for n_rff in [100, 300, 500]:
    print(f"Using {n_rff} RFF features:")

    # Toydata
    rff = GaussianRandomProjection(n_components=n_rff, random_state=42)
    X_toy_rff = rff.fit_transform(X_toy)
    svm_rff_toy = SVM(C=1.0, eta=0.01, max_iter=1000, tol=1e-5)
    start_time = time.time()
    svm_rff_toy.fit(X_toy_rff, y_toy, method='sgd')
    rff_runtime_toy = time.time() - start_time
    rff_accuracy_toy = accuracy_score(y_toy, svm_rff_toy.predict(X_toy_rff))
    print(f"Toydata RFF Training Time: {rff_runtime_toy:.2f} seconds")
    print(f"Toydata RFF Accuracy: {rff_accuracy_toy:.2f}")

    # Toydata_large
    X_toy_large_rff = rff.fit_transform(X_toy_large)
    svm_rff_toy_large = SVM(C=1.0, eta=0.01, max_iter=1000, tol=1e-5)
    start_time = time.time()
    svm_rff_toy_large.fit(X_toy_large_rff, y_toy_large, method='sgd')
    rff_runtime_toy_large = time.time() - start_time
    rff_accuracy_toy_large = accuracy_score(y_toy_large, svm_rff_toy_large.predict(X_toy_large_rff))
    print(f"Toydata_large RFF Training Time: {rff_runtime_toy_large:.2f} seconds")
    print(f"Toydata_large RFF Accuracy: {rff_accuracy_toy_large:.2f}")



Training linear SVMs using the original features:
Toydata SGD Training Time: 2.38 seconds
Toydata SGD Accuracy: 0.50
Toydata Adagrad Training Time: 2.65 seconds
Toydata Adagrad Accuracy: 0.50
Toydata_large SGD Training Time: 2.62 seconds
Toydata_large SGD Accuracy: 0.50
Toydata_large Adagrad Training Time: 5.75 seconds
Toydata_large Adagrad Accuracy: 0.50

Training SVMs using Gaussian RFFs:
Using 100 RFF features:




Toydata RFF Training Time: 6.40 seconds
Toydata RFF Accuracy: 0.50




Toydata_large RFF Training Time: 5.50 seconds
Toydata_large RFF Accuracy: 0.50
Using 300 RFF features:




Toydata RFF Training Time: 7.05 seconds
Toydata RFF Accuracy: 0.50




Toydata_large RFF Training Time: 6.92 seconds
Toydata_large RFF Accuracy: 0.50
Using 500 RFF features:




Toydata RFF Training Time: 6.65 seconds
Toydata RFF Accuracy: 0.50




Toydata_large RFF Training Time: 8.46 seconds
Toydata_large RFF Accuracy: 0.50
