In [1]:
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import numpy as np
import scipy.io


In [4]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Load the data (assuming it's already loaded)
data = scipy.io.loadmat("/content/drive/MyDrive/ml_data/dhrb.mat")
data2 = scipy.io.loadmat("/content/drive/MyDrive/ml_data/dhrbclass.mat")

X = data['X']
y = data2['y'].flatten()  # Flatten the array to make it 1D

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [6]:
import numpy as np
from scipy.sparse import issparse
from time import time
import sys
from sklearn.model_selection import train_test_split
import scipy.io
from scipy.optimize import minimize

from scipy.sparse import diags, issparse
def NSSVM(X, y, pars=None):
    import numpy as np
    import scipy.sparse as sp

    def Fnorm(var):
        return np.linalg.norm(var)**2

    def GetParameters(m, n):
        maxit = 1e3
        alpha = np.zeros(m)
        tune = 0
        disp = 1
        tol = 1e-6
        eta = min(1/m, 1e-4)
        if max(m, n) < 1e4:
            beta = 1
        elif m <= 5e5:
            beta = 0.05
        elif m <= 1e8:
            beta = 10
        s0 = np.ceil(beta * n * (np.log2(m/n))**2)
        if m > 5e6:
            C = np.log10(m)
        else:
            C = 1/2
        c = C/100
        return maxit, alpha, tune, disp, tol, eta, s0, C, c

    def my_cg(Q, y, E, b, cgtol, cgit, x):
        r = b
        e = np.sum(r*r)
        t = e
        for i in range(cgit):
            if e < cgtol*t:
                break
            if i == 0:
                p = r
            else:
                p = r + (e/e0)*p
            p1 = p[:-1]
            w = np.concatenate((((Q.dot(p1)).dot(Q.T)).T + E*p1 + p[-1]*y, np.sum(y*p1)))
            a = e/np.sum(p*w)
            x = x + a*p
            r = r - a*w
            e0 = e
            e = np.sum(r*r)
        return x

    if pars is None:
        pars = {}

    t0 = 0
    m, n = X.shape

    if sp.issparse(X) and sp.issparse(X) and sp.issparse(X)/m/n > 0.1:
        X = X.toarray()

    # if n < 3e4:
    #     Qt = y*X
    # else:
    #     Qt = sp.diags(y, 0, m, m)*X

    if y.ndim != 1 or y.shape[0] != m:
        raise ValueError("y must be a 1D array with the same number of elements as rows in X")

    if n < 3e4:
        Qt = y[:, np.newaxis] * X
    else:
        if y.shape[0] != X.shape[0]:  # Ensuring y and X have matching dimensions
            raise ValueError("Mismatched dimensions between y and X: {} vs {}".format(y.shape[0], X.shape[0]))
        Qt = sp.diags(y, 0, shape=(m, m)) * X


    Q = Qt.T
    maxit, alpha, tune, disp, tol, eta, s0, C, c = GetParameters(m, n)

    if 'maxit' in pars:
        maxit = pars['maxit']
    if 'alpha' in pars:
        alpha = pars['alpha']
    if 'disp' in pars:
        disp = pars['disp']
    if 'tune' in pars:
        tune = pars['tune']
    if 'tol' in pars:
        tol = pars['tol']
    if 'eta' in pars:
        eta = pars['eta']
    if 's0' in pars:
        s0 = min(m, int(pars['s0']))
    if 'C' in pars:
        C = pars['C']
    if 'c' in pars:
        c = pars['c']

    T1 = np.where(y == 1)[0]
    nT1 = len(T1)
    T2 = np.where(y == -1)[0]
    nT2 = len(T2)

    if nT1 < s0:
        T = np.concatenate((T1, T2[:int(s0 - nT1)]))  # Cast to int
    elif nT2 < s0:
        T = np.concatenate((T1[:int(s0 - nT2)], T2))  # Cast to int
    else:
        T = np.concatenate((T1[:int(np.ceil(s0 / 2))], T2[:int(s0 - np.ceil(s0 / 2))]))  # Ensure all indices are integers
    T = np.sort(T[:int(s0)])
    s = int(s0)
    b = (nT1 >= nT2) - (nT1 < nT2)
    bb = b
    w = np.zeros(n)
    gz = -np.ones(m)
    ERR = np.zeros(maxit)
    ACC = np.zeros(maxit)
    ACC[0] = 1 - np.count_nonzero(np.sign(b)-y)/m
    ET = np.ones(s)/C

    maxACC = 0
    flag = 1
    j = 1
    r = 1.1
    count = 1
    count0 = 2
    iter0 = -1

    if disp:
        print('Run NSSVM ......')
        print('------------------------------------------')
        print('  Iter          Error           Accuracy  ')
        print('------------------------------------------')

    for iter in range(1, maxit+1):
        if iter == 1 or flag:
            QT = Q[:, T]
            QtT = Qt[T, :]
            yT = y[T]
            ytT = yT.T

        alphaT = alpha[T]
        gzT = -gz[T]
        alyT = -ytT.dot(alphaT)

        err = (np.abs(Fnorm(alpha)-Fnorm(alphaT)) + Fnorm(gzT) + alyT**2)/(m*n)
        ERR[iter-1] = np.sqrt(err)

        if tune and iter < 30 and m <= 1e8:
            stop1 = (iter > 5 and err < tol*s*np.log2(m)/100)
            stop2 = (s != s0 and np.abs(ACC[iter-1] - np.max(ACC[:iter-1])) <= 1e-4)
            stop3 = (s != s0 and iter > 10 and np.max(ACC[iter-5:iter]) < maxACC)
            stop4 = (count != count0+1 and ACC[iter] >= ACC[0])
            stop = (stop1 and (stop2 or stop3) and stop4)
        else:
            stop1 = (err < tol*np.sqrt(s)*np.log10(m))
            stop2 = (iter > 4 and np.std(ACC[iter-2:iter]) < 1e-4)
            stop3 = (iter > 20 and np.abs(np.max(ACC[iter-8:iter]) - maxACC) <= 1e-4)
            stop = (stop1 and stop2) or stop3

        if disp:
            print(f'  {iter:3d}          {err:6.2e}         {ACC[iter-1]:7.5f}')

        if ACC[iter-1] > 0 and (ACC[iter-1] >= 0.99999 or stop):
            break

        ET0 = ET
        ET = (alphaT >= 0)/C + (alphaT < 0)/c

        if min(n, s) > 1e3:
            d = my_cg(QT, yT, ET, np.concatenate((gzT, alyT)), 1e-10, 50, np.zeros(s+1))
            dT = d[:s]
            dend = d[-1]
        else:
            if s <= n:
                if iter == 1 or flag:
                    PTT0 = QtT.dot(QT)
                PTT = PTT0 + sp.diags(ET, 0, (s, s))
                d = np.linalg.solve(np.concatenate((np.concatenate((PTT, yT[:, np.newaxis]), axis=1), np.concatenate((ytT[:, np.newaxis], np.zeros((1, 1))), axis=1))), np.concatenate((gzT, alyT)))
                dT = d[:s]
                dend = d[-1]
            else:
                ETinv = 1/ET
                flag1 = np.count_nonzero(ET0) != np.count_nonzero(ET)
                flag2 = np.count_nonzero(ET0) == np.count_nonzero(ET) and np.count_nonzero(ET0-ET) == 0
                if iter == 1 or flag or flag1 or not flag2:
                    EQtT = sp.diags(ETinv, 0, (s, s)).dot(QtT)
                    P0 = np.eye(n) + QT.dot(EQtT)
                Ey = ETinv*yT
                Hy = Ey - EQtT.dot(np.linalg.solve(P0, QT.dot(Ey)))
                dend = (gzT.dot(Hy) - alyT)/(ytT.dot(Hy))
                tem = ETinv*(gzT - dend*yT)
                dT = tem - EQtT.dot(np.linalg.solve(P0, QT.dot(tem)))

        alpha = np.zeros(m)
        alphaT = alphaT + dT
        alpha[T] = alphaT
        b = b + dend

        w = QT.dot(alphaT)
        Qtw = Qt.dot(w)
        tmp = y*Qtw

        gz = Qtw - 1 + b*y
        ET1 = (alphaT >= 0)/C + (alphaT < 0)/c
        gz[T] = alphaT*ET1 + gz[T]

        j = iter+1
        ACC[j-1] = 1 - np.count_nonzero(np.sign(tmp+b)-y)/m

        if m <= 1e7:
            bb = np.mean(yT - tmp[T])
            ACCb = 1 - np.count_nonzero(np.sign(tmp+bb)-y)/m
            if ACC[j-1] >= ACCb:
                bb = b
            else:
                ACC[j-1] = ACCb
        else:
            bb = b

        if m < 6e6 and ACC[j-1] < 0.5:
            opt = {'maxiter': 10*(m >= 1e6) + 20*(m < 1e6), 'disp': False}
            b0 = minimize(lambda t: np.sum((np.sign(tmp+t[0])-y)**2), bb, options=opt).x[0]
            acc0 = 1 - np.count_nonzero(np.sign(tmp+b0)-y)/m
            if ACC[j-1] < acc0:
                bb = b0
                ACC[j-1] = acc0

        if ACC[j-1] >= maxACC:
            maxACC = ACC[j-1]
            alpha0 = alpha.copy()
            tmp0 = tmp.copy()
            maxwb = np.concatenate((w, [bb]))

        T0 = T.copy()
        mark = 0
        if tune and (err < tol or iter % 10 == 0) and iter > iter0+2 and count < 10:
            count0 = count
            count = count + 1
            s = min(m, np.ceil(r*s))
            iter0 = iter
            if count > (m >= 1e6 or n < 3) + 1*(m < 1e6 and n >= 5):
                alpha = np.zeros(m)
                gz = -np.ones(m)
                mark = 1
        else:
            count0 = count

        if s != m:
            if m < 5e8:
              T = np.argsort(np.abs(alpha - eta * gz))[-s:]
            else:
              T = np.argsort(np.abs(alpha - eta * gz))[::-1][:s]
            T = np.sort(T[:s])
            if mark:
                nT = np.count_nonzero(y[T] == 1)
                if nT == s:
                    if nT2 <= 0.75*s:
                        T = np.concatenate((T[:s-np.ceil(nT2/2)], T2[:np.ceil(nT2/2)]))
                    else:
                        T = np.concatenate((T[:np.ceil(s/4)], T2[:s-np.ceil(s/4)]))
                elif nT == 0:
                    if nT1 <= 0.75*s:
                        T = np.concatenate((T[:s-np.ceil(nT1/2)], T1[:np.ceil(nT1/2)]))
                    else:
                        T = np.concatenate((T[:np.ceil(s/4)], T1[:s-np.ceil(s/4)]))
                T = np.sort(T[:s])
        else:
            T = np.arange(m)

        flag = 1
        flag3 = np.count_nonzero(T0) == s

        if flag3:
            flag3 = np.count_nonzero(T-T0) == 0

        if flag3 or np.count_nonzero(T0) == m:
            flag = 0
            T = T0

    wb = np.concatenate((w, [bb]))
    acc = ACC[j-1]

    if m <= 1e7 and iter > 1:
        opt = {'maxiter': 20, 'disp': False}
        b0 = minimize(lambda t: np.linalg.norm(np.sign(tmp0+t[0])-y), maxwb[-1], options=opt).x[0]
        acc0 = 1 - np.count_nonzero(np.sign(tmp0+b0)-y)/m
        if acc < acc0:
            wb = np.concatenate((maxwb[:-1], [b0]))
            acc = acc0

    if acc < maxACC-1e-4:
        alpha = alpha0.copy()
        wb = maxwb.copy()
        acc = maxACC

    if disp:
        print('------------------------------------------')

    Out = {'s': s, 'w': wb, 'sv': s, 'ACC': acc, 'iter': iter, 'time': 0-t0, 'alpha': alpha}
    return Out




In [7]:
import numpy as np
import scipy.io
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load your data
data_path = "/content/drive/MyDrive/ml_data/dhrb.mat"
data2_path = "/content/drive/MyDrive/ml_data/dhrbclass.mat"
data = scipy.io.loadmat(data_path)
data2 = scipy.io.loadmat(data2_path)

X = data['X']
y = data2['y'].flatten()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def run_NSSVM(X_train, y_train, X_test, y_test, pars):
    # Standardize the data
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train the model
    model_output = NSSVM(X_train_scaled, y_train, pars)
    print("Training complete.")
    print("Model weights:", model_output['w'])
    print("Final Accuracy:", model_output['ACC'])
    print("Total iterations:", model_output['iter'])
    print("Training time:", model_output['time'], "seconds")

    # If you need to evaluate on the test set, include this and ensure predict_NSSVM is implemented
    # predictions = predict_NSSVM(X_test_scaled, model_output['w'])
    # test_accuracy = accuracy_score(y_test, predictions)
    # print("Test accuracy:", test_accuracy)

    return model_output

# Define parameters
pars = {"maxit": 2000, "lr": 0.01, "C": 1.0, "disp": True}

# Run with the updated parameters and data scaling
model_output = run_NSSVM(X_train, y_train, X_test, y_test, pars)


Run NSSVM ......
------------------------------------------
  Iter          Error           Accuracy  
------------------------------------------
    1          1.38e-04         0.82848
    2          4.29e-02         0.82430
    3          1.31e-03         0.57471
    4          7.42e-04         0.54331
    5          1.91e-09         0.62695
    6          5.39e-32         0.62678
------------------------------------------
Training complete.
Model weights: [-0.2094981  -0.05200466  0.17280422  0.04028559  0.10612397  0.00363238
  0.14507248 -0.1534323   0.09777412  0.03025629 -0.00809935  0.14009657
  0.13829572 -0.0401651  -0.04209933  0.09400907  0.01587642 -0.99718262]
Final Accuracy: 0.8243047539091195
Total iterations: 6
Training time: 0 seconds


## iris dataset

In [8]:
import numpy as np
import scipy.io
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data[iris.target != 2]  # Select only the first two classes for binary classification
y = iris.target[iris.target != 2]
y[y == 0] = -1  # Convert class labels to -1 and 1
y[y == 1] = 1


# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def run_NSSVM(X_train, y_train, X_test, y_test, pars):
    # Standardize the data
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train the model
    model_output = NSSVM(X_train_scaled, y_train, pars)
    print("Training complete.")
    print("Model weights:", model_output['w'])
    print("Final Accuracy:", model_output['ACC'])
    print("Total iterations:", model_output['iter'])
    print("Training time:", model_output['time'], "seconds")

    # If you need to evaluate on the test set, here's a placeholder for prediction logic
    # predictions = predict_NSSVM(X_test_scaled, model_output['w'])
    # test_accuracy = accuracy_score(y_test, predictions)
    # print("Test accuracy:", test_accuracy)

    return model_output

# Define parameters
pars = {"maxit": 2000, "tol": 1e-6, "C": 1.0, "disp": True}

# Run with the updated parameters and data scaling
model_output = run_NSSVM(X_train, y_train, X_test, y_test, pars)


Run NSSVM ......
------------------------------------------
  Iter          Error           Accuracy  
------------------------------------------
    1          2.34e-01         0.52500
    2          1.01e+02         1.00000
------------------------------------------
Training complete.
Model weights: [-0.03072836 -0.17170956  0.49830904  0.39881557  0.05218903]
Final Accuracy: 1.0
Total iterations: 2
Training time: 0 seconds


## a5a.t dataset

In [14]:
import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a5a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

def predict_NSSVM(X, model_output):
    """Predict using the model weights from NSSVM."""
    w = model_output['w'][:-1]  # Exclude the bias term if it's included in the weights
    b = model_output['w'][-1]
    return np.sign(X.dot(w) + b)

# Timing the NSSVM training
start_time = time.time()

# Run NSSVM on the training data
model_output = NSSVM(X_train_scaled, y_train, {'maxit': 1000, 'tol': 1e-6, 'C': 1.0, 'disp': True})

elapsed_time = time.time() - start_time  # Calculate elapsed time

# Predictions
y_pred_train = predict_NSSVM(X_train_scaled, model_output)
y_pred_test = predict_NSSVM(X_test_scaled, model_output)

# Calculate and print accuracies
train_accuracy = np.mean(y_pred_train == y_train) * 100
test_accuracy = np.mean(y_pred_test == y_test) * 100

print("Training complete.")
print("Model weights:", model_output['w'])
print("Final Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)
print("Total iterations:", model_output['iter'])
print("Training time:", elapsed_time, "seconds")


Run NSSVM ......
------------------------------------------
  Iter          Error           Accuracy  
------------------------------------------
    1          1.31e-04         0.75895
    2          9.71e-02         0.79204
    3          2.70e-02         0.75245
    4          4.42e-03         0.75580
    5          1.16e-02         0.76761
    6          1.07e-02         0.75341
    7          2.41e-02         0.75929
    8          9.56e-03         0.74619
    9          2.94e-03         0.71975
   10          4.07e-03         0.75097
   11          8.92e-05         0.74365
   12          1.64e-04         0.75259
   13          3.05e-06         0.75355
   14          2.43e-06         0.75503
   15          2.57e-06         0.75632
   16          7.70e-06         0.75298
   17          6.69e-06         0.75427
   18          1.77e-06         0.75537
   19          3.60e-09         0.76067
   20          1.42e-30         0.76024
   21          6.73e-31         0.76024
--------------

# a6a dataset

In [16]:
import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a6a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

def predict_NSSVM(X, model_output):
    """Predict using the model weights from NSSVM."""
    w = model_output['w'][:-1]  # Exclude the bias term if it's included in the weights
    b = model_output['w'][-1]
    return np.sign(X.dot(w) + b)

# Timing the NSSVM training
start_time = time.time()

# Run NSSVM on the training data
model_output = NSSVM(X_train_scaled, y_train, {'maxit': 1000, 'tol': 1e-6, 'C': 1.0, 'disp': True})

elapsed_time = time.time() - start_time  # Calculate elapsed time

# Predictions
y_pred_train = predict_NSSVM(X_train_scaled, model_output)
y_pred_test = predict_NSSVM(X_test_scaled, model_output)

# Calculate and print accuracies
train_accuracy = np.mean(y_pred_train == y_train) * 100
test_accuracy = np.mean(y_pred_test == y_test) * 100

print("Training complete.")
print("Model weights:", model_output['w'])
print("Final Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)
print("Total iterations:", model_output['iter'])
print("Training time:", elapsed_time, "seconds")


Run NSSVM ......
------------------------------------------
  Iter          Error           Accuracy  
------------------------------------------
    1          1.49e-04         0.75756
    2          9.29e-02         0.78368
    3          3.48e-02         0.77595
    4          1.15e-02         0.77571
    5          2.41e-02         0.78187
    6          2.91e-03         0.77894
    7          3.34e-02         0.76933
    8          2.57e-02         0.76025
    9          3.13e-03         0.76992
   10          7.63e-04         0.77929
   11          2.09e-04         0.77864
   12          4.53e-03         0.78022
   13          2.06e-05         0.76863
   14          5.20e-05         0.75996
   15          1.32e-02         0.76593
   16          1.05e-04         0.75638
   17          8.69e-05         0.75574
   18          7.73e-10         0.75539
   19          5.18e-30         0.75545
------------------------------------------
Training complete.
Model weights: [-5.57898627e-02 

# a7a dataset

In [17]:
import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a7a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

def predict_NSSVM(X, model_output):
    """Predict using the model weights from NSSVM."""
    w = model_output['w'][:-1]  # Exclude the bias term if it's included in the weights
    b = model_output['w'][-1]
    return np.sign(X.dot(w) + b)

# Timing the NSSVM training
start_time = time.time()

# Run NSSVM on the training data
model_output = NSSVM(X_train_scaled, y_train, {'maxit': 1000, 'tol': 1e-6, 'C': 1.0, 'disp': True})

elapsed_time = time.time() - start_time  # Calculate elapsed time

# Predictions
y_pred_train = predict_NSSVM(X_train_scaled, model_output)
y_pred_test = predict_NSSVM(X_test_scaled, model_output)

# Calculate and print accuracies
train_accuracy = np.mean(y_pred_train == y_train) * 100
test_accuracy = np.mean(y_pred_test == y_test) * 100

print("Training complete.")
print("Model weights:", model_output['w'])
print("Final Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)
print("Total iterations:", model_output['iter'])
print("Training time:", elapsed_time, "seconds")


Run NSSVM ......
------------------------------------------
  Iter          Error           Accuracy  
------------------------------------------
    1          1.73e-04         0.76420
    2          1.52e-01         0.79465
    3          3.48e-02         0.77172
    4          6.98e-03         0.76830
    5          1.68e-02         0.77734
    6          5.76e-03         0.76974
    7          1.31e-02         0.77529
    8          1.73e-03         0.78083
    9          3.88e-04         0.78698
   10          4.28e-05         0.78600
   11          4.23e-04         0.77278
   12          3.04e-06         0.77491
   13          2.87e-06         0.77635
   14          8.19e-06         0.77248
   15          2.79e-06         0.77339
   16          5.01e-06         0.77628
   17          5.09e-09         0.77423
   18          1.75e-20         0.77392
   19          3.33e-31         0.77392
------------------------------------------
Training complete.
Model weights: [-4.84215209e-02 

# a8a dataset

In [18]:
import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a8a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

def predict_NSSVM(X, model_output):
    """Predict using the model weights from NSSVM."""
    w = model_output['w'][:-1]  # Exclude the bias term if it's included in the weights
    b = model_output['w'][-1]
    return np.sign(X.dot(w) + b)

# Timing the NSSVM training
start_time = time.time()

# Run NSSVM on the training data
model_output = NSSVM(X_train_scaled, y_train, {'maxit': 1000, 'tol': 1e-6, 'C': 1.0, 'disp': True})

elapsed_time = time.time() - start_time  # Calculate elapsed time

# Predictions
y_pred_train = predict_NSSVM(X_train_scaled, model_output)
y_pred_test = predict_NSSVM(X_test_scaled, model_output)

# Calculate and print accuracies
train_accuracy = np.mean(y_pred_train == y_train) * 100
test_accuracy = np.mean(y_pred_test == y_test) * 100

print("Training complete.")
print("Model weights:", model_output['w'])
print("Final Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)
print("Total iterations:", model_output['iter'])
print("Training time:", elapsed_time, "seconds")


Run NSSVM ......
------------------------------------------
  Iter          Error           Accuracy  
------------------------------------------
    1          4.59e-03         0.76026
    2          2.17e+00         0.82691
    3          8.37e-01         0.82970
    4          1.11e-01         0.83147
    5          7.65e-02         0.82970
    6          9.41e-01         0.83084
    7          6.45e-02         0.83996
    8          6.70e-01         0.84136
    9          2.23e-02         0.84187
   10          2.87e-01         0.84326
   11          2.21e-01         0.84453
   12          5.11e-02         0.84503
   13          1.23e-01         0.84415
   14          2.48e-02         0.84503
   15          5.79e-03         0.84579
   16          1.54e-01         0.84503
   17          2.95e-02         0.84554
   18          5.58e-02         0.84554
   19          1.56e-27         0.84592
   20          6.57e-28         0.84592
------------------------------------------
Training co

# a9a dataset

In [20]:
import numpy as np
from sklearn.datasets import load_svmlight_file
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import time

# Load the dataset from the LIBSVM format file
X, y = load_svmlight_file('/content/drive/MyDrive/ml_data/a9a.t')

# Convert to dense format as many scikit-learn estimators expect dense format
X_dense = X.toarray()

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_dense, y, test_size=0.2, random_state=42)

# Standardize the data
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

def predict_NSSVM(X, model_output):
    """Predict using the model weights from NSSVM."""
    w = model_output['w'][:-1]  # Exclude the bias term if it's included in the weights
    b = model_output['w'][-1]
    return np.sign(X.dot(w) + b)

# Timing the NSSVM training
start_time = time.time()

# Run NSSVM on the training data
model_output = NSSVM(X_train_scaled, y_train, {'maxit': 1000, 'tol': 1e-6, 'C': 1.0, 'disp': True})

elapsed_time = time.time() - start_time  # Calculate elapsed time

# Predictions
y_pred_train = predict_NSSVM(X_train_scaled, model_output)
y_pred_test = predict_NSSVM(X_test_scaled, model_output)

# Calculate and print accuracies
train_accuracy = np.mean(y_pred_train == y_train) * 100
test_accuracy = np.mean(y_pred_test == y_test) * 100

print("Training complete.")
print("Model weights:", model_output['w'])
print("Final Training Accuracy:", train_accuracy)
print("Testing Accuracy:", test_accuracy)
print("Total iterations:", model_output['iter'])
print("Training time:", elapsed_time, "seconds")


Run NSSVM ......
------------------------------------------
  Iter          Error           Accuracy  
------------------------------------------
    1          1.74e-04         0.76205
    2          1.18e-01         0.79284
    3          2.39e-02         0.76751
    4          4.17e-03         0.78025
    5          1.24e-03         0.77019
    6          2.29e-04         0.75653
    7          6.80e-04         0.73887
    8          3.89e-05         0.74209
    9          7.24e-06         0.73618
   10          1.42e-07         0.75660
   11          1.25e-30         0.75660
------------------------------------------
Training complete.
Model weights: [-1.79553699e-01 -1.72668294e-02  5.93434124e-02  7.82718098e-02
  5.42774838e-02  5.93670568e-02 -3.99475155e-02  6.75545491e-02
  4.16260846e-02 -1.03886858e-03 -3.01977471e-02  6.38378239e-16
  4.13731549e-16  2.15207058e-03  3.05491849e-02 -6.22002905e-03
 -3.60969062e-03 -2.30474348e-02  3.14095589e-02 -1.51488283e-02
 -2.09512919

## sonar, mines vs rocks dataset

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

# Define a function for predictions based on the NSSVM model output
def predict_NSSVM(X, model_output):
    w = model_output['w'][:-1]  # assuming last element in 'w' is the bias
    b = model_output['w'][-1]
    return np.sign(X.dot(w) + b)  # Apply the sign function to get binary predictions

# Load the dataset
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/undocumented/connectionist-bench/sonar/sonar.all-data"
data = pd.read_csv(url, header=None)

# The last column contains labels: 'R' for rock and 'M' for mine
X = data.iloc[:, :-1].values
y = data.iloc[:, -1].values

# Convert class labels 'R' and 'M' to -1 and 1
y = np.where(y == 'R', -1, 1)

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def run_NSSVM(X_train, y_train, X_test, y_test, pars):
    # Standardize the data
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train the model
    model_output = NSSVM(X_train_scaled, y_train, pars)
    print("Training complete.")
    print("Model weights:", model_output['w'])
    print("Final Training Accuracy:", model_output['ACC'])
    print("Total iterations:", model_output['iter'])
    print("Training time:", model_output['time'], "seconds")

    # Evaluate on the test set
    predictions = predict_NSSVM(X_test_scaled, model_output)
    test_accuracy = accuracy_score(y_test, predictions)
    print("Test accuracy:", test_accuracy)

    return model_output

# Define parameters
pars = {"maxit": 2000, "tol": 1e-6, "C": 1.0, "disp": True}

# Run with the updated parameters and data scaling
model_output = run_NSSVM(X_train, y_train, X_test, y_test, pars)


Run NSSVM ......
------------------------------------------
  Iter          Error           Accuracy  
------------------------------------------
    1          1.31e-02         0.51205
    2          1.53e+01         0.87952
    3          4.90e+00         0.90964
    4          1.24e+00         0.92771
    5          6.46e-01         0.90361
    6          1.06e+00         0.89157
    7          5.55e-01         0.92169
    8          3.64e-02         0.92771
    9          1.05e+00         0.90964
   10          5.18e-01         0.93976
   11          2.42e-02         0.94578
   12          5.99e-04         0.93976
   13          7.49e-04         0.93373
   14          1.78e-02         0.93976
   15          9.58e-01         0.92771
   16          1.89e-02         0.96988
   17          5.44e-30         0.96988
------------------------------------------
Training complete.
Model weights: [ 0.28884869  0.08075067 -0.58507656  0.63855408 -0.1712619   0.07367058
 -0.18801916 -0.31676877

# heart disease dataset

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the Heart Disease dataset from UCI Repository
url = "https://archive.ics.uci.edu/ml/machine-learning-databases/heart-disease/processed.cleveland.data"
column_names = [
    'age', 'sex', 'cp', 'trestbps', 'chol', 'fbs', 'restecg',
    'thalach', 'exang', 'oldpeak', 'slope', 'ca', 'thal', 'num'
]
data = pd.read_csv(url, names=column_names, na_values='?')

# Handle missing values
data.dropna(inplace=True)

# Convert the 'num' attribute to a binary classification target:
# 0 means 'no presence' of heart disease, and 1-4 mean 'presence' of heart disease
data['num'] = (data['num'] > 0).astype(int)

# Convert class labels 0 to -1 for binary classification
y = np.where(data['num'] == 0, -1, 1)

# Extract features
X = data.iloc[:, :-1].values

# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

def run_NSSVM(X_train, y_train, X_test, y_test, pars):
    # Standardize the data
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Assuming NSSVM function is defined and imports are handled
    model_output = NSSVM(X_train_scaled, y_train, pars)
    print("Training complete.")
    print("Model weights:", model_output['w'])
    print("Final Accuracy:", model_output['ACC'])
    print("Total iterations:", model_output['iter'])
    print("Training time:", model_output['time'], "seconds")

    # Optionally evaluate on the test set
    # predictions = predict_NSSVM(X_test_scaled, model_output['w'])
    # test_accuracy = accuracy_score(y_test, predictions)
    # print("Test accuracy:", test_accuracy)

    return model_output

# Define parameters
pars = {"maxit": 2000, "tol": 1e-6, "C": 1.0, "disp": True}

# Run with the updated parameters and data scaling
model_output = run_NSSVM(X_train, y_train, X_test, y_test, pars)


Run NSSVM ......
------------------------------------------
  Iter          Error           Accuracy  
------------------------------------------
    1          7.43e-02         0.52321
    2          3.35e+01         0.83544
    3          4.94e+00         0.84388
    4          3.89e-02         0.84388
    5          7.23e-04         0.84388
    6          8.88e-02         0.84388
    7          6.59e-09         0.84388
------------------------------------------
Training complete.
Model weights: [-0.03634618  0.217803    0.11810525  0.14576381  0.10908207 -0.15354085
  0.0742419  -0.1689725   0.17420868  0.11531894  0.07345408  0.40082847
  0.18542649 -0.04051312]
Final Accuracy: 0.8438818565400844
Total iterations: 7
Training time: 0 seconds
