# Support Vector Machine

Support vector machine problem taken from: Cortes, Corinna, and Vladimir Vapnik. "Support-vector networks." Machine learning 20.3 (1995): 273-297.

## Problem Description
Given a data matrix $X$ of dimension $n\times d$ whose $i$-th row is the sample $x_i$, and a label vector $y\in\{-1,+1\}^n$, the hyperplane $w^Tx+b=0$ separates the two classes, and $\zeta$ is a pre-defined (fixed) slack constant.

We have the following constrained optimization problem,

$$\min_{w,b} \frac{1}{2} \|w\|^2 $$
$$ \text{s.t. } y_i(w^Tx_i+b)\geq 1-\zeta, \quad i=1,\dots,n $$

## Modules Importing
Import all necessary modules. PyGRANSO must be installed, or its src folder must be added to the system path, for the `pygranso` imports to resolve.

In [1]:
import time
import torch
from pygranso.pygranso import pygranso
from pygranso.pygransoStruct import pygransoStruct
from torch.linalg import norm
from sklearn import datasets
from sklearn.preprocessing import normalize
from torchvision import datasets as torch_datasets
from torchvision import transforms
import scipy

## Data Initialization 
Specify torch device, and generate data

Use a GPU for this problem. If no CUDA device is available, set *device = torch.device('cpu')* instead.

In [2]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA instead of failing at the first `.to(device)` call.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Fix the seed so torch's random draws are reproducible across runs.
torch.manual_seed(42)

def get_data(data_name,partial_data,dp_num):
    """Load a binary-classification dataset as double-precision torch tensors.

    Labels are mapped to {-1, +1} where the raw dataset uses other values
    ('rcv1' labels are used exactly as read from the file).

    Args:
        data_name: one of 'iris', 'bc', 'lfw_pairs', 'mnist', 'rcv1'.
        partial_data: when truthy, restrict the data using ``dp_num``.
            Ignored for 'iris'. For 'rcv1' the first ``dp_num`` rows of the
            training file become the test set and the remaining rows the
            training set; otherwise the separate test file is loaded.
        dp_num: number of data points used when ``partial_data`` is truthy.

    Returns:
        ``[d, X, y, X_test, y_test, n, n_test]`` where ``X`` is (n, d),
        ``y`` is (n, 1), ``X_test`` is (n_test, d) and ``y_test`` is
        (n_test, 1), all on the module-level ``device``. 'iris' and 'bc'
        have no held-out split here, so their test tensors are a copy of
        the training data.

    Raises:
        ValueError: if ``data_name`` is not one of the supported names.
    """
    supported = ('iris', 'bc', 'lfw_pairs', 'mnist', 'rcv1')
    if data_name not in supported:
        # Fail early with a clear message instead of hitting an unbound
        # variable further down.
        raise ValueError(
            'please specify a legal data name, got {!r}; expected one of {}'
            .format(data_name, supported)
        )

    # Stays None for datasets that provide no separate test split.
    X_test = y_test = None

    if data_name == 'iris':
        iris = datasets.load_iris()
        # Keep only classes 0 and 1 to obtain a two-class problem.
        keep = iris.target != 2
        X = iris.data[keep]
        y = iris.target[keep]
        y[y == 0] = -1

        X /= X.max()  # Normalize X to speed-up convergence

    elif data_name == 'bc':
        bc = datasets.load_breast_cancer()
        X = bc.data
        y = bc.target
        if partial_data:
            X = X[0:dp_num]
            y = y[0:dp_num]
        y[y == 0] = -1
        X = normalize(X, axis=0)  # Normalize X to speed-up convergence

    elif data_name == 'lfw_pairs':
        # train_set
        lfw_pairs = datasets.fetch_lfw_pairs(subset='train')
        X = lfw_pairs.data
        y = lfw_pairs.target
        names = lfw_pairs.target_names
        print("dataset names: {}".format(names))
        if partial_data:
            X = X[0:dp_num]
            y = y[0:dp_num]
        y[y == 0] = -1

        # test_set
        lfw_pairs_test = datasets.fetch_lfw_pairs(subset='test')
        X_test = lfw_pairs_test.data
        y_test = lfw_pairs_test.target
        if partial_data:
            X_test = X_test[0:dp_num]
            y_test = y_test[0:dp_num]
        y_test[y_test == 0] = -1

    elif data_name == 'mnist':
        # NOTE: machine-specific path; adjust to a local directory.
        mnist_root = '/home/buyun/Documents/GitHub/PyGRANSO/examples/data/mnist'
        mnist_transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.1307,), (0.3081,))
        ])

        train_data = torch_datasets.MNIST(
            root=mnist_root,
            train=True,
            transform=mnist_transform,
            download=True,
        )
        test_data = torch_datasets.MNIST(
            root=mnist_root,
            train=False,
            transform=mnist_transform,
            download=True,
        )

        # A single batch covering each whole split turns the loaders into
        # one-shot "load everything, shuffled" helpers.
        train_loader = torch.utils.data.DataLoader(
            train_data, batch_size=60000, shuffle=True, num_workers=1)
        test_loader = torch.utils.data.DataLoader(
            test_data, batch_size=10000, shuffle=True, num_workers=1)

        X_train, y_train = next(iter(train_loader))
        X_test, y_test = next(iter(test_loader))

        # Flatten the 28x28 images into feature vectors.
        X_train = torch.reshape(X_train, (-1, 28*28))
        X_test = torch.reshape(X_test, (-1, 28*28))
        # Binary task: odd digits -> +1, even digits -> -1.
        y_train = 2 * (y_train % 2) - 1
        y_test = 2 * (y_test % 2) - 1

        if partial_data:
            X_train = X_train[0:dp_num]
            y_train = y_train[0:dp_num]
            X_test = X_test[0:dp_num]
            y_test = y_test[0:dp_num]

        X = X_train.to(device=device, dtype=torch.double)
        y = y_train.to(device=device, dtype=torch.double)
        X_test = X_test.to(device=device, dtype=torch.double)
        y_test = y_test.to(device=device, dtype=torch.double)

    else:  # data_name == 'rcv1'
        print('start reading data')
        # NOTE: machine-specific paths; adjust to the local dataset location.
        X, y = datasets.load_svmlight_file('/home/buyun/datasets/rcv1_train.binary.bz2')

        if not partial_data:
            X_test, y_test = datasets.load_svmlight_file('/home/buyun/datasets/rcv1_test.binary.bz2') # very large
        else:
            # Carve the test set out of the training file instead of reading
            # the very large test file.
            X_test = X[0:dp_num]
            y_test = y[0:dp_num]
            X = X[dp_num:]
            y = y[dp_num:]

        # Densify the sparse matrices so they can be wrapped as torch tensors.
        X = X.toarray()
        X_test = X_test.toarray()
        print('end reading data')

    if data_name != 'mnist':
        if X_test is None:
            # No held-out split available ('iris', 'bc'): evaluate on a copy
            # of the training data rather than crashing on an unbound name.
            X_test = X.copy()
            y_test = y.copy()
        X = torch.from_numpy(X).to(device=device, dtype=torch.double)
        y = torch.from_numpy(y).to(device=device, dtype=torch.double)
        X_test = torch.from_numpy(X_test).to(device=device, dtype=torch.double)
        y_test = torch.from_numpy(y_test).to(device=device, dtype=torch.double)

    n, d = X.shape
    n_test = X_test.shape[0]

    # Labels become column vectors so they broadcast against X @ w + b.
    y = y.unsqueeze(1)
    y_test = y_test.unsqueeze(1)

    # d: number of features; n / n_test: number of train / test samples
    return [d,X,y,X_test,y_test,n,n_test]

In [3]:
# Dataset to train on. Other names accepted by get_data:
#   'iris'
#   'bc'         (breast cancer)
#   'lfw_pairs'  (large dataset)
#   'mnist'
data_name = 'rcv1'  # document classification

# Fixed slack value used in the margin constraint.
zeta = 0.5

d, X, y, X_test, y_test, n, n_test = get_data(
    data_name=data_name,
    partial_data=True,
    dp_num=10000,
)

start reading data
end reading data


## Function Set-Up

Encode the optimization variables, and objective and constraint functions.

Note: please strictly follow the format of comb_fn, which will be used in the PyGRANSO main algorithm.

In [4]:
# Optimization variables and their shapes: weight column vector w (d x 1)
# and scalar bias b (1 x 1).
var_in = dict(w=[d, 1], b=[1, 1])

def user_fn(X_struct,X,y,zeta):
    """Evaluate the SVM objective and its folded inequality constraint.

    Args:
        X_struct: structure exposing the current iterate as ``w`` and ``b``.
        X: data matrix multiplied with ``w``.
        y: labels multiplied element-wise with ``X @ w + b``.
        zeta: slack value subtracted in the margin constraint.

    Returns:
        ``[f, ci, ce]`` with ``f = 0.5 * w^T w``, ``ci.c1`` the 2-norm of the
        positive parts of ``1 - zeta - y * (X w + b)``, and ``ce = None``
        (no equality constraints).
    """
    weights, bias = X_struct.w, X_struct.b

    # Objective: half the squared norm of the weight vector.
    f = 0.5 * weights.T @ weights

    # Per-sample violation of y_i (w^T x_i + b) >= 1 - zeta; a positive
    # entry means the constraint is violated for that sample.
    margins = y * (X @ weights + bias)
    violation = (1 - zeta - margins).to(device=device, dtype=torch.double)

    # Fold all per-sample constraints into a single scalar constraint: the
    # l2 norm of the clamped violations is zero only when none is violated.
    ci = pygransoStruct()
    ci.c1 = torch.linalg.vector_norm(torch.clamp(violation, min=0), 2)

    # No equality constraints in this problem.
    return [f, ci, None]

def comb_fn(X_struct):
    """Bind the data and slack value so only the iterate is passed in."""
    return user_fn(X_struct, X, y, zeta=zeta)

## User Options
Specify user-defined options for PyGRANSO

In [5]:
opts = pygransoStruct()
opts.torch_device = device

# Solver configuration (option meanings are defined by PyGRANSO; see its
# settings documentation).
opts.QPsolver = 'osqp'
opts.mu0 = 1
opts.limited_mem_size = 20

# Tolerance and budget settings -- presumably optimality tolerance,
# iteration cap and wall-clock cap; confirm against the PyGRANSO docs.
opts.opt_tol = 1e-6
opts.maxit = 300
opts.maxclocktime = 150

# Console output settings.
opts.print_frequency = 10
opts.print_use_orange = False
opts.print_ascii = True
opts.quadprog_info_msg = False

# Random starting point for the stacked variables (d entries for w plus one
# for b), rescaled to unit norm.
opts.x0 = torch.randn((d+1, 1)).to(device=device, dtype=torch.double)
opts.x0 /= norm(opts.x0)

## Main Algorithm

In [6]:
# Time the whole solve with wall-clock timestamps.
start = time.time()
soln = pygranso(var_spec = var_in,combined_fn = comb_fn,user_opts = opts)
end = time.time()
# Report the elapsed time so the measurement taken above is actually surfaced.
print("Total Wall Time: {}s".format(end - start))



PyGRANSO: A PyTorch-enabled port of GRANSO with auto-differentiation                                             | 
Version 1.2.0                                                                                                    | 
Licensed under the AGPLv3, Copyright (C) 2021-2022 Tim Mitchell and Buyun Liang                                  | 
Problem specifications:                                                                                          | 
 # of variables                     :   47237                                                                    | 
 # of inequality constraints        :       1                                                                    | 
 # of equality constraints          :       0                                                                    | 
Limited-memory mode enabled with size = 20.                                                                      | 
NOTE: limited-memory mode is generally NOT                            

## Result Prediction

In [7]:
# Split the stacked solution vector back into the hyperplane parameters:
# the first d entries are w, the last entry is b.
w, b = soln.final.x[:d], soln.final.x[d:d+1]

# Training accuracy: label +1 where the score is >= 0, -1 where it is < 0.
res = X @ w + b
predicted = (res >= 0).to(dtype=torch.double) - (res < 0).to(dtype=torch.double)
correct = torch.eq(predicted, y).sum().item()
acc = correct / n
print("train acc = {:.2f}%".format((100 * acc)))

# Test accuracy, computed the same way on the held-out data.
res_test = X_test @ w + b
predict_test = (res_test >= 0).to(dtype=torch.double) - (res_test < 0).to(dtype=torch.double)
correct_test = torch.eq(predict_test, y_test).sum().item()
test_acc = correct_test / n_test
print("test acc = {:.2f}%".format((100 * test_acc)))

train acc = 99.91%
test acc = 95.60%
