In [1]:
import pandas as pd 
import numpy as np 
import random as rand
import matplotlib.pyplot as plt 
import scipy.optimize as op 
import seaborn as sbn
from sklearn.datasets import load_svmlight_file
from sklearn import svm
import matplotlib.pyplot as plt
from sklearn.utils import shuffle
from random import randint

In [2]:
def get_data(df):
    """Load a comma-separated numeric file and split off its last column.

    Parameters
    ----------
    df : str, path-like or file-like
        Source handed directly to ``np.loadtxt`` with ``delimiter=","``.

    Returns
    -------
    X : ndarray, shape (n_rows, n_cols - 1)
        Every column except the last.
    Y : ndarray, shape (n_rows,)
        The last column (used as the label vector by the training
        functions in this file).
    """
    # ndmin=2 keeps a single-row file two-dimensional; without it loadtxt
    # returns a 1-D array and the column slicing below raises IndexError.
    data = np.loadtxt(df, delimiter=",", ndmin=2)
    X = data[:, :-1]
    Y = data[:, -1]
    return X, Y

def accuracy(y_true, y_pred):
    """Return the percentage of entries where ``y_pred`` equals ``y_true``.

    Parameters
    ----------
    y_true : ndarray
        Reference values; its first dimension is the denominator.
    y_pred : ndarray
        Predicted values, compared element-wise against ``y_true``.

    Returns
    -------
    float
        ``100 * (number of matches) / y_true.shape[0]``.
    """
    n_total = y_true.shape[0]
    n_correct = np.sum(y_pred == y_true)
    return 100 * (n_correct / n_total)

def sgd_pegasos_svm(x, y, weights, lam, iterations):
    """Run single-sample Pegasos updates and return the final weight vector.

    Parameters
    ----------
    x : ndarray
        Training examples, indexed by row.
    y : ndarray
        One target per row of ``x``.
    weights : ndarray or None
        Starting weights; ``None`` means a zero vector shaped like ``x[0]``.
    lam : float
        Regularisation strength; the step size at step ``t`` is
        ``1 / (lam * t)``.
    iterations : int
        Number of update steps to perform.

    Returns
    -------
    ndarray
        The weights after ``iterations`` updates.
    """
    if weights is None:
        weights = np.zeros(x[0].shape)
    n_examples = len(y)
    for t in range(1, iterations + 1):
        # One example is drawn uniformly at random per step.
        pick = randint(0, n_examples - 1)
        eta = 1 / (lam * t)
        # The shrink term is applied on every step; the example term is
        # added only when the margin is below 1.
        shrunk = (1 - eta * lam) * weights
        margin = (y[pick] * weights) @ x[pick].T
        weights = shrunk + eta * y[pick] * x[pick] if margin < 1 else shrunk
    return weights

def train_seqSVM(x, y, lam, max_iter):
    """Train one-vs-rest linear classifiers with single-sample Pegasos.

    Parameters
    ----------
    x : ndarray, shape (n, d)
        Training examples.
    y : ndarray, length n
        Class labels. Classes are addressed as ``1 .. K`` where ``K`` is
        the number of distinct values in ``y``.
    lam : float
        Regularisation strength forwarded to ``sgd_pegasos_svm``.
    max_iter : int
        Number of SGD steps per class.

    Returns
    -------
    ndarray, shape (K, d)
        Row ``j - 1`` holds the weights of the "class j vs rest" problem.
    """
    n, d = x.shape
    uniqlbl = np.unique(y)
    w = np.zeros((len(uniqlbl), d))
    for j in range(1, len(uniqlbl) + 1):
        # The hinge-loss update needs targets in {-1, +1}. Encoding the
        # "rest" class as 0 zeroes both the margin and the update term, so
        # negative examples would never influence the weights.
        y_i = np.where(np.asarray(y) == j, 1, -1).reshape(n, 1)
        w[j - 1, :] = sgd_pegasos_svm(x, y_i, np.zeros(d), lam, max_iter)
    return w
   
def mini_batch_pegasos_svm(x, y, weights, lam, iterations, batch_size):
    """Run mini-batch Pegasos updates and return the final weight vector.

    Parameters
    ----------
    x : array-like, shape (n, d)
        Dense training examples.
    y : array-like, shape (n,) or (n, 1)
        Targets in {-1, +1}, one per row of ``x``.
    weights : ndarray or None
        Starting weights; ``None`` means a zero vector shaped like ``x[0]``.
    lam : float
        Regularisation strength; the step size at step ``t`` is
        ``1 / (lam * t)``.
    iterations : int
        Number of update steps. With 0 the starting weights are returned.
    batch_size : int
        Number of examples drawn (with replacement, the
        ``np.random.choice`` default) at every step.

    Returns
    -------
    ndarray
        The weights after ``iterations`` updates.
    """
    x = np.asarray(x)
    # Flatten so both (n,) and (n, 1) target arrays are accepted.
    y = np.asarray(y).reshape(-1)
    if weights is None:
        weights = np.zeros(x[0].shape)
    num_S = len(y)
    for i in range(iterations):
        step = 1 / (lam * (i + 1))
        # mini-batch sampling
        batch = np.random.choice(num_S, size=batch_size)
        x_b, y_b = x[batch], y[batch]
        # Only examples that violate the margin under the current weights
        # contribute to the sub-gradient.
        violators = y_b * (x_b @ weights) < 1
        grad = (y_b[violators, None] * x_b[violators]).sum(axis=0)
        # Rebind `weights` so every step builds on the previous one; the
        # result is also defined when the loop body never runs.
        weights = (1 - step * lam) * weights + (step / batch_size) * grad
    return weights

def train_batchSVM(x, y, lam, max_iter, batch_size):
    """Train one-vs-rest linear classifiers with mini-batch Pegasos.

    Parameters
    ----------
    x : ndarray, shape (n, d)
        Training examples.
    y : ndarray, length n
        Class labels. Classes are addressed as ``1 .. K`` where ``K`` is
        the number of distinct values in ``y``.
    lam : float
        Regularisation strength forwarded to ``mini_batch_pegasos_svm``.
    max_iter : int
        Number of update steps per class.
    batch_size : int
        Mini-batch size forwarded to ``mini_batch_pegasos_svm``.

    Returns
    -------
    ndarray, shape (K, d)
        Row ``j - 1`` holds the weights of the "class j vs rest" problem.
    """
    n, d = x.shape
    uniqlbl = np.unique(y)
    w = np.zeros((len(uniqlbl), d))
    for j in range(1, len(uniqlbl) + 1):
        # The hinge-loss update needs targets in {-1, +1}. Encoding the
        # "rest" class as 0 makes negative examples contribute nothing.
        y_i = np.where(np.asarray(y) == j, 1, -1).reshape(n, 1)
        w[j - 1, :] = mini_batch_pegasos_svm(
            x, y_i, np.zeros(d), lam, max_iter, batch_size
        )
    return w

def test_algorithm(x, y, w):
    """Predict a 1-based class index for every row of ``x``.

    Parameters
    ----------
    x : ndarray, shape (n, d)
        Examples to classify.
    y : any
        Unused; kept so existing callers that pass the labels still work.
    w : ndarray, shape (K, d)
        One weight vector per class, one class per row.

    Returns
    -------
    ndarray, shape (n,)
        For each example, the index of the highest-scoring row of ``w``
        plus 1. Ties resolve to the lowest index (``np.argmax`` behaviour).
    """
    scores = np.dot(x, np.transpose(w))
    return np.argmax(scores, axis=1) + 1

def cross_val(n_samples, n_splits):
    """Split ``range(n_samples)`` into ``n_splits`` contiguous folds.

    The first ``n_samples % n_splits`` folds get one extra element, so fold
    sizes differ by at most one. No shuffling is performed.

    Parameters
    ----------
    n_samples : int
        Total number of samples to partition.
    n_splits : int
        Number of folds to produce.

    Returns
    -------
    list of ndarray
        ``n_splits`` index arrays that together cover ``0 .. n_samples - 1``
        in order.
    """
    # reference: scikit-learn
    idx = np.arange(n_samples)
    # The `np.int` alias was removed in NumPy 1.24; builtin `int` selects
    # the same default integer dtype.
    fold_sizes = np.full(n_splits, n_samples // n_splits, dtype=int)
    fold_sizes[: n_samples % n_splits] += 1
    indices = []
    current = 0
    for fold_size in fold_sizes:
        start, stop = current, current + fold_size
        indices.append(idx[start:stop])
        current = stop
    return indices

def seq_model_select(X, Y, grid_, n_fold, max_iter):
    """Pick the grid value with the best mean cross-validated accuracy.

    For every fold produced by ``cross_val`` and every candidate in
    ``grid_``, a model is trained with ``train_seqSVM`` on the remaining
    folds and scored with ``accuracy`` on the held-out fold.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Examples.
    Y : ndarray, shape (n,)
        Labels aligned with the rows of ``X``.
    grid_ : sequence
        Candidate values passed as ``lam`` to ``train_seqSVM``.
    n_fold : int
        Number of cross-validation folds.
    max_iter : int
        Training iterations forwarded to ``train_seqSVM``.

    Returns
    -------
    The element of ``grid_`` whose accuracy, averaged over all folds, is
    highest (first one wins on ties).
    """
    n_sample = X.shape[0]
    indices = cross_val(n_sample, n_fold)
    # One row per fold, one column per candidate. Keeping every fold's
    # score lets the selection use all folds, not just the last one.
    acc = np.zeros((n_fold, len(grid_)))
    for fold in range(n_fold):
        indxte = indices[fold]
        indxtr = np.setdiff1d(np.arange(0, n_sample), indxte)

        Xtr_v, Ytr_v = X[indxtr, :], Y[indxtr]
        Xte_v, Yte_v = X[indxte, :], Y[indxte]

        for k, g in enumerate(grid_):
            w = train_seqSVM(Xtr_v, Ytr_v, g, max_iter)
            acc[fold, k] = accuracy(Yte_v, test_algorithm(Xte_v, Yte_v, w))

    return grid_[np.argmax(acc.mean(axis=0))]

def batch_model_select(X, Y, grid_, n_fold, batch_size, max_iter):
    """Pick the grid value with the best mean cross-validated accuracy.

    For every fold produced by ``cross_val`` and every candidate in
    ``grid_``, a model is trained with ``train_batchSVM`` on the remaining
    folds and scored with ``accuracy`` on the held-out fold.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Examples.
    Y : ndarray, shape (n,)
        Labels aligned with the rows of ``X``.
    grid_ : sequence
        Candidate values passed as ``lam`` to ``train_batchSVM``.
    n_fold : int
        Number of cross-validation folds.
    batch_size : int
        Mini-batch size forwarded to ``train_batchSVM``.
    max_iter : int
        Training iterations forwarded to ``train_batchSVM``.

    Returns
    -------
    The element of ``grid_`` whose accuracy, averaged over all folds, is
    highest (first one wins on ties).
    """
    n_sample = X.shape[0]
    indices = cross_val(n_sample, n_fold)
    # One row per fold, one column per candidate. Keeping every fold's
    # score lets the selection use all folds, not just the last one.
    acc = np.zeros((n_fold, len(grid_)))
    for fold in range(n_fold):
        indxte = indices[fold]
        indxtr = np.setdiff1d(np.arange(0, n_sample), indxte)

        Xtr_v, Ytr_v = X[indxtr, :], Y[indxtr]
        Xte_v, Yte_v = X[indxte, :], Y[indxte]

        for k, g in enumerate(grid_):
            w = train_batchSVM(Xtr_v, Ytr_v, g, max_iter, batch_size)
            acc[fold, k] = accuracy(Yte_v, test_algorithm(Xte_v, Yte_v, w))

    return grid_[np.argmax(acc.mean(axis=0))]