In [1]:
from scipy import io
# Location of the MATLAB file holding the data set, relative to the notebook.
fname = "data/aut-avn.mat"
content = io.loadmat(fname, struct_as_record=True)

# 'X' is the data matrix and 'Y' the label array stored in the file.
X, y = content['X'], content['Y']

# Quick sanity summary: container types, matrix dimensions and sparsity.
print(type(X), type(y), sep="\n")
print(X.shape)
print("Amount of non-zero entries:", X.count_nonzero())

<class 'scipy.sparse.csc.csc_matrix'>
<class 'numpy.ndarray'>
(71175, 20707)
Amount of non-zero entries: 3652855


In [2]:
import numpy
from scipy import optimize
from scipy import sparse

class SparseLSSVM:
    """Least-squares SVM with a linear kernel, written for sparse data.

    The model holds one coefficient per training sample (the vector ``c``).
    The kernel matrix ``X @ X.T`` is never built explicitly: every product
    with it is evaluated as two matrix-vector products, ``X @ (X.T @ c)``.

    ``fit`` minimises::

        f(c) = 1/n * ||y - X X^T c||^2 + lam * c^T X X^T c
    """

    def __init__(self, lam=1.0):
        """ Instantiates the model.

        Parameters
        ----------
        lam : float, default 1.0
            The regularization parameter lambda
        """

        self.lam = lam

    def get_params(self, deep=True):
        """ Returns the parameters of the model as a dict.

        ``deep`` is accepted for scikit-learn API compatibility and ignored.
        """

        return {"lam": self.lam}

    def set_params(self, **parameters):
        """ Sets the given parameters as attributes and returns ``self``.
        """

        for parameter, value in parameters.items():
            setattr(self, parameter, value)

        return self

    def fit(self, X, y):
        """
        Fits the model.

        Parameters
        ----------
        X : Array of shape [n_samples, n_features]
            Sparse data matrix
        y : Array-like with n_samples entries
            Dense label vector; it is copied, cast to float64 and stored
            as a column of shape [n_samples, 1].

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If X has no rows or X and y disagree on the number of samples.
        """

        y = numpy.array(y, dtype=numpy.float64).reshape((-1, 1))

        # Fail early with a clear message instead of an obscure shape error
        # (or a division by zero in the objective) inside the optimizer.
        if X.shape[0] == 0:
            raise ValueError("Cannot fit on an empty training set.")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                "X and y have inconsistent numbers of samples: "
                "{} != {}".format(X.shape[0], y.shape[0])
            )

        self._n = X.shape[0]
        self._X, self._y = X, y

        # make use of optimizer provided by the scipy package
        # DO NOT CHANGE ANYTHING HERE
        start = numpy.zeros(self._X.shape[0], numpy.float64).reshape((-1,1))
        self.c_opt, self.f_opt, d = optimize.fmin_l_bfgs_b(self._get_function,
                                                 start,
                                                 m=10,
                                                 fprime=self._get_function_grad,
                                                 iprint=1)
        self.c_opt = self.c_opt.reshape((-1,1))

        return self

    def _get_function(self, c):
        """Objective value f(c) for the coefficient vector ``c``.

        Returns a scalar, which is what ``fmin_l_bfgs_b`` expects from the
        objective function.
        """

        c = c.reshape((-1, 1))

        # w = X^T c (one entry per feature). X @ w is computed only once.
        w = numpy.asarray(self._X.T @ c)
        residual = numpy.asarray(self._y - self._X @ w).ravel()
        w = w.ravel()

        # c^T X X^T c == (X^T c)^T (X^T c) == w . w, so no additional
        # product with X is needed for the regularisation term.
        return residual.dot(residual) / self._n + self.lam * w.dot(w)

    def _get_function_grad(self, c):
        """Gradient of the objective with respect to ``c``.

        grad = -2/n * X X^T (y - X X^T c) + 2 * lam * X X^T c

        Returns a 1-D array with one entry per training sample.
        """

        c = c.reshape((-1, 1))

        # Do calculations on vectors only, so that no matrix times matrix
        # operation is ever carried out.
        w = self._X.T @ c
        residual = self._y - self._X @ w
        back_projected = self._X.T @ residual

        # Combine both terms in feature space first and multiply by X once.
        # Scalars are applied to the vectors: writing `scalar * X @ v` would
        # evaluate left to right and rescale the whole matrix on every call.
        combined = 2 * self.lam * w - (2 / self._n) * back_projected
        return numpy.asarray(self._X @ combined).ravel()

    def predict(self, X):
        """
        Computes predictions for a new set of points.

        Parameters
        ----------
        X : Array of shape [n_samples, n_features]
            Sparse data matrix

        Returns
        -------
        predictions : Array of shape [n_samples, 1]
            Each entry is -1.0 or +1.0.
        """
        w = self._X.T @ self.c_opt
        scores = numpy.asarray(X @ w)

        # Threshold the real-valued scores into class labels. numpy.sign
        # would return 0 for a score of exactly 0 (e.g. a sample sharing no
        # feature with the training data), which is not a valid label, so
        # such ties are assigned to the positive class.
        return numpy.where(scores < 0, -1.0, 1.0)
                        

In [3]:
# DO NOT CHANGE ME
# Hold out 10% of the samples as a test set; random_state=1 fixes the
# shuffle so the split is the same on every run.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=1)

# Report the sizes of the resulting split.
print("Number of training instances: {}".format(X_train.shape[0]))
print("Number of test instances: {}".format(X_test.shape[0]))
print("Number of features: {}".format(X_train.shape[1]))

# Train on the training part only. fit() returns the estimator itself,
# so as the last expression of the cell its repr is displayed.
clf = SparseLSSVM(lam=0.001)
clf.fit(X_train, y_train)

Number of training instances: 64057
Number of test instances: 7118
Number of features: 20707


<__main__.SparseLSSVM at 0x11098dd30>

In [4]:
# DO NOT CHANGE ME
# Evaluate the fitted classifier on the held-out test set: compare its
# predicted labels with the true test labels and print the accuracy.
from sklearn.metrics import accuracy_score
preds = clf.predict(X_test)
acc = accuracy_score(y_test, preds)
print("Accuracy of model: {acc}".format(acc=acc))

Accuracy of model: 0.9592582186007306
