In [3]:
%%time
# Load the feature tables. Both files are read without a header row and only
# columns 0..3071 (np.arange(3072)) are kept; any further columns are ignored.
# NOTE(review): 3072 is presumably 32*32*3 image pixels per row — confirm.
df_X_train = pd.read_csv('Xtr.csv', header=None, usecols=np.arange(3072))
df_X_test = pd.read_csv('Xte.csv', header=None, usecols=np.arange(3072))
# Labels are read with pandas' default header handling; the 'Prediction'
# column of this frame is what later cells use as the target.
df_y_train = pd.read_csv('Ytr.csv')

CPU times: user 4.95 s, sys: 116 ms, total: 5.06 s
Wall time: 5.06 s


In [55]:
%%time
# Convert the loaded DataFrames to NumPy arrays, forcing a float dtype.
# The label vector is taken from the 'Prediction' column of the label frame.
X_train = np.array(df_X_train, dtype=float)
X_test = np.array(df_X_test, dtype=float)
y_train = np.array(df_y_train['Prediction'], dtype=float)

CPU times: user 24 ms, sys: 16 ms, total: 40 ms
Wall time: 38.4 ms


In [77]:
import numpy as np
import pandas as pd
import scipy
from numpy import linalg
import cvxopt
from cvxopt import solvers, matrix
from scipy.spatial.distance import pdist, squareform

def linear_kernel(x1, x2):
    """Linear kernel: the ``np.dot`` product of ``x1`` and ``x2``."""
    inner_product = np.dot(x1, x2)
    return inner_product

def polynomial_kernel(x, y, p=3):
    """Polynomial kernel ``(1 + <x, y>) ** p``.

    ``p`` is the exponent applied to the shifted ``np.dot`` product (default 3).
    """
    shifted_dot = 1 + np.dot(x, y)
    return shifted_dot ** p

def gaussian_kernel(x, y, sigma=1):
    """Gaussian kernel ``exp(-||x - y||^2 / (2 * sigma^2))`` between two arrays.

    ``sigma`` is the bandwidth (default 1). ``x`` and ``y`` must support
    element-wise subtraction, e.g. NumPy arrays of the same shape.
    """
    squared_distance = linalg.norm(x - y) ** 2
    bandwidth = 2 * (sigma ** 2)
    return np.exp(-squared_distance / bandwidth)

def gaussian_kernel_matrix(X, sigma=0.5):
    """Return the Gram matrix ``K[i, j] = exp(-sigma * ||X[i] - X[j]||^2)``.

    Parameters
    ----------
    X : array-like, one sample per row.
    sigma : float, default 0.5
        Multiplies the squared distance directly (it plays the role usually
        called ``gamma``), unlike ``gaussian_kernel`` which divides by
        ``2 * sigma**2``.

    Returns
    -------
    numpy.ndarray
        Square symmetric matrix with ones on the diagonal.
    """
    # 'sqeuclidean' yields the squared distances directly, avoiding the
    # sqrt-then-square round trip of the 'euclidean' metric.
    squared_dists = squareform(pdist(X, 'sqeuclidean'))
    # np.exp replaces scipy.exp, a deprecated alias that recent SciPy
    # releases no longer provide.
    return np.exp(-sigma * squared_dists)

class SVM:
    """Kernel SVM classifier trained one-vs-all with cvxopt's QP solver.

    For each class a binary problem on labels y in {-1, +1} is solved:

        minimize    alpha^T K alpha - y^T alpha
        subject to  0 <= y_i * alpha_i <= C

    A sample x is assigned to the class whose score
    ``sum_i alpha_i * k(x, x_i)`` is the largest.

    Parameters
    ----------
    C : float, default 1
        Upper bound of the box constraint on ``y_i * alpha_i``.
    kernel : {'rbf', 'linear'}, default 'rbf'
        Kernel function.
    gamma : float, default 0.5
        Coefficient of the 'rbf' kernel ``exp(-gamma * ||x - x'||^2)``.
    """

    def __init__(self, C=1, kernel='rbf', gamma=0.5):
        self.C = C
        self.kernel = kernel  # kernel function: 'rbf' or 'linear'
        self.gamma = gamma  # kernel coefficient, only used by 'rbf'

    def fit(self, X, y, mode='OVA'):
        """Fit one binary classifier per class.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,)
        mode : str, default 'OVA'
            Multi-class strategy; only one-vs-all ('OVA') is implemented.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``mode`` is not 'OVA' or X and y disagree on the sample count.
        """
        if mode != 'OVA':
            # Previously an unknown mode silently left the model unfitted.
            raise ValueError("mode must be 'OVA'; got %r" % (mode,))

        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must have the same number of samples; "
                             "got %d and %d" % (X.shape[0], y.shape[0]))

        # The training samples are needed at prediction time to evaluate the
        # kernel between new points and the training points.
        self.X_train_ = X
        self.n_sample_ = y.shape[0]
        self.classes_ = np.unique(y)
        self.alphas_ = {}
        self.K_ = self.fit_kernel(X)

        for class_ in self.classes_:
            # Build the +1/-1 target in one step; relabelling in place goes
            # wrong when a class label is itself equal to -1.
            y_binary = np.where(y == class_, 1.0, -1.0)
            self.fit_dual(y_binary)
            sol = solvers.qp(matrix(self.Q_), matrix(self.p_),
                             matrix(self.G_), matrix(self.h_))
            self.alphas_[class_] = np.array(sol['x']).reshape(-1,)

        return self

    def predict(self, X_test):
        """Predict the class label of every row of ``X_test``.

        Returns an array of shape (n_test,) whose entries are taken from
        ``self.classes_``.
        """
        X_test = np.asarray(X_test, dtype=float)
        self.n_test_ = X_test.shape[0]  # size of the test sample
        # Kernel between test rows and training rows, shape (n_test, n_train).
        self.K_test_ = self.fit_kernel(X_test, self.X_train_)

        scores = np.empty((self.classes_.shape[0], self.n_test_))
        # Rows are indexed by position, not by label: labels may be floats or
        # may not start at zero.
        for row, class_ in enumerate(self.classes_):
            scores[row] = np.dot(self.K_test_, self.alphas_[class_])

        # Map the winning row back to the actual class label.
        return self.classes_[scores.argmax(axis=0)]

    def fit_kernel(self, X, Y=None):
        """Return the kernel matrix between the rows of ``X`` and of ``Y``.

        When ``Y`` is omitted the Gram matrix of ``X`` with itself is returned.

        Raises
        ------
        ValueError
            If ``self.kernel`` is neither 'rbf' nor 'linear'.
        """
        if self.kernel == 'rbf':
            if Y is None:
                squared_dists = squareform(pdist(X, 'sqeuclidean'))
            else:
                # Local import: the file only imports pdist and squareform.
                from scipy.spatial.distance import cdist
                squared_dists = cdist(X, Y, 'sqeuclidean')
            # np.exp replaces the deprecated scipy.exp alias.
            return np.exp(-self.gamma * squared_dists)

        elif self.kernel == 'linear':
            # A true linear kernel <x, y>; the previous Manhattan distance
            # matrix was not a valid (positive semi-definite) kernel.
            X = np.asarray(X, dtype=float)
            Y = X if Y is None else np.asarray(Y, dtype=float)
            return np.dot(X, Y.T)

        else:
            raise ValueError('the kernel must either be rbf or linear')

    def fit_dual(self, y):
        """Assemble the QP data (``Q_``, ``p_``, ``G_``, ``h_``) for labels ``y``.

        ``y`` holds the +1/-1 targets of one binary problem. ``n_sample_`` and
        ``K_`` must already be set on the instance.
        """
        n = self.n_sample_
        # cvxopt needs double precision data, so cast integer labels.
        y = np.asarray(y, dtype=float)
        diag_y = np.diag(y)
        self.p_ = (-y).reshape(-1, 1)
        self.Q_ = 2 * self.K_  # quadratic term
        # Stacked constraints: y_i * alpha_i <= C and -y_i * alpha_i <= 0.
        self.G_ = np.r_[diag_y, -diag_y]  # shape (2 * n, n)
        self.h_ = np.r_[self.C * np.ones(n), np.zeros(n)]

        return self
    
    

In [78]:
# Display the dimensions of X. X is only assigned in a cell that appears
# further down in this file, so on a fresh kernel this cell must run after it.
X.shape

(500, 3072)

In [79]:
# Train on the first 500 rows only.
X = X_train[:500]
y = y_train[:500]
# Default hyper-parameters: C=1, kernel='rbf', gamma=0.5.
svm = SVM()
# fit returns the estimator itself, which is why the cell displays its repr.
svm.fit(X, y)

(500, 1)
     pcost       dcost       gap    pres   dres
 0: -7.3436e+01 -9.3239e+02  3e+03  2e+00  7e-16
 1: -6.7455e+01 -5.5893e+02  6e+02  2e-01  5e-16
 2: -7.2216e+01 -1.1183e+02  4e+01  3e-03  7e-16
 3: -7.4473e+01 -7.7220e+01  3e+00  1e-04  3e-16
 4: -7.4880e+01 -7.5107e+01  2e-01  8e-06  3e-16
 5: -7.4944e+01 -7.4955e+01  1e-02  2e-07  2e-16
 6: -7.4949e+01 -7.4950e+01  6e-04  2e-09  3e-16
 7: -7.4950e+01 -7.4950e+01  3e-05  3e-11  2e-16
Optimal solution found.
(500, 1)
     pcost       dcost       gap    pres   dres
 0: -5.2077e+01 -9.8460e+02  4e+03  2e+00  7e-16
 1: -4.2382e+01 -6.0989e+02  8e+02  3e-01  6e-16
 2: -4.4112e+01 -1.0233e+02  6e+01  2e-03  1e-15
 3: -5.0692e+01 -5.4829e+01  4e+00  5e-05  5e-16
 4: -5.1651e+01 -5.2034e+01  4e-01  3e-06  3e-16
 5: -5.1827e+01 -5.1853e+01  3e-02  9e-08  2e-16
 6: -5.1843e+01 -5.1844e+01  1e-03  1e-09  3e-16
 7: -5.1844e+01 -5.1844e+01  5e-05  2e-11  3e-16
 8: -5.1844e+01 -5.1844e+01  2e-06  2e-13  3e-16
Optimal solution found.
(500,

<__main__.SVM at 0x7f89ec3ecf60>

In [81]:
# Predict on the first 500 training rows, i.e. the same rows the model was
# fitted on in the cell above.
res = svm.predict(X_train[:500,:])



In [23]:
%%time
# Convert the loaded DataFrames to NumPy arrays without forcing a dtype
# (another cell in this file does the same conversion with dtype=float).
X_train = np.array(df_X_train)
X_test = np.array(df_X_test)
y_train = np.array(df_y_train['Prediction'])

CPU times: user 12 ms, sys: 24 ms, total: 36 ms
Wall time: 35.8 ms


In [5]:
def computing_gram_matrix3(X, sigma=0.5):
    """Return the Gaussian Gram matrix ``exp(-||x_i - x_j||^2 / (2 * sigma^2))``.

    Parameters
    ----------
    X : array-like, one sample per row.
    sigma : float, default 0.5
        Bandwidth of the Gaussian kernel.

    Returns
    -------
    numpy.ndarray
        Square symmetric matrix with ones on the diagonal.
    """
    # Apply the exponential after squareform: squareform fills the diagonal
    # with zeros, which is correct for distances (exp(0) == 1) but was wrong
    # when it was fed kernel values directly, giving K(x, x) == 0.
    squared_dists = squareform(pdist(X, 'sqeuclidean'))
    return np.exp(-squared_dists / (2 * (sigma ** 2)))

In [6]:
import scipy
from scipy.spatial.distance import pdist, squareform

def computing_gram_matrix(X, sigma=1):
    """Gaussian Gram matrix of the rows of ``X``, filled entry by entry.

    ``X`` must be a 2-D array with one sample per row. Entry ``[i, j]`` is
    ``exp(-||X[i] - X[j]||^2 / (2 * sigma^2))``. This is the explicit
    double-loop implementation, evaluating every (i, j) pair in Python.
    """
    n_samples, _ = X.shape
    gram = np.zeros((n_samples, n_samples))
    scale = 2 * (sigma ** 2)
    # np.ndindex walks (row, col) in the same row-major order as two nested loops.
    for row, col in np.ndindex(n_samples, n_samples):
        gap = linalg.norm(X[row] - X[col])
        gram[row, col] = np.exp(-gap ** 2 / scale)
    return gram

def computing_gram_matrix2(X, sigma=1):
    """Vectorised Gaussian Gram matrix ``exp(-||x_i - x_j||^2 / (2 * sigma^2))``.

    Parameters
    ----------
    X : array-like, one sample per row.
    sigma : float, default 1
        Bandwidth of the Gaussian kernel.

    Returns
    -------
    numpy.ndarray
        Square symmetric matrix with ones on the diagonal.
    """
    # 'sqeuclidean' gives squared distances directly (no sqrt-then-square).
    squared_dists = squareform(pdist(X, 'sqeuclidean'))
    # np.exp replaces scipy.exp, a deprecated alias that recent SciPy
    # releases no longer provide.
    return np.exp(-squared_dists / (2 * sigma ** 2))

In [7]:
%%time
# Gram matrix of the full training set with the double-loop implementation
# (default sigma=1); the recorded wall time for this cell is about 10 minutes.
K1 = computing_gram_matrix(X_train)

CPU times: user 10min 22s, sys: 64 ms, total: 10min 22s
Wall time: 10min 22s


In [8]:
%%time
# Same Gram matrix with the pdist/squareform implementation (default sigma=1);
# the recorded wall time for this cell is about 55 seconds.
K2=computing_gram_matrix2(X_train)

CPU times: user 54.9 s, sys: 80 ms, total: 55 s
Wall time: 55 s


In [11]:
def __init__(self, penalty='l2', loss='squared_hinge', dual=True, tol=1e-4,
                 C=1.0, multi_class='ovr', fit_intercept=True,
                 intercept_scaling=1, class_weight=None, verbose=0,
                 random_state=None, max_iter=1000):
    """Store every constructor argument, unchanged, as a same-named attribute."""
    # Loss / regularisation settings.
    self.penalty = penalty
    self.loss = loss
    self.dual = dual
    self.C = C
    self.multi_class = multi_class
    self.class_weight = class_weight

    # Intercept handling.
    self.fit_intercept = fit_intercept
    self.intercept_scaling = intercept_scaling

    # Solver controls.
    self.tol = tol
    self.max_iter = max_iter
    self.random_state = random_state
    self.verbose = verbose

def fit(self, X, y, sample_weight=None):
    """Train the model on ``X`` / ``y`` and return ``self``.

    Parameters
    ----------
    X : {array-like, sparse matrix}, shape = [n_samples, n_features]
        Training samples, one row per sample.
    y : array-like, shape = [n_samples]
        Targets aligned with the rows of ``X``.
    sample_weight : array-like, shape = [n_samples], optional
        Per-sample weights, passed through to the solver; ``None`` by default.

    Returns
    -------
    self : object
        The fitted estimator.

    Raises
    ------
    ValueError
        If ``self.C`` is negative.
    """
    # FIXME Remove l1/l2 support in 1.0 -----------------------------------
    if self.loss in ('l1', 'l2'):
        legacy_name = self.loss
        # Translate the legacy loss name to its current equivalent.
        self.loss = {'l1': 'hinge', 'l2': 'squared_hinge'}.get(legacy_name)
        template = ("loss='%s' has been deprecated in favor of "
                    "loss='%s' as of 0.16. Backward compatibility"
                    " for the loss='%s' will be removed in %s")
        warnings.warn(template % (legacy_name, self.loss, legacy_name, '1.0'),
                      DeprecationWarning)
    # ---------------------------------------------------------------------

    if self.C < 0:
        raise ValueError("Penalty term must be positive; got (C=%r)"
                         % self.C)

    X, y = check_X_y(X, y, accept_sparse='csr',
                     dtype=np.float64, order="C")
    check_classification_targets(y)
    self.classes_ = np.unique(y)

    solver_result = _fit_liblinear(
        X, y, self.C, self.fit_intercept, self.intercept_scaling,
        self.class_weight, self.penalty, self.dual, self.verbose,
        self.max_iter, self.tol, self.random_state, self.multi_class,
        self.loss, sample_weight=sample_weight)
    self.coef_, self.intercept_, self.n_iter_ = solver_result

    # With exactly two classes under "crammer_singer", collapse the two rows
    # of coefficients (and intercepts) into their difference.
    two_class_cs = (self.multi_class == "crammer_singer"
                    and len(self.classes_) == 2)
    if two_class_cs:
        self.coef_ = (self.coef_[1] - self.coef_[0]).reshape(1, -1)
        if self.fit_intercept:
            self.intercept_ = np.array(
                [self.intercept_[1] - self.intercept_[0]])

    return self

array([[ 1.        ,  0.00866644,  0.02227502, ...,  0.0104907 ,
         0.02156173,  0.01812238],
       [ 0.00866644,  1.        ,  0.00345238, ...,  0.00275313,
         0.00322442,  0.00347302],
       [ 0.02227502,  0.00345238,  1.        , ...,  0.00580637,
         0.0075947 ,  0.01033267],
       ..., 
       [ 0.0104907 ,  0.00275313,  0.00580637, ...,  1.        ,
         0.0033863 ,  0.00503564],
       [ 0.02156173,  0.00322442,  0.0075947 , ...,  0.0033863 ,
         1.        ,  0.00542766],
       [ 0.01812238,  0.00347302,  0.01033267, ...,  0.00503564,
         0.00542766,  1.        ]])

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()