In [1]:
import numpy as np
import pandas as pd

from MySVM import MySVM

# load dataset
data = pd.read_csv('hw2_q2_q4_dataset.csv', header=None).to_numpy()
X = data[:,:-1]
y = data[:,-1]

# change labels from 0 and 1 to -1 and 1 for SVM
y[y == 0] = -1

num_data, num_features = X.shape

# shuffle dataset
np.random.seed(2023)
perm = np.random.permutation(num_data)

X = X.tolist()
y = y.tolist()

X = [X[i] for i in perm]
y = [y[i] for i in perm]

X = np.array(X)
y = np.array(y)

# append column of 1s to include intercept
X = np.hstack((X, np.ones((num_data, 1))))
num_data, num_features = X.shape

# Split dataset into train and test sets
NUM_TRAIN = int(np.ceil(num_data*0.8))
NUM_TEST = num_data - NUM_TRAIN

X_train = X[:NUM_TRAIN]
X_test = X[NUM_TRAIN:]
y_train = y[:NUM_TRAIN]
y_test = y[NUM_TRAIN:]


In [13]:
w = np.array([1.83769818 -4.17701846])
w@w.T

5.472419372419278

In [4]:
best_svm = MySVM(d = 10**-6, max_iters = 1000, eta = 0.001, c = 0.01)
# fit model using all training data
best_svm.fit(X_train, y_train)
# predict on test data
loss = best_svm.predict(X_test)

  j = 0.5*w_t@w_t.T + c*np.sum(hinge_loss)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(self.w_t@X_f.T + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)


In [10]:
class MySVM:

    def __init__(self, d, max_iters, eta, c):
        self.d = d
        self.max_iters = max_iters
        self.eta = eta
        self.c = c

    def fit(self, X, y):
        self.intercept = np.zeros(1)
        self.w_t = np.random.uniform(-0.01, 0.01, size = X.shape[1]-1)
        hinge_loss = []
        cost = []
        X_f = X[:, :-1] # Only features
        X_i = X[:, -1] # Only intercept
        for i in range(self.max_iters):
            hl_comp = y@(self.w_t@X_f.T + self.intercept*X_i)
            hinge_loss.append(max(0, 1 - hl_comp))
            error = X_f@self.w_t - y
            self.intercept = self.intercept - self.eta*error.sum()
            grad = X_f.T.dot(error)
            if hinge_loss[len(hinge_loss) - 1] == 0:
                self.w_t[0:] = self.w_t[0:] - self.eta*grad
                cost.append(svmCost(self.w_t[0:], self.c, hinge_loss))
            else:
                self.w_t[0:] = self.w_t[0:] - self.eta*grad
                cost.append(svmCost(self.w_t[0:], self.c, hinge_loss))
            if len(cost) > 2:
                if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
                    break
        self.cost = cost
    def predict(self, X):
        X_f = X[:, :-1] # Only features
        X_i = X[:, -1] 
        prediction = X_f@self.w_t + self.intercept*X_i
        return np.sign(prediction)


In [17]:
def svmCost(w_t, c, hinge_loss):
    j = 0.5*w_t@w_t.T + c*np.sum(hinge_loss)
    return j

In [49]:
max_iters = 1000
eta = 0.001
d = 10**-6
intercept = np.zeros(1)
c = 0.01
w_t = np.random.uniform(-0.01, 0.01, size = X.shape[1]-1)
hinge_loss = []
cost = []
X_f = X[:, :-1] # Only features
X_i = X[:, -1] # Only intercept

In [50]:
for i in range(max_iters):
    hl_comp = y@(w_t@X_f.T + intercept*X_i)
    hinge_loss.append(max(0, 1 - hl_comp))
    error = X_f@w_t - y
    intercept = intercept - eta*error.sum()
    grad = X_f.T.dot(error)
    if hinge_loss[len(hinge_loss) - 1] == 0:
        w_t[0:] = w_t[0:] - eta*grad
        cost.append(svmCost(w_t[0:], c, hinge_loss))
    else:
        w_t[0:] = w_t[0:] - eta*grad
        cost.append(svmCost(w_t[0:], c, hinge_loss))
    print(w_t)
    #print(hinge_loss)
    if len(cost) > 2:
        if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= d:
            
            break


[ 1.83769818 -4.17701846]
[26.98552009 87.10431388]
[-2014.51878189 -2951.79568543]
[ 93531.39450323 117553.47222568]
[-4029449.56703508 -4895266.22349258]
[1.70879092e+08 2.06012833e+08]
[-7.22114128e+09 -8.69078927e+09]
[3.04914783e+11 3.66827335e+11]
[-1.28728069e+13 -1.54852404e+13]
[5.43438509e+14 6.53711896e+14]
[-2.29415933e+16 -2.75967273e+16]
[9.68491559e+17 1.16500932e+18]
[-4.08853669e+19 -4.91814531e+19]
[1.72599651e+21 2.07621989e+21]
[-7.28638164e+22 -8.76486727e+22]
[3.07598289e+24 3.70013307e+24]
[-1.29854174e+26 -1.56202990e+26]
[5.48185967e+27 6.59418826e+27]
[-2.31419480e+29 -2.78376994e+29]
[9.76949045e+30 1.17518257e+31]
[-4.12423983e+32 -4.96109268e+32]
[1.74106872e+34 2.09435038e+34]
[-7.35000968e+35 -8.84140611e+35]
[3.10284378e+37 3.73244433e+37]
[-1.30988120e+39 -1.57567026e+39]
[5.52972979e+40 6.65177174e+40]
[-2.33440341e+42 -2.80807910e+42]
[9.85480213e+43 1.18544480e+44]
[-4.16025459e+45 -5.00441522e+45]
[1.75627253e+47 2.11263921e+47]
[-7.41419334e+48 -8.

  j = 0.5*w_t@w_t.T + c*np.sum(hinge_loss)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= d:
  hl_comp = y@(w_t@X_f.T + intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)


In [11]:
svm = MySVM(d = 10**-6, max_iters = 1000, eta = 0.0001, c = 0.01)
svm.fit(X = X, y = y)

  j = 0.5*w_t@w_t.T + c*np.sum(hinge_loss)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(self.w_t@X_f.T + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)


In [2]:
from my_cross_val import my_cross_val

eta_vals = [0.00001, 0.0001, 0.001]
C_vals = [0.01, 0.1, 1, 10, 100]

# SVM
for eta_val in eta_vals:
    for c_val in C_vals:
        print("eta: ", eta_val, "c: ", c_val)
        # instantiate svm object
        svm = MySVM(d = 10**-6, max_iters = 1000, eta = eta_val, c = c_val)
        # call to CV function to compute error rates for each fold
        svm_err_rate = my_cross_val(svm, loss_func = 'err_rate', X = X, y = y)
        # print error rates from CV
        print(svm_err_rate)
        # fit model using all training data
        #svm.fit(X_train, y_train)
        # predict on test data
        #loss = svm.predict(X_test)
        #print(loss)

eta:  1e-05 c:  0.01
[0.57, 0.485, 0.5, 0.525, 0.445, 0.5, 0.515, 0.47, 0.47, 0.52, {'Mean': 0.5, 'sd': 0.0335}]
eta:  1e-05 c:  0.1
[0.53, 0.5, 0.505, 0.52, 0.505, 0.52, 0.455, 0.495, 0.485, 0.485, {'Mean': 0.5, 'sd': 0.0206}]
eta:  1e-05 c:  1
[0.53, 0.49, 0.505, 0.49, 0.53, 0.515, 0.48, 0.465, 0.535, 0.46, {'Mean': 0.5, 'sd': 0.0259}]
eta:  1e-05 c:  10
[0.54, 0.495, 0.475, 0.545, 0.495, 0.5, 0.57, 0.43, 0.47, 0.48, {'Mean': 0.5, 'sd': 0.0392}]
eta:  1e-05 c:  100
[0.435, 0.545, 0.55, 0.515, 0.48, 0.5, 0.5, 0.505, 0.48, 0.49, {'Mean': 0.5, 'sd': 0.0315}]
eta:  0.0001 c:  0.01


  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, {'Mean': 1.0, 'sd': 0.0}]
eta:  0.0001 c:  0.1


  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, {'Mean': 1.0, 'sd': 0.0}]
eta:  0.0001 c:  1


  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, 

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, {'Mean': 1.0, 'sd': 0.0}]
eta:  0.0001 c:  10


  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  j = 0.5*np.linalg.norm(w_t, ord=2) + c*np.sum(hinge_loss)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  self.w_t[0:] = self.w_t[0:] - self.eta*grad
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, w

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, {'Mean': 1.0, 'sd': 0.0}]
eta:  0.0001 c:  100


  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  j = 0.5*np.linalg.norm(w_t, ord=2) + c*np.sum(hinge_loss)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  j = 0.5*np.linalg.norm(w_t, ord=2) + c*np.sum(hinge_loss)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  self.w_t[0:] = self.w_t[0:] - self.eta*grad
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  j = 0.5*np.linalg.norm(w_t, ord=2) + c*np.sum(hinge_loss)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  self.w_t[0:] = self.w_t[0:] - self.eta*grad
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  j = 0.5*np.linalg.norm(w_t, ord=2) + c*np.sum(hinge_loss)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  self.w_t[0

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, {'Mean': 1.0, 'sd': 0.0}]
eta:  0.001 c:  0.01


  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t 

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, {'Mean': 1.0, 'sd': 0.0}]
eta:  0.001 c:  0.1


  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, {'Mean': 1.0, 'sd': 0.0}]
eta:  0.001 c:  1


  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  self.w_t[0:] = self.w_t[0:] - self.eta*grad
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - 

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, {'Mean': 1.0, 'sd': 0.0}]
eta:  0.001 c:  10


  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  j = 0.5*np.linalg.norm(w_t, ord=2) + c*np.sum(hinge_loss)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  self.w_t[0:] = self.w_t[0:] - self.eta*grad
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= se

[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, {'Mean': 1.0, 'sd': 0.0}]
eta:  0.001 c:  100


  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)


[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, {'Mean': 1.0, 'sd': 0.0}]


  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  j = 0.5*np.linalg.norm(w_t, ord=2) + c*np.sum(hinge_loss)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, out, keepdims, initial, where)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  if np.abs(cost[len(cost) - 1] - cost[len(cost) - 2]) <= self.d:
  hl_comp = y@(X_f@self.w_t + self.intercept*X_i)
  return umr_sum(a, axis, dtype, o