In [1]:
import numpy as np
import time
import matplotlib.pyplot as plt
import cvxpy as cp

## Generate the random Dataset


In [2]:
# We first generate a random dataset with number of features (m = 10) and number of instances (n = 100)
# We also generate a random label vector y \in {-1,1}

n = 100 # Number of instances
m = 10  # Number of Features 

X = np.random.rand(n,m) 
y = np.random.rand(n) # n-dimensional vector
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1) # m-dimensional vector
print(y)
print(X)

[-1 -1  1  1  1 -1 -1 -1 -1  1  1  1 -1 -1 -1  1 -1 -1 -1  1  1  1 -1 -1
 -1  1  1  1  1 -1  1  1 -1 -1  1  1 -1  1  1  1 -1  1 -1  1  1 -1 -1 -1
 -1  1 -1 -1  1 -1  1  1 -1 -1 -1 -1  1 -1 -1  1 -1 -1  1  1 -1  1  1  1
  1 -1 -1  1 -1  1  1  1  1  1 -1 -1 -1  1  1 -1 -1 -1 -1 -1 -1 -1  1  1
  1  1 -1 -1]
[[8.92311919e-01 5.17576028e-01 8.17059209e-01 2.59403210e-01
  9.25906087e-03 9.71903096e-02 7.97539063e-01 6.27777163e-01
  5.03892928e-01 7.74041526e-01]
 [7.76606002e-01 4.67377071e-01 2.08834827e-01 4.76257918e-01
  7.92382917e-01 6.90983139e-02 6.43466097e-01 5.59696720e-01
  2.71485295e-01 6.08884657e-01]
 [6.56768378e-01 4.64746214e-01 4.09685353e-01 7.06114705e-01
  9.66180171e-01 6.78357351e-01 5.99979791e-01 1.32238665e-01
  7.13726891e-01 3.18150821e-01]
 [1.08828095e-01 4.63689527e-01 7.95430476e-01 5.14808203e-01
  9.26239681e-01 8.82416651e-01 9.13825548e-01 3.09526389e-01
  5.52754286e-01 2.21337169e-02]
 [5.46135647e-01 8.50308810e-01 3.15503894e-01 5.78384759e-01
  5.

## An Implementation of the Logistic Loss 


In [5]:
def LogisticLossNaive(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient

    n = w.shape[0] # feature length 
    m = X.shape[0] # number of training examples
    
    g = np.zeros((n,1))

    l2_sum = 0
    for i in range(n):
      l2_sum += w[i][0]**2
      g[i][0] += lam*w[i][0]

    f = 0 # cost function 
    for i in range(m):
      val =0 # storing the value of inner product of weight and row of X
      for j in range(n):
        val += w[j][0]*X[i][j]
      f += np.log(1+np.exp(-y[i]*val))
      for j in range(n):
        g[j][0] += -y[i]*X[i][j]/(1+np.exp(val*y[i]))
      
    f += 0.5*lam*l2_sum
    
    return [f, g]

In [6]:
start = time.time()
[f,g] = LogisticLossNaive(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Time Taken = 0.019153356552124023
Function value = 140.11780276559827
Printing Gradient:
[[20.65308792]
 [19.4908496 ]
 [23.3815784 ]
 [22.13233807]
 [23.25180053]
 [22.07824811]
 [25.46977585]
 [20.04218285]
 [25.08716677]
 [22.91347968]]


## An Implementation of the Least Squares 


In [13]:
def LeastSquaresNaive(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient

    n = w.shape[0] # feature length 
    m = X.shape[0] # number of training examples

    y_hat = np.zeros(m)
    g = np.zeros((n,1))
    f =0

    l2_sum = 0
    for i in range(n):
      l2_sum += w[i][0]**2
      g[i][0] += lam*w[i][0]

    for i in range(m):
      for j in range(n):
        y_hat[i] += X[i][j]*w[j][0]

      for j in range(n):
        g[j] += 2*(y_hat[i]-y[i])*X[i][j]
      
      f += (y_hat[i] - y[i])**2

    f += 0.5*lam*l2_sum

    return [f, g]     

In [14]:
start = time.time()
[f,g] = LeastSquaresNaive(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Time Taken = 0.008816242218017578
Function value = 765.5306437937076
Printing Gradient:
[[247.02390047]
 [240.72484553]
 [265.22377036]
 [267.2425564 ]
 [257.02299075]
 [262.49868215]
 [283.15028227]
 [238.79272922]
 [273.41676204]
 [258.88130636]]


## An Implementation of the Hinge Loss 

In [15]:
def HingeLossNaive(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    n = w.shape[0] # feature length 
    m = X.shape[0] # number of training examples

    g = np.zeros((n,1))
    f= 0

    l2_sum = 0
    for i in range(n):
      l2_sum += w[i][0]**2
      g[i][0] += lam*w[i][0]

    for i in range(m):
      val =0 # storing the value of inner product of weight and row of X
      for j in range(n):
        val += w[j][0]*X[i][j]
      f += max(0, 1-y[i]*val)
      if 1-y[i]*val > 0:
        for j in range(n):
          g[j][0] += -y[i]*X[i][j]

    f += 0.5*lam*l2_sum

    return [f, g]

In [16]:
start = time.time()
[f,g] = HingeLossNaive(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Time Taken = 0.007283926010131836
Function value = 183.39961206390316
Printing Gradient:
[[24.36781137]
 [23.42740412]
 [27.00925743]
 [26.15522556]
 [26.79737846]
 [26.13755516]
 [29.23429996]
 [24.07483467]
 [29.13480161]
 [26.96471302]]


## Scalability of the code

In [33]:
n = 100
m = 10000

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

start = time.time()
[f,g] = LogisticLossNaive(w,X,y,1)
end = time.time()
print("Logistic Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = LeastSquaresNaive(w,X,y,1)
end = time.time()
print("Least Square")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = HingeLossNaive(w,X,y,1)
end = time.time()
print("Hinge Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

  f += np.log(1+np.exp(-y[i]*val))
  g[j][0] += -y[i]*X[i][j]/(1+np.exp(val*y[i]))


Logistic Loss
Time Taken = 8.705084323883057
Function value = inf
Printing Gradient:
[[23.47291348]
 [18.30352544]
 [21.56395418]
 ...
 [20.70788864]
 [20.40633109]
 [21.35320558]]
Least Square
Time Taken = 4.2392542362213135
Function value = 629204342.6278816
Printing Gradient:
[[256936.88452829]
 [202634.63426419]
 [249069.88869177]
 ...
 [225531.29065722]
 [247023.52015019]
 [244225.93992716]]
Hinge Loss
Time Taken = 3.4823145866394043
Function value = 114642.60335262673
Printing Gradient:
[[23.47291348]
 [18.30352544]
 [21.56395418]
 ...
 [20.70788864]
 [20.40633109]
 [21.35320558]]


## Implement a vectorized version 

In [18]:
def LogisticLossVec(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient

    pdt = np.dot(X,w).reshape(-1,1)
    f =  0.5*lam*np.sum(w**2) + np.sum(np.log(1+np.exp(-np.multiply(np.reshape(y,(n,1)),pdt))))
    c = -np.multiply(np.reshape(y,(n,1)), 1/(1+np.exp(np.multiply(np.reshape(y,(n,1)),pdt))))
    g = lam*w+ np.reshape(np.sum(np.multiply(X, c), axis=0), (-1,1))
    return [f, g]     

In [28]:
def LeastSquaresVec(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient

    y_hat = np.dot(X, w)
    print(y_hat.shape)
    f = np.sum((y_hat - y)**2) + 0.5*lam*np.sum(w**2)
    g = lam*w
    g += np.reshape(2*np.dot((y_hat.reshape(-1)-y).reshape(1, -1), X), (-1,1))
    return [f, g]     

In [29]:
def HingeLossVec(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient

    pdt = 1 - np.multiply(y.reshape(-1, 1), np.dot(X,w))
    pdt_xy = -np.multiply(y.reshape(-1, 1), X)
    cost = 0.5*lam*np.sum(np.power(w, 2)) + np.sum(np.maximum(0, pdt))
    grad = lam*w + np.reshape(np.sum(np.multiply(pdt_xy, np.sign(np.maximum(0, pdt))), axis=0), (-1,1))
    return [f, g]

In [34]:
n = 100
m = 10000

# X = np.random.rand(n,m)
# y = np.random.rand(n)
# ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
# y = np.array(ybin)
# w = np.random.rand(m, 1)

start = time.time()
[f,g] = LogisticLossVec(w,X,y,1)
end = time.time()
print("Logistic Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = LeastSquaresVec(w,X,y,1)
end = time.time()
print("Least Square")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

start = time.time()
[f,g] = HingeLossVec(w,X,y,1)
end = time.time()
print("Hinge Loss")
print("Time Taken = " + str(end - start))
print("Function value = " + str(f))
print("Printing Gradient:")
print(g)

Logistic Loss
Time Taken = 0.004076480865478516
Function value = inf
Printing Gradient:
[[23.47291348]
 [18.30352544]
 [21.56395418]
 ...
 [20.70788864]
 [20.40633109]
 [21.35320558]]
(100, 1)
Least Square
Time Taken = 0.007275581359863281
Function value = 62920251624.59748
Printing Gradient:
[[256936.88452829]
 [202634.63426419]
 [249069.88869177]
 ...
 [225531.29065722]
 [247023.52015019]
 [244225.93992716]]
Hinge Loss
Time Taken = 0.020933866500854492
Function value = 62920251624.59748
Printing Gradient:
[[256936.88452829]
 [202634.63426419]
 [249069.88869177]
 ...
 [225531.29065722]
 [247023.52015019]
 [244225.93992716]]


  f =  0.5*lam*np.sum(w**2) + np.sum(np.log(1+np.exp(-np.multiply(np.reshape(y,(n,1)),pdt))))
  c = -np.multiply(np.reshape(y,(n,1)), 1/(1+np.exp(np.multiply(np.reshape(y,(n,1)),pdt))))


## Lets us code the above Loss Fuctions in CVXPY!

CVXPY is an open source Python-embedded modeling language for convex optimization problems. Link: https://www.cvxpy.org/

In [None]:
def LogisticLossCVXPY(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
   
    

    return [f, g]

In [None]:
def LeastSquaresCVXPY(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient

    x = cp.Variable(n)
    f = cp.sum_squares(X @ w- y)
    g = cp.sum(0.5*lam*w**2)
    
    
    return [f, g]

In [None]:
def HingeLossCVXPY(w, X, y, lam):
    # Computes the cost function for all the training samples
    # where f is the function value and g is the gradient
    return [f, g]

In [None]:
import numpy as np
n = 100
m = 10

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

start = time.time()
[f1,g1] = LogisticLossCVXPY(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value Naive = " + str(f1))
print("Printing Gradient Naive:")
print(g1)

start = time.time()
[f2,g2] = LeastSquaresCVXPY(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value For = " + str(f2))
print("Printing Gradient For:")
print(g2)

start = time.time()
[f2,g2] = HingeLossCVXPY(w,X,y,1)
end = time.time()
print("Time Taken = " + str(end - start))
print("Function value For = " + str(f2))
print("Printing Gradient For:")
print(g2)

## Compare the losses with Graph



In [None]:
def LogisticLossFun(w, X, y, lam):
    return error_ll

def LeastSquaresFun(w, X, y, lam):
    return error_ls

def HingeLossFun(w, X, y, lam):
    return error_hl

def plot_errors(error_ll, error_ls, error_hl, num):
    plt.plot(num, error_ll, label="Logistic Loss")
    plt.plot(num, error_ls, label="Least Squares")
    plt.plot(num, error_hl, label="Hinge Loss")
    plt.show()
    return

In [None]:
n = 100
m = 10000

X = np.random.rand(n,m)
y = np.random.rand(n)
ybin = [(int(yi >= 0.5) - int(yi < 0.5)) for yi in y]
y = np.array(ybin)
w = np.random.rand(m, 1)

error_ll = LogisticLossFun(w,X,y,1)
error_ls = LeastSquaresFun(w,X,y,1)
error_hl = HingeLossFun(w,X,y,1)
plot_errors(error_ll, error_ls, error_hl, 100)