In [106]:
import numpy as np
from sklearn import datasets
import math
import time

In [148]:
# the logistic function
def logistic_func(theta, x):
    t = x.T @ theta
    g = np.zeros(t.shape)
    # split into positive and negative to improve stability
    g[t>=0.0] = 1.0 / (1.0 + np.exp(-t[t>=0.0])) 
    g[t<0.0] = np.exp(t[t<0.0]) / (np.exp(t[t<0.0])+1.0)
    return g

# function to compute output of LR classifier
def lr_predict(theta,x):
    # form Xtilde for prediction
    x = np.vstack((x.T , np.ones(x.shape[0])))
    return logistic_func(theta,x)

# function to evaluate objective function (-f)
def f_eval(theta, x, y):
    t = x.T @ theta
    return -np.vdot(t,y) + np.sum(np.log(1+np.exp(t)))

# function to compute the gradient of -f
def grad(theta, x, y):
    g = logistic_func(theta,x)
    return -(x @ (y-g))

def hessian(theta, x):
    g = logistic_func(theta, x)
    n = g.shape[0]
    return np.dot(np.dot(np.dot(x, np.diag(g.reshape(n))), np.diag(1-g.reshape(n))), x.T)
    
    
# gradient descent
# returns theta and number of iterations
def gradDesc(x, y, alpha, c, rho, delta, maxiter, backTracking=False):
    # Initialization
    theta = np.zeros(x.shape[0])
    d = -grad(theta, x, y) # 3*1
    k = 0
    inner = 0
    while (k < maxiter) and (np.linalg.norm(d) > delta):
        '''
        this is backtracking tragetegy, Or should I say: Strategy :)
        '''
        if backTracking:
            alpha, m = back_tracking(x, y, theta, d, alpha, c, rho)
        theta = theta + alpha * d
        d = -grad(theta, x, y)
        k = k + 1
        if backTracking:
            inner = inner + m
    total = k + inner
    return theta, k, total


# heavy ball method
# returns theta and number of iterations
def heavyBall(x, y, alpha, beta, c, rho, delta, maxiter, backTracking=False):
    # Initialization
    theta = [np.zeros(x.shape[0]), np.zeros(x.shape[0])]
    d = -grad(theta[-1], x, y) # 3*1
    k = 1
    inner = 0
    while (k < maxiter) and (np.linalg.norm(d) > delta):
        '''
        this is backtracking tragetegy, Or should I say: Strategy :)
        '''
        if backTracking:
            alpha, m = back_tracking(x, y, theta[-1], d, alpha, c, rho)      
        theta.append(theta[k] + alpha * d + beta * (theta[k] - theta[k-1]))
        d = -grad(theta[-1], x, y)
        k = k + 1
        if backTracking:
            inner = inner + m
    total = k + inner
    return theta[-1], k-1, total-1



# nesterov's method
# returns theta and number of iterations
def nesterov(x, y, alpha, c, rho, delta, maxiter, backTracking=False):
    # Initialization
    theta = [np.zeros(x.shape[0]), np.zeros(x.shape[0])]
    d = -grad(theta[-1], x, y) # 3*1
    k = 1
    p = 0
    inner = 0
    while (k < maxiter) and (np.linalg.norm(d) > delta):
        '''
        this is backtracking tragetegy, Or should I say: Strategy :)
        '''
        if backTracking:
            alpha, m = back_tracking(x, y, theta[-1], d, alpha, c, rho)
        theta.append(theta[k] + alpha * d + p)
        k = k + 1
        beta = (k - 1) / (k + 2)
        p = beta * (theta[-1] - theta[-2])
        d = -grad(theta[-1] + p, x, y)
        if backTracking:
            inner = inner + m
    total = k + inner
    return theta[-1], k-1, total-1



# newton's method
# returns theta and number of iterations
def newton(x, y, alpha, c, rho, delta, maxiter, backTracking=False):
    # Initialization
    theta = np.zeros(x.shape[0])
    d = np.linalg.inv(hessian(theta, x)) @ (-grad(theta, x, y)) # 3*1
    k = 0
    inner = 0
    while (k < maxiter) and (np.linalg.norm(d) > delta):
        '''
        this is backtracking tragetegy, Or should I say: Strategy :)
        '''
        if backTracking:
            alpha, m = back_tracking(x, y, theta, d, alpha, c, rho)
        theta = theta + alpha * d
        d = np.linalg.inv(hessian(theta, x)) @ (-grad(theta, x, y))
        k = k + 1
        if backTracking:
            inner = inner + m
    total = inner + k
    return theta, k, total

def bfgs(x, y, alpha, c, rho, delta, maxiter, backTracking=False):
    # Initialization    
    theta = np.zeros(x.shape[0])

    h=np.linalg.inv(hessian(theta,x))
    g=grad(theta, x, y)
    d = -h @ g  
    k = 0
    inner = 0
    while (k < maxiter) and (np.linalg.norm(d) > delta):
        theta0 = theta
        g0 = g
        if backTracking:
            alpha, m = back_tracking(x, y, theta, d, alpha, c, rho)
        theta = theta + alpha*d
        g = grad(theta, x, y)
        s = np.mat(theta-theta0).T
        r = np.mat(g-g0).T
        a = h @ r
        gama=s.T @ r
        h = h + np.array((gama+(r.T @ a))/gama**2)*np.array((s @ s.T)) - np.array((a @ s.T)/gama) - np.array((s @ a.T)/gama)
        d = -h @ g
        k = k + 1
        if backTracking:
            inner = inner + m
        total = inner + k
    return theta, k, total  


# back_tracking
# returns alpha
def back_tracking(x, y, theta, d, alpha, c, rho):
    
    '''
    Phi function, see notes
    '''
    def phi(alpha):
        return f_eval(theta + alpha * d, x, y)
    '''
    h function, see notes
    '''
    def h(alpha):
        return f_eval(theta, x, y) + c * alpha * (d.T @ d)
    '''
    backtracking
    '''
    m = 0
    while phi(alpha) > h(alpha):
        alpha = rho * alpha
        m += 1
    return alpha, m

# Generate dataset

In [142]:
## Generate dataset    
np.random.seed(2020) # Set random seed so results are repeatable
x,y = datasets.make_blobs(n_samples=100,n_features=2,centers=2,cluster_std=6.0)

# Form Xtilde
x = np.vstack((x.T , np.ones(x.shape[0])) ) #3*100

# Gradient Descent

In [143]:
'''
x: np.array
y: np.array
alpha: float
c: float 1e-4 - 0.3
rho: float 0.1 - 0.8
delta: float
maxiter: int
backTracking: bool, default: False
'''
# theta_gd, num_iters, total = gradDesc(x, y, 0.5, 0.1, 0.2, 1e-3, 10000, False)
theta_gd, num_iters, total = gradDesc(x, y, 0.5, 0.1, 0.2, 1e-3, 10000, True)
print('Number of iterations required (Gradient Descent): {0}'.format(num_iters))
print('Number of iterations required (Gradient Descent Combined backtracking): {0}'.format(total))
print('Solution: [{0} {1} {2}]^T'.format(theta_gd[0], theta_gd[1], theta_gd[2]))

Number of iterations required (Gradient Descent): 962
Number of iterations required (Gradient Descent Combined backtracking): 965
Solution: [-0.28086922947048915 -0.45756681602146676 2.2134228557652382]^T




# Heavy Ball Method

In [146]:
'''
x: np.array
y: np.array
alpha: float
beta: float
c: float
rho: float
delta: float
maxiter: int
backTracking: bool, default: False
'''

# theta_hbm, num_iters = heavyBall(x, y, 0.001, 0.95, 0.1, 0.2, 1e-3, 10000, False)
# theta_hbm, num_iters = heavyBall(x, y, 0.001, 0.9, 0.1, 0.2, 1e-3, 10000, False)
theta_hbm, num_iters, total = heavyBall(x, y, 0.25, 0.9, 0.1, 0.2, 1e-3, 10000, True)
print('Number of iterations required (Heavy Ball Method): {0}'.format(num_iters))
print('Number of iterations required (Heavy Ball Method Combined backtracking): {0}'.format(total))
print('Solution: [{0} {1} {2}]^T'.format(theta_hbm[0], theta_hbm[1], theta_hbm[2]))

Number of iterations required (Heavy Ball Method): 210
Number of iterations required (Heavy Ball Method Combined backtracking): 213
Solution: [-0.28090233848756674 -0.4576125163504644 2.2138087539926894]^T




# Nesterov's method

In [122]:
'''
x: np.array
y: np.array
alpha: float
c: float
rho: float
delta: float
maxiter: int
backTracking: bool, default: False
'''


# theta_nm, num_iters, total = nesterov(x, y, 0.001, 0.1, 0.2, 1e-3, 10000, False)
theta_nm, num_iters, total = nesterov(x, y, 0.5, 0.1, 0.2, 1e-3, 10000, True)
print('Number of iterations required (Nesterov\'s Method): {0}'.format(num_iters))
print('Number of iterations required (Nesterov\'s Method Combined backtracking): {0}'.format(total))
print('Solution: [{0} {1} {2}]^T'.format(theta_nm[0], theta_nm[1], theta_nm[2]))

Number of iterations required (Nesterov's Method): 162
Number of iterations required (Nesterov's Method Combined backtracking): 166
Solution: [-0.28122598034923835 -0.4580532065467381 2.2175508597052973]^T




# Newton's method

In [123]:
'''
x: np.array
y: np.array
alpha: float
c: float
rho: float
delta: float
maxiter: int
backTracking: bool, default: False
'''
# theta_Nm, num_iters, total = newton(x, y, 1.0, 0.1, 0.2, 1e-3, 10000, False)
theta_Nm, num_iters, total = newton(x, y, 1.0, 0.1, 0.2, 1e-3, 10000, True)
print('Number of iterations required (Newton\'s method): {0}'.format(num_iters))
print('Number of iterations required (Newton\'s Method  Combined backtracking): {0}'.format(total))
print('Solution: [{0} {1} {2}]^T'.format(theta_Nm[0], theta_Nm[1], theta_Nm[2]))

Number of iterations required (Newton's method): 6
Number of iterations required (Newton's Method  Combined backtracking): 6
Solution: [-0.28087302535408804 -0.45755993255549 2.2135573728358855]^T


# BFGS

In [149]:
'''
x: np.array
y: np.array
alpha: float
c: float
rho: float
delta: float
maxiter: int
backTracking: bool, default: False
'''

theta_bfgs, num_iters, total = bfgs(x, y, 1.0, 0.1, 0.2, 1e-3, 10000, True)
print('Number of iterations required (BFGS): {0}'.format(num_iters))
print('Number of iterations required (BFGS Combined backtracking): {0}'.format(total))
print('Solution: [{0} {1} {2}]^T'.format(theta_bfgs[0], theta_bfgs[1], theta_bfgs[2]))

Number of iterations required (BFGS): 12
Number of iterations required (BFGS Combined backtracking): 12
Solution: [-0.28096747662317473 -0.4575502012675465 2.214046315782588]^T
