### Dataset
Lets Load the dataset. We shall use the following datasets:
Features are in: "sido0_train.mat"
Labels are in: "sido0_train.targets"

In [2]:
from scipy.io import loadmat
import numpy as np

X = loadmat(r"sido0_matlab/sido0_train.mat")
y = np.loadtxt(r"sido0_matlab/sido0_train.targets")

# Statistics of the Dense Format of X
X = X['X'].todense()
print(X.shape)

(12678, 4932)


### Logistic Regression Definition
Lets use the Logistic Regression definition we previously used


In [2]:
def LogisticLoss(w, X, y, lam):
    # Computes the cost function for all the training samples
    m = X.shape[0]
    Xw = np.dot(X,w)
    yT = y.reshape(-1,1)
    yXw = np.multiply(yT,Xw)
    f = np.sum(np.logaddexp(0,-yXw)) + 0.5*lam*np.sum(np.multiply(w,w))
    gMul = 1/(1 + np.exp(yXw))
    ymul = -1*np.multiply(yT, gMul)
    g =  np.dot(ymul.reshape(1,-1),X) + lam*w.reshape(1,-1)
    g = g.reshape(-1,1)
    return [f, g]      

### Reinvoking Gradient Descent Armiojo V4
Lets invoke the final version of Armijo Line Search GD

In [3]:
from numpy import linalg as LA

def gdArmijo(funObj,w,maxEvals,alpha,gamma,X,y,lam, verbosity, freq):
    [f,g] = funObj(w,X,y,lam)
    funEvals = 1
    funVals = []
    f_old = f
    g_old = g
    funVals.append(f)
    alpha = 1/LA.norm(g)
    numBackTrack = 0
    while(1):
        wp = w - alpha*g
        [fp,gp] = funObj(wp,X,y,lam)
        funVals.append(f)
        funEvals = funEvals+1
        backtrack = 0
        while fp > f - gamma*alpha*np.dot(g.T, g):
            alpha = alpha*alpha*np.dot(g.T, g)[0,0]/(2*(fp + np.dot(g.T, g)[0,0]*alpha - f))
            wp = w - alpha*g
            [fp,gp] = funObj(wp,X,y,lam)
            funVals.append(f)
            funEvals = funEvals+1
            numBackTrack = numBackTrack + 1
        if funEvals > 2:
            alpha = min(1,2*(f_old - f)/np.dot(g.T, g)[0,0])
        f_old = f
        g_old = g
        w = wp
        f = fp
        g = gp
        optCond = LA.norm(g, np.inf)
        if ((verbosity > 0) and (funEvals % freq == 0)):
            print(funEvals,alpha,f,optCond)
        if (optCond < 1e-2):
            break
        if (funEvals >= maxEvals):
            break
    return (funVals,numBackTrack)

### Gradient Descent with Line Search
#### Version 1: Armijo Backtracking Line Search
Lets now define the most basic version of Gradient Descent and tune the learning rate!

In [4]:
from numpy import linalg as LA

def gdAccelerated(funObj,w,maxEvals,alpha,gamma,X,y,lam, verbosity, freq):
    [f,g] = funObj(w,X,y,lam)
    funEvals = 1
    funVals = []
    funVals.append(f)
    numBackTrack = 0
    t = 1; 
    x = w;
    while(1):
        if (funEvals > 1):
            tp = (1 + np.sqrt(1+4*t*t))/2
            x = w + ((t-1)/tp)*(w-w_old)
            t = tp
            [f,g] = funObj(x,X,y,lam)
            funEvals = funEvals+1
        w_old = w
        wp = x - alpha*g; 
        [fp,gp] = funObj(wp,X,y,lam)
        funEvals = funEvals+1;
        backtrack = 0
        while fp > f - gamma*alpha*np.dot(g.T, g):
            alpha = alpha*alpha*np.dot(g.T, g)[0,0]/(2*(fp + np.dot(g.T, g)[0,0]*alpha - f))
            wp = x - alpha*g; 
            [fp,gp] = funObj(wp,X,y,lam)
            funEvals = funEvals+1;  
            funVals.append(f)
            numBackTrack = numBackTrack + 1
            backtrack = 1            
        w = wp
        f = fp
        g = gp
        if (backtrack == 0):
            funVals.append(f)
        optCond = LA.norm(g, np.inf)
        if ((verbosity > 0) and (funEvals % freq == 0)):
            print(funEvals,alpha,f,optCond)
        if (optCond < 1e-2):
            break
        if (funEvals >= maxEvals):
            break
    return (funVals,numBackTrack)

In [5]:
[nSamples,nVars] = X.shape
w = np.zeros((nVars,1))
(funV1,numBackTrack) = gdArmijo(LogisticLoss,w,250,1,1e-4,X,y,1,1,10)
print(len(funV1))
print("Number of Backtrackings = " + str(numBackTrack))

20 4.1796056076066775e-05 1012.4951823907003 95.53384664247373
30 3.997469461451199e-05 953.5753177548778 35.655121235018115
40 0.00012388804702371698 885.1699025757175 31.335650768554615
50 1.3144239424878223e-05 868.4387633860271 22.72480197211717
60 3.19951286344067e-05 846.6514541812807 38.97806992194706
70 6.404017174599795e-05 830.1929117099367 42.25847013698607
80 0.00011081246239613855 812.8209357043669 24.121207738723843
90 3.612908378647803e-05 797.2790559279035 24.85602397608963
100 6.343884114915358e-05 785.5404258802879 19.27475140750184
110 0.00013841402942537257 769.2091021378449 22.493117400607588
120 9.712124304379424e-05 748.6620774703173 17.151002599529306
130 0.00023936031789675434 729.0076217026603 17.03859779736474
140 1.9406980779655936e-05 692.6012054315212 17.644861089297766
150 6.422924249528637e-05 679.7141080649994 15.576008890767799
160 2.41322291549794e-05 669.1484547018877 28.44097383399261
170 4.8713805178756046e-05 660.6072256831944 14.413102172561631
1

In [6]:
[nSamples,nVars] = X.shape
w = np.zeros((nVars,1))
(funV2,numBackTrack) = gdAccelerated(LogisticLoss,w,250,1,1e-4,X,y,1,1,10)
print(len(funV2))
print("Number of Backtrackings = " + str(numBackTrack))

  


20 1.2270551792159088e-05 3421.4772580811746 447.0919884294048
30 1.2270551792159088e-05 1139.9065594799376 80.13019693661147
40 1.2270551792159088e-05 1047.8904744907043 54.85223246465415
50 1.2270551792159088e-05 985.291868887159 35.03866383246867
60 1.2270551792159088e-05 938.086984407997 27.327323327228356
70 1.2270551792159088e-05 897.1309162835348 23.599128324252504
80 1.2270551792159088e-05 858.3135600342138 21.437055141153905
90 1.2270551792159088e-05 820.5533876518972 19.893967924792296
100 1.2270551792159088e-05 783.6170359264083 18.610368547231452
110 1.2270551792159088e-05 747.8221226783364 17.440211008769058
120 1.2270551792159088e-05 713.306991704953 16.288372031217055
130 1.2270551792159088e-05 679.9772013181196 15.032861232889823
140 1.2270551792159088e-05 647.823562879202 13.553023797030034
150 1.2270551792159088e-05 616.9347270841175 11.939031480531353
160 1.2270551792159088e-05 587.3827686140373 11.061990453498689
170 1.2270551792159088e-05 559.161359353416 10.280160