### Dataset
Lets Load the dataset. We shall use the following datasets:
Features are in: "sido0_train.mat"
Labels are in: "sido0_train.targets"

In [10]:
from scipy.io import loadmat
import numpy as np

X = loadmat(r"/Users/rkiyer/Desktop/teaching/CS6301/jupyter/data/sido0_matlab/sido0_train.mat")
y = np.loadtxt(r"/Users/rkiyer/Desktop/teaching/CS6301/jupyter/data/sido0_matlab/sido0_train.targets")

# Statistics of the Dense Format of X
X = X['X'].todense()
print(X.shape)

(12678, 4932)


### Logistic Regression Definition
Lets use the Logistic Regression definition we previously used


In [11]:
def LogisticLoss(w, X, y, lam):
    # Computes the cost function for all the training samples
    m = X.shape[0]
    Xw = np.dot(X,w)
    yT = y.reshape(-1,1)
    yXw = np.multiply(yT,Xw)
    f = np.sum(np.logaddexp(0,-yXw)) + 0.5*lam*np.sum(np.multiply(w,w))
    gMul = 1/(1 + np.exp(yXw))
    ymul = -1*np.multiply(yT, gMul)
    g =  np.dot(ymul.reshape(1,-1),X) + lam*w.reshape(1,-1)
    g = g.reshape(-1,1)
    return [f, g]      

### Barzelia Borwein step length
Lets invoke BB Step Length Gradient Descent

In [24]:
from numpy import linalg as LA

def gdBB(funObj,w,maxEvals,alpha,gamma,X,y,lam, verbosity, freq):
    [f,g] = funObj(w,X,y,lam)
    funEvals = 1
    funVals = []
    f_old = f
    g_old = g
    funVals.append(f)
    numBackTrack = 0
    while(1):
        wp = w - alpha*g
        [fp,gp] = funObj(wp,X,y,lam)
        funVals.append(f)
        funEvals = funEvals+1
        backtrack = 0
        if funEvals > 2:
            g_diff = g - g_old
            alpha = -alpha*np.dot(g_old.T, g_diff)[0,0]/np.dot(g_diff.T, g_diff)[0,0]
        while fp > f - gamma*alpha*np.dot(g.T, g):
            alpha = alpha*alpha*np.dot(g.T, g)[0,0]/(2*(fp + np.dot(g.T, g)[0,0]*alpha - f))
            wp = w - alpha*g
            [fp,gp] = funObj(wp,X,y,lam)
            funVals.append(f)
            funEvals = funEvals+1
            numBackTrack = numBackTrack + 1
        f_old = f
        g_old = g
        w = wp
        f = fp
        g = gp
        optCond = LA.norm(g, np.inf)
        if ((verbosity > 0) and (funEvals % freq == 0)):
            print(funEvals,alpha,f,optCond)
        if (optCond < 1e-2):
            break
        if (funEvals >= maxEvals):
            break
    return (funVals,numBackTrack)

In [25]:
[nSamples,nVars] = X.shape
w = np.zeros((nVars,1))
(funV1,numBackTrack) = gdBB(LogisticLoss,w,250,1,1e-4,X,y,1,1,10)
print(len(funV1))
print("Number of Backtrackings = " + str(numBackTrack))

  


30 2.5788899258035455e-05 1135.7630614160032 76.76587360949645
40 3.4479636532951836e-05 932.6568131903588 87.65958408181322
50 4.021247435512306e-06 849.9100523680455 43.4455639307481
70 1.7793550062790673e-05 776.6339625092082 56.759560073959456
80 0.00013351557593887492 744.1113239997849 39.28022856286154
100 8.059370484177815e-09 687.6850460523219 74.14968315884308
110 0.0003390191457756867 661.652761006767 21.602772936085945
120 0.004059006282489887 518.5101896471618 43.03303982013437
130 3.537647992869348e-05 513.0734220349092 8.84234859224609
150 0.00254417659060005 448.2235645802006 80.78196339937944
160 7.613967174596072e-05 424.1394663061126 16.374169898800403
170 2.5668319295286767e-08 416.9102811833201 49.00940438219084
190 1.1284701287159109e-05 379.81228121212956 21.89514315379142
220 9.464450390497666e-07 237.86073095644207 9.047437842072526
230 0.004223306733352749 227.94326633703025 50.92814591412953
240 1.60654955766724e-05 216.1835035208207 3.451857941580089
250 4.13

### Conjugate Gradient Descent
Nonlinear Conjugate Gradient Descent

In [37]:
from numpy import linalg as LA

def gdCG(funObj,w,maxEvals,alpha,gamma,X,y,lam, verbosity, freq):
    [f,g] = funObj(w,X,y,lam)
    funEvals = 1
    funVals = []
    f_old = f
    g_old = g
    funVals.append(f)
    numBackTrack = 0
    d = g
    while(1):
        wp = w - alpha*d
        [fp,gp] = funObj(wp,X,y,lam)
        funVals.append(f)
        funEvals = funEvals+1
        backtrack = 0
        if funEvals > 2:
            alpha = min(1,2*(f_old - f)/np.dot(g.T, g)[0,0])
            beta = np.dot(g.T, g)[0,0]/np.dot(g_old.T, g_old)[0,0]
            d = g + beta*d
        else:
            d = g
        while fp > f - gamma*alpha*np.dot(g.T, d)[0,0]:
            alpha = alpha*alpha*np.dot(g.T, d)[0,0]/(2*(fp + np.dot(g.T, d)[0,0]*alpha - f))
            wp = w - alpha*d
            [fp,gp] = funObj(wp,X,y,lam)
            funVals.append(f)
            funEvals = funEvals+1
            numBackTrack = numBackTrack + 1
        f_old = f
        g_old = g
        w = wp
        f = fp
        g = gp
        optCond = LA.norm(g, np.inf)
        if ((verbosity > 0) and (funEvals % freq == 0)):
            print(funEvals,alpha,f,optCond)
        if (optCond < 1e-2):
            break
        if (funEvals >= maxEvals):
            break
    return (funVals,numBackTrack)

In [38]:
[nSamples,nVars] = X.shape
w = np.zeros((nVars,1))
(funV1,numBackTrack) = gdCG(LogisticLoss,w,250,1,1e-4,X,y,1,1,10)
print(len(funV1))
print("Number of Backtrackings = " + str(numBackTrack))

  


30 4.8248838390056086e-05 1100.5825812169905 71.17864014090668
40 5.761205021221726e-06 1020.2047566018817 60.08817550984056
50 7.00499199239626e-07 1002.4726260840971 153.62023184767298
60 7.545618878621844e-06 925.5611967492454 85.61509239991781
70 1.1115290013781674e-07 835.8507464241959 91.58198734211311
90 4.441863657411808e-05 751.702281580756 101.83346471201462
100 4.1046760588252236e-06 732.1216358152724 86.17994084529874
110 2.3993096858746483e-05 644.2756391402414 27.928830911694643
120 1.0056307504898691e-06 619.8412799763845 57.64301891477717
130 3.1198498348610805e-06 611.8077214404658 49.09555617390839
140 1.1188637267886875e-06 543.3696061152098 85.6780959606216
150 1.9476528884453286e-06 521.0911809959176 57.99381049868182
160 3.809031251617791e-06 436.93965622540577 60.16531562869829
170 8.993239281748671e-06 432.4501776394318 36.25034076350421
190 9.097237262686172e-06 390.3268532288466 40.463044973828325
200 2.959477446275187e-07 388.0645119908878 34.67348963196586
2