Application of stochastic gradient ascent for logistic regression

In [1]:
import numpy as np
import pandas as pd
from scipy import stats as st

In [2]:
df5 = pd.read_excel (r'admit.xlsx', header = 0)

In [3]:
df5 = df5.iloc[:,:4]
X = df5.iloc[:,1:].values
y = df5.iloc[:,0].values

In [4]:
# define gradient calculator
def grad_calc(y, x, a):
    grad = np.empty([x.shape[1], 1])
    for i in np.arange(grad.shape[0]):
        # calculate for each index of the gradient
        grad[i] = (y - (1/(1 + np.exp(-np.dot(a,x.T))))) * x[0][i]
    return(grad.T)

In [5]:
# define stochastic gradient ascent algorithm
def stoch_grad_ASC(r, m, X, y):
    np.random.seed(123) # make the results reproducible

    r = 1
    m = 1000
    n = X.shape[0]
    coeffs = []

    for k in np.arange(r):
        a = np.empty([m, X.shape[1]])
        # step 1
        # arbitrarily initialize w1
        a[0,:] = np.random.rand(1,X.shape[1])
        print(a[0,:])

        # step 2
        for i in (np.arange(m-1) + 1):
            # (a) choose index uniformly at random
            index = st.randint.rvs(0, n, size=1)

            # (b) choose step-size sm > 0
            sm = 1/m

            # (c) update w
            a[i,:] = a[i-1,:] + sm * grad_calc(y[index], X[index,], a[i-1,:])
            
        # step 3
        #print(a)
        coeffs.append(a[m-1,:])
            
    return(coeffs)

In [6]:
coefficients = stoch_grad_ASC(r = 30, m = 1000, X = X, y = y)

[0.69646919 0.28613933 0.22685145]


In [7]:
np.mean(coefficients, axis = 0)

array([-0.1705613 ,  0.25962486,  0.08282202])