In [218]:
import numpy as np 
import pandas as pd

In [5]:
# Load the survey spreadsheet into a DataFrame. The path is relative, so it is
# resolved against the kernel's current working directory.
# Later cells read the 'Study hours' and 'Pass' columns from this frame.
f = pd.read_excel('survey-data.xlsx')
# Bare last expression so the notebook displays the frame as the cell output.
f

Unnamed: 0,Study hours,Pass
0,0.5,0
1,0.75,0
2,1.0,0
3,1.25,0
4,1.5,0
5,1.75,0
6,1.75,1
7,2.0,0
8,2.25,1
9,2.5,0


In [85]:
# Features as a single row: shape (1, n_samples), one column per sample.
X = np.array(f['Study hours'].values, ndmin=2)
# Stack a row of ones on top so the first weight acts as the bias term.
X_bar = np.vstack((np.ones_like(X, dtype=float), X))

# Labels taken from the 'Pass' column, in the same sample order as X.
Y = f['Pass'].values

In [86]:
def sigmoid(x):
    """Return the logistic function 1 / (1 + exp(-x)), applied element-wise.

    Accepts anything `np.exp` accepts (scalar or array) and returns a value of
    the matching shape.
    """
    denominator = 1 + np.exp(-x)
    return 1 / denominator

def logistic_regression(x_bar, y, w_init, eta, activation_function, tol = 1e-4, check_w_after = 10, max_iterations = 1000):
    """Fit weights by stochastic gradient descent, one sample per update.

    Parameters
    ----------
    x_bar : ndarray of shape (dim, N)
        Design matrix with one sample per column.
    y : indexable of length N
        Target value for each column of ``x_bar``.
    w_init : ndarray
        Starting weights; used as a column vector of shape (dim, 1) in the
        update rule.
    eta : float
        Learning rate.
    activation_function : callable
        Applied to the linear score ``w.T @ x`` to get the prediction
        (for logistic regression, a sigmoid).
    tol : float, default 1e-4
        Training stops when the weights moved by less than ``tol`` (Euclidean
        norm) over the last ``check_w_after`` updates.
    check_w_after : int, default 10
        Number of updates between convergence checks.
    max_iterations : int, default 1000
        Hard cap on the number of single-sample updates.

    Returns
    -------
    (w, it) : tuple
        ``w`` is the list of every weight vector visited, starting with
        ``w_init`` and ending with the final weights; ``it`` is the number of
        single-sample updates performed. The same tuple is returned whether
        training stopped on convergence or on the iteration cap.
    """
    w = [w_init]
    N = x_bar.shape[1]
    dim = x_bar.shape[0]
    it = 0
    if N == 0:
        # Nothing to learn from; without this guard the loop below never advances.
        return w, it
    while it < max_iterations:
        # Visit the samples in a fresh random order on every pass.
        for i in np.random.permutation(N):
            xi = x_bar[:, i].reshape(dim, 1)
            yi = y[i]
            zi = activation_function(np.dot(w[-1].T, xi))
            w_new = w[-1] - eta*((zi - yi)*xi)
            it += 1
            # Every `check_w_after` updates, compare the new weights with the
            # weights from `check_w_after` updates ago (indexed before appending).
            converged = (it % check_w_after == 0
                         and np.linalg.norm(w_new - w[-check_w_after]) < tol)
            # Record the update first so w[-1] is always the latest weights.
            w.append(w_new)
            if converged or it >= max_iterations:
                return w, it
    return w, it

In [144]:
# One weight per row of X_bar (the row of ones included), drawn uniformly from [0, 1).
n_features = len(X_bar)
w_init = np.random.rand(1, n_features)
# Train with learning rate 0.5 and tolerance 1e-6; the weights go in as a column vector.
# Note: the second return value counts single-sample updates, not full passes
# over the data, despite being stored in `epochs`.
w, epochs = logistic_regression(
    X_bar, Y, w_init.T,
    eta=0.5, activation_function=sigmoid, tol=1e-6,
)

In [88]:
from sklearn.linear_model import LogisticRegression

In [216]:
# Reference fit with scikit-learn. X_bar.T still contains the all-ones column, so
# coef_[:, 0] belongs to that constant feature and coef_[:, 1] to study hours;
# the bias reported below is sklearn's own intercept_.
model = LogisticRegression(tol=1e-6, random_state=0).fit(X_bar.T, Y)
# Pull out plain scalars: %-formatting a size-1 array relies on an implicit
# array-to-scalar conversion that NumPy has deprecated.
print('The retrieved coefficients from the sklearn library model w: [%.2f, %.2f] and accuracy score: %.2f' %(model.intercept_[0], model.coef_[0, 1], model.score(X_bar.T, Y)))

# Score the hand-trained weights directly rather than overwriting attributes of
# the fitted sklearn model: that would leave sklearn's coefficient for the ones
# column inside the "manual" decision function and destroy the reference model.
w_manual = w[-1].ravel()  # [bias, slope]; assumes w[-1] has shape (2, 1)
# sigmoid(z) > 0.5 exactly when z > 0, so thresholding the linear score at zero
# gives the predicted class (labels assumed to be 0/1).
manual_pred = (w_manual @ X_bar > 0).astype(int)
manual_accuracy = np.mean(manual_pred == Y)
print('The retrieved coefficients from the manually built model w: [%.2f, %.2f] and accuracy score: %.2f' %(w_manual[0], w_manual[1], manual_accuracy))

The retrieved coefficients from the sklearn library model w: [-3.14, 1.15] and accuracy score: 0.80
The retrieved coefficients from the manually built model w: [-6.31, 3.59] and accuracy score: 0.75
