In [1]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
from sklearn.datasets import load_iris

In [3]:
# Load the iris dataset and reduce it to a two-feature, binary-label problem.
iris = load_iris()
X = iris.data[:, 0:2]        # keep only the first two feature columns
y = 1 * (iris.target != 0)   # 1 where the original target is non-zero, else 0

In [4]:
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))``, element-wise.

    ``z`` is the linear score: the dot product of X (the independent
    variables) and beta (the coefficients).

    The textbook form ``exp(z) / (1 + exp(z))`` overflows for large positive
    ``z`` and evaluates to ``inf / inf = nan``. The identity
    ``sigmoid(z) = exp(-log(1 + exp(-z)))`` is used instead, with
    ``np.logaddexp(0, -z)`` computing ``log(1 + exp(-z))`` without overflow.

    Parameters
    ----------
    z : scalar or array-like of numbers
        Linear score(s).

    Returns
    -------
    numpy scalar or ndarray
        Values in [0, 1], with the same shape as ``z``.
    """
    return np.exp(-np.logaddexp(0, -np.asarray(z)))

$$ll=\sum_{i=1}^{N}\left[y_{i} \beta^{T} x_{i}-\log \left(1+e^{\beta^{T} x_{i}}\right)\right]$$

In [5]:
# Next, we implement the log-likelihood shown above as a Python function.

In [6]:
def log_likelihood(independent_variables, dependant_variable, parameters):
    """Return the logistic-regression log-likelihood, summed over all rows.

    Computes ``sum_i [ y_i * s_i - log(1 + exp(s_i)) ]`` where
    ``s = independent_variables . parameters``.

    ``np.logaddexp(0, s)`` evaluates ``log(1 + exp(s))`` without overflow;
    the naive ``np.log(1 + np.exp(s))`` becomes ``inf`` for large scores and
    would turn the whole sum into ``-inf``.

    Parameters
    ----------
    independent_variables : array-like
        Design matrix; one row per observation (include the intercept column
        here if the model has one).
    dependant_variable : array-like
        One label per row of ``independent_variables``.
    parameters : array-like
        Coefficient vector, one entry per column of ``independent_variables``.

    Returns
    -------
    numpy scalar
        The summed log-likelihood.
    """
    score = np.dot(independent_variables, parameters)
    return np.sum(dependant_variable * score - np.logaddexp(0, score))

Taking the gradient of the log-likelihood with respect to $\beta$ gives us:

$$\nabla ll=X^{T}(Y-\text{Predictions})$$

Now let's create the function!

In [7]:
def logistic_regression(independent_variables, dependant_variables, max_steps, learning_rate, min_error, add_intercept=True, log_every=10000):
    """Fit logistic-regression coefficients by gradient ascent on the log-likelihood.

    Every step computes ``predictions = sigmoid(X . parameters)``, forms the
    gradient ``X.T . (y - predictions)`` and moves ``parameters`` by
    ``learning_rate * gradient``.

    Parameters
    ----------
    independent_variables : array-like, 2-D
        One row per observation, one column per feature.
    dependant_variables : array-like
        One label per row of ``independent_variables``.
    max_steps : int
        Upper bound on the number of gradient steps.
    learning_rate : float
        Multiplier applied to the gradient at each step.
    min_error : float
        Training stops as soon as the mean squared parameter update of a step
        falls below this value.
    add_intercept : bool, default True
        When true, a column of ones is prepended to the features so the first
        returned coefficient is the intercept.
    log_every : int or None, default 10000
        Print the current log-likelihood every ``log_every`` steps (starting
        with step 0). ``None`` or ``0`` disables the printing.

    Returns
    -------
    numpy.ndarray
        1-D coefficient vector; the intercept comes first when
        ``add_intercept`` is true.
    """
    # Accept plain Python lists as well as arrays (``.shape`` is used below).
    independent_variables = np.asarray(independent_variables)
    dependant_variables = np.asarray(dependant_variables)

    if add_intercept:
        # A column of ones lets the first coefficient act as the intercept.
        intercept = np.ones((independent_variables.shape[0], 1))
        independent_variables = np.hstack((intercept, independent_variables))

    parameters = np.zeros(independent_variables.shape[1])  # initial coefficients
    for step in range(max_steps):
        # Forward pass: linear scores, then predicted probabilities.
        scores = np.dot(independent_variables, parameters)
        predictions = sigmoid(scores)

        # Gradient of the log-likelihood: X^T (Y - Predictions).
        derivative_ll = np.dot(independent_variables.T, dependant_variables - predictions)

        # Computed once; used for both the update and the stopping test.
        update = learning_rate * derivative_ll
        parameters += update

        if log_every and step % log_every == 0:
            print(log_likelihood(independent_variables, dependant_variables, parameters))

        # Stop early once the updates have become negligibly small.
        if np.mean(update ** 2) < min_error:
            break

    return parameters
        

In [8]:
# max_steps is only a very large cap: training ends earlier as soon as the
# mean squared parameter update drops below min_error.
result = logistic_regression(
    X,
    y,
    max_steps=2 * 10**12,
    learning_rate=1e-5,
    min_error=1e-11,
)

-103.58226560112641
-38.48657379087793
-25.698603905781567
-20.121386648705258
-16.965673462545208
-14.917805763810975
-13.472106597968981
-12.39174213899274
-11.550593195044588
-10.875109369568849
-10.319387799571247
-9.8532335486099
-9.455932443198735
-9.11277806736115
-8.813026626282623
-8.548637220981393
-8.313465953301016
-8.102733417160271
-7.912662862422499
-7.740228257934197
-7.582975064976589
-7.438890282101474
-7.306306596897681
-7.183830602148151
-7.0702882850934285
-6.9646831095808714
-6.866163409952962
-6.7739967601463364
-6.687549630178369
-6.606271094673206
-6.529679678232892
-6.457352652002916
-6.388917262383494
-6.324043495140991
-6.262438068910715
-6.203839420069537
-6.148013492370557
-6.094750183951257
-6.043860334487276
-5.995173158640307
-5.94853405019176
-5.903802695595966
-5.860851447029616
-5.819563914042758
-5.779833740143716
-5.741563536468986
-5.704663949396352
-5.66905284278895
-5.634654578686293
-5.601399382827111
-5.569222783505121
-5.538065114015016


In [9]:
# result[0] is the intercept (the column of ones is prepended to the features);
# the remaining entries are the coefficients of the two feature columns.
print(
    "Therefore the coefficient of our intercept is {:f} "
    "and the coefficients for our set of independent variables are: {:f} and {:f}.".format(*result)
)

Therefore the coefficient of our intercept is -2.549134 and the coefficients for our set of independent variables are: 5.505012 and -8.736687.
