# Logistic Regression 
Gradient descent approach

In [25]:
# importing the packages

import pandas as pd
import numpy as np
from __future__ import division

## Toy Dataset

In [6]:
X = np.array([[1, 2], [4, 5], [2, 5]])
y = np.array([1, 0, 0])

print X, y

[[1 2]
 [4 5]
 [2 5]] [1 0 0]


In [7]:
test = np.array([[3, 1], [3, 5]])
print(test)

[[3 1]
 [3 5]]


## Define the Logistic Regression class

In [21]:
class LogisticRegression():
    def __init__(self, X, y):
        self.X = X
        self.y = y
    
    def sigmoid(self, z):
        '''
        Computes the sigmoid
        '''
        return 1/(1 + np.exp(-z))
    
    def loss_function(self):
        '''
        Computes log loss for binary classification. 
        In case ofmulti class classigication, we need to compute cross entropy
        '''
        loss = (-self.y * np.log(self.train_pred + 0.0001) - (1-self.y)*np.log(1-self.train_pred + 0.0001)).mean()
        return loss
    
    def gradient_descent(self):
        '''
        Updates the weights by computing: W - learning rate * gradient.
        gradient = x * (sigmoid(z) - y)
        
        Returns:
        W - Updates weights
        '''
        grad = np.dot(self.X.T, (self.train_pred - self.y)) * self.learning_rate
        self.W = self.W - grad
        return self.W
    
    def log_reg(self, learning_rate, iterations):
        '''
        Function that iterates over "iterations" to compute loss and then 
        performs gradient descent to update the weights
        
        Inputs:
        learning_rate - learning rate
        iterations - Number of iteations
        
        ''' 
        self.learning_rate = learning_rate
        self.W = np.random.randn(len(self.X[0])) # weights are randomly initialised at first
        for it in range(iterations):
            self.train_pred = []
            for row in self.X:
                z = np.sum([row[i] * self.W[i] for i in range(len(row))])
                self.train_pred.append(self.sigmoid(z))
            self.train_pred = np.array(self.train_pred)
            loss = self.loss_function()
            """print(Epoch {it}, loss {loss})"""
            print('Epoch: '+  str(it))
            print('loss: '+ str(loss))
            self.W = self.gradient_descent()
        print('Coefficients: '+ str(self.W))
        
    def predict(self, test):
        '''
        Multiply the features of every row with the corresponding coefficients
        '''
        self.test_pred = []
        for row in test:
            z = np.sum([row[i] * self.W[i] for i in range(len(row))])
            self.test_pred.append(self.sigmoid(z))
        print('Predictions: '+ str(self.test_pred))

### Initialise the model

In [22]:
model = LogisticRegression(X,y)

### Fit logistic regression to get the coefficients

In [23]:
model.log_reg(learning_rate=0.1, iterations=10)

Epoch: 0
loss: 1.7676618303055556
Epoch: 1
loss: 0.544093871752797
Epoch: 2
loss: 0.4891392219221098
Epoch: 3
loss: 0.4713190775481871
Epoch: 4
loss: 0.468902988672947
Epoch: 5
loss: 0.468544680916872
Epoch: 6
loss: 0.4682778105295354
Epoch: 7
loss: 0.46802156412413054
Epoch: 8
loss: 0.4677740603034704
Epoch: 9
loss: 0.4675349734463429
Coefficients: [-0.17284567 -0.27739538]


We can see that the log loss decreases with every iteration

### Predict target values for the test set

In [24]:
model.predict(test)

Predictions: [0.310896293236343, 0.12948568309901753]
