## Logistic Regression with L2 Regularization

In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.datasets import load_iris
dataset = load_iris()
X = dataset.data
y = dataset.target

target_names = list(dataset.target_names)
print(target_names)

['setosa', 'versicolor', 'virginica']


In [3]:
# Change to binary class
y = (y > 0).astype(int)
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

In [13]:
# Model: Linear Regression
class LogReg:
    """
    This implementation of Logistic Regression uses batch gradient descent with regularization.
    """
    def __init__(self, num_iters=2800, tolerance = 1e-10, alpha=0.00001, lambd=10, threshold=0.5, verbose=False):
        self.num_iters = num_iters
        self.alpha = alpha # Learning rate
        self.lambd = lambd # Regularization parameter
        self.tolerance = tolerance
        self.threshold = threshold
        self.verbose = verbose
        
    def add_ones(self, X):
        return np.concatenate((np.ones((len(X),1)), X), axis = 1)
      
    def sigmoid(self, X, theta):
        return 1/(1 + np.exp(X@theta))
    
    def cost(self, X, y_true):
        m = X.shape[0]
        y_hat = self.sigmoid(X, self.theta)
        temp_theta = self.theta[1:].copy()
        
        Cost = np.sum(-1*y_true*np.log(y_hat)-(1-y_true)*np.log(1-y_hat)) + self.lambd * np.sum(temp_theta**2)
        
        return Cost
    
    def fit(self, X, y):
        X = X.copy()
        X = self.add_ones(X)
        y = y.reshape(-1, 1)
        
        self.theta = np.zeros((len(X[0]), 1))
        
        current_iter = 1
        norm = 1
        while (norm >= self.tolerance and current_iter < self.num_iters):
            old_theta = self.theta.copy()
            #grad = np.dot(np.transpose(y_hat-self.y), self.X)
            temp_theta = self.theta[1:].copy()
            grad = X.T@(y - self.sigmoid(X, self.theta)) + self.lambd * np.sum(temp_theta)
            grad= grad.reshape(-1, 1)
            
            self.theta = self.theta - self.alpha*grad
            
            if self.verbose and (current_iter%100 == 0):
                print(f'cost for {current_iter} iteration : {self.cost(X, y)}')
            norm = np.linalg.norm(old_theta - self.theta)
            current_iter += 1
            
        return self.theta
    
    def evaluate(self, X, y):
        """
        Returns mse loss for a dataset evaluated on the hypothesis
        """
        X = self.add_ones(X)
        return self.cost(X, y)
    
    def predict(self, X):
        prob = self.predict_proba(X)
        return (prob > self.threshold).astype(int)
        
    def predict_proba(self, X):
        """
        Returns probability of predictions.
        """
        X = self.add_ones(X)  
        return self.sigmoid(X, self.theta)

In [14]:
logreg = LogReg(verbose=True)

In [15]:
logreg.fit(X, y)

cost for 100 iteration : 78.72554244539407
cost for 200 iteration : 71.7637559135921
cost for 300 iteration : 66.07842247755053
cost for 400 iteration : 61.249885580691895
cost for 500 iteration : 57.14508887349538
cost for 600 iteration : 53.654471784734895
cost for 700 iteration : 50.68400001067733
cost for 800 iteration : 48.153754835764694
cost for 900 iteration : 45.99638378599468
cost for 1000 iteration : 44.1554145494372
cost for 1100 iteration : 42.5836123943238
cost for 1200 iteration : 41.241486686288404
cost for 1300 iteration : 40.09598899186002
cost for 1400 iteration : 39.11940756500792
cost for 1500 iteration : 38.288443875685104
cost for 1600 iteration : 37.58344894655533
cost for 1700 iteration : 36.98779553479041
cost for 1800 iteration : 36.48736352452237
cost for 1900 iteration : 36.070118517386234
cost for 2000 iteration : 35.7257665818431
cost for 2100 iteration : 35.44547098210861
cost for 2200 iteration : 35.22161925432031
cost for 2300 iteration : 35.0476311702

array([[ 0.21239942],
       [ 0.1232903 ],
       [ 0.64963602],
       [-0.99385055],
       [-0.36693142]])

In [16]:
predictions = logreg.predict(X)
predictions = predictions.squeeze()

In [17]:
np.sum(y == predictions) / len(y)

1.0

In [18]:
predictions

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1])