# Logistic Regression (SGD)

In [1]:
import numpy as np
from sklearn.metrics import accuracy_score

In [2]:
class LogisticRegression:
    """Logistic Regression implementation.

    This class provides functionalities to perform logistic regression. Check the fit method.
    """
    
    def __init__(self):
        """To add globals if required to make model persistent."""
        
        pass
    
    def sigmoid_dot(self, X, W, b):
        """Returns sigmoid of (W.X + b)"""
        
        return 1 / (1 + np.exp(-(np.dot(X, W) + b)))
    
    def update(self, X, y, y_, W, b, alpha, m):
        """Updates W, b stochastically for each datapoint."""
        
        res = y-y_
        for i in range(m):

            dJ_dW = np.dot(res[i],X[i]) / m
            dJ_db = np.mean(res)
            
            W += alpha * dJ_dW
            b += alpha * dJ_db
        return W, b 
    
    def cost(self, y, y_):
        """Returns logistic cost between predicted values and true labels."""
        
        m = y.shape[0]
        c = 0
        
        for i in range(m):
            c += y[i] * np.log(y_[i]) + (1 - y[i]) * np.log(1 - y_[i])
        return c / (-m)
    
    def fit(self, X, y, iterations=1000, alpha=0.000001):
        """
        Fits the logistic regression model to the training data.

        Parameters
        ----------
        X : np.ndarray
            Input data of shape (m, n).
        y : np.ndarray
            True labels of shape (m,).
        iterations : int, optional
            Number of iterations for training. Default is 1000.
        alpha : float, optional
            Learning rate. Default is 0.000001

        Returns
        -------
        tuple
            Trained model parameters W and b.
        """
        
        m, n = X.shape
        W = np.random.rand(n)
        b = 0
        
        for k in range(iterations):
            y_ = self.sigmoid_dot(X, W, b)
            W, b = self.update(X, y, y_, W, b, alpha,m)
            print(f"Iteration: {k}",
                  f"Cost: {self.cost(y, y_)}",
                  f"Acc: {accuracy_score(y, y_.round())}")
        return W, b
    
    def predict(self, X, W, b):
        """Generates predictions for input data X using trained model parameters W and b.
        
        Returns rounded predictions. Might need to fix.
        """
        
        s = self.sigmoid_dot(X, W, b)
        return s.round()

# Dataset Tests

Multilabel Dataset. Using 2 targets at a time as Model is Binary.

In [3]:
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification

In [4]:
X = load_iris()["data"][50:150,3:4]
y = load_iris()["target"][50:150] - 1
print(y)
m = LogisticRegression()
W,b = m.fit(X,y,100, 0.009)
m.predict(X, W,b) == y

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Iteration: 0 Cost: 0.8313986194881866 Acc: 0.5
Iteration: 1 Cost: 0.740713426560121 Acc: 0.5
Iteration: 2 Cost: 0.6747030224020663 Acc: 0.5
Iteration: 3 Cost: 0.6290495110395917 Acc: 0.5
Iteration: 4 Cost: 0.5987444207086395 Acc: 0.5
Iteration: 5 Cost: 0.5792204604081066 Acc: 0.6
Iteration: 6 Cost: 0.5668885455955076 Acc: 0.65
Iteration: 7 Cost: 0.559187254150925 Acc: 0.78
Iteration: 8 Cost: 0.554398574804787 Acc: 0.84
Iteration: 9 Cost: 0.5514147191299946 Acc: 0.84
Iteration: 10 Cost: 0.5495387922929182 Acc: 0.92
Iteration: 11 Cost: 0.548339048576372 Acc: 0.92
Iteration: 12 Cost: 0.5475505219125477 Acc: 0.92
Iteration: 13 Cost: 0.5470115399148036 Acc: 0.92
Iteration: 14 Cost: 0.5466238057138489 Acc: 0.94
Iteration: 15 Cost: 0.546327684379735 Acc: 0.94
Iteration: 16 Cost: 0.54608704435

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True, False,  True,  True,  True,  True,  True,  True,
       False,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True, False,  True,
        True,  True, False, False,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [5]:
X = load_iris()["data"][0:100,3:4]
y = load_iris()["target"][0:100]

print(y)
m = LogisticRegression()
W,b = m.fit(X,y,100, 0.009)
m.predict(X, W,b) == y

[0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1]
Iteration: 0 Cost: 0.5752742996475233 Acc: 0.5
Iteration: 1 Cost: 0.5655645988310147 Acc: 0.55
Iteration: 2 Cost: 0.5593814298072648 Acc: 0.91
Iteration: 3 Cost: 0.5554068566854082 Acc: 0.98
Iteration: 4 Cost: 0.5527997305433344 Acc: 0.98
Iteration: 5 Cost: 0.5510332828343762 Acc: 0.99
Iteration: 6 Cost: 0.5497814715972722 Acc: 1.0
Iteration: 7 Cost: 0.5488439438453815 Acc: 1.0
Iteration: 8 Cost: 0.5480981242486379 Acc: 1.0
Iteration: 9 Cost: 0.5474692224266665 Acc: 1.0
Iteration: 10 Cost: 0.5469116781289426 Acc: 1.0
Iteration: 11 Cost: 0.5463977639551746 Acc: 1.0
Iteration: 12 Cost: 0.5459106147099115 Acc: 1.0
Iteration: 13 Cost: 0.5454399752127558 Acc: 1.0
Iteration: 14 Cost: 0.544979610470847 Acc: 1.0
Iteration: 15 Cost: 0.544525729798766 Acc: 1.0
Iteration: 16 Cost: 0.5440760284360

array([ True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True])

In [6]:
X, y = make_classification(1000, 5, random_state=0)

In [7]:
m = LogisticRegression()
W,b = m.fit(X,y,100, 0.008)
m.predict(X, W,b) == y

Iteration: 0 Cost: 0.38673834831778126 Acc: 0.861
Iteration: 1 Cost: 0.385626385076859 Acc: 0.863
Iteration: 2 Cost: 0.3848657388848308 Acc: 0.866
Iteration: 3 Cost: 0.38416288721611075 Acc: 0.867
Iteration: 4 Cost: 0.38347218653910836 Acc: 0.867
Iteration: 5 Cost: 0.3827865529878571 Acc: 0.867
Iteration: 6 Cost: 0.38210487515365227 Acc: 0.867
Iteration: 7 Cost: 0.3814269598140023 Acc: 0.867
Iteration: 8 Cost: 0.38075275417049637 Acc: 0.867
Iteration: 9 Cost: 0.38008222689749516 Acc: 0.869
Iteration: 10 Cost: 0.37941535011788013 Acc: 0.869
Iteration: 11 Cost: 0.37875209667624826 Acc: 0.869
Iteration: 12 Cost: 0.37809243973265527 Acc: 0.87
Iteration: 13 Cost: 0.3774363526988877 Acc: 0.87
Iteration: 14 Cost: 0.3767838092274813 Acc: 0.871
Iteration: 15 Cost: 0.3761347832078828 Acc: 0.872
Iteration: 16 Cost: 0.37548924876391804 Acc: 0.873
Iteration: 17 Cost: 0.37484718025138425 Acc: 0.874
Iteration: 18 Cost: 0.3742085522557093 Acc: 0.874
Iteration: 19 Cost: 0.3735733395896406 Acc: 0.874
It

array([ True,  True,  True,  True,  True, False,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True, False,  True,  True,  True,  True,
       False,  True,  True,  True,  True, False,  True,  True, False,
        True,  True,  True, False,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True, False,  True,  True,  True,  True, False,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True, False,
        True, False,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,  True,  True,  True,  True,  True,  True,  True,
        True,  True,