# Logistic Regression (SGD)

In [19]:
import numpy as np
from sklearn.metrics import accuracy_score

In [20]:
class LogisticRegression:
    """Binary logistic regression trained by repeated gradient passes on the log-loss.

    The instance holds no state: ``fit`` returns the learned weights ``W`` and
    bias ``b``, and ``predict`` takes them back as arguments.
    """

    def __init__(self):
        """Nothing is stored on the instance; kept as a hook for persisting parameters."""

        pass

    def sigmoid_dot(self, X, W, b):
        """Return ``sigmoid(np.dot(X, W) + b)``.

        The sigmoid is evaluated as ``1 / (1 + e)`` where ``z >= 0`` and as
        ``e / (1 + e)`` where ``z < 0``, with ``e = exp(-|z|)``. ``exp`` therefore
        never receives a positive argument and cannot overflow for extreme logits.
        """

        z = np.dot(X, W) + b
        e = np.exp(-np.abs(z))
        s = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
        # np.where produces a 0-d array for scalar input; indexing with ()
        # unwraps that to a scalar and leaves n-d arrays as arrays.
        return s[()]

    def update(self, X, y, y_, W, b, alpha, m):
        """Apply one pass of parameter updates over the first ``m`` samples.

        The residuals ``y - y_`` come from the predictions passed in and are not
        refreshed during the pass, so the accumulated weight change equals one
        step along the averaged gradient:
        ``alpha * sum_i((y[i] - y_[i]) * X[i]) / m``.

        NOTE(review): the bias moves by ``alpha * mean(y - y_)`` once per sample,
        i.e. ``alpha * m * mean(y - y_)`` per pass, which is ``m`` times the
        scaling used for the weights. That scaling is kept as it was, because
        changing it alters how fast training converges for a given ``alpha``.

        Parameters
        ----------
        X : np.ndarray
            Input data of shape (m, n).
        y : np.ndarray
            True labels of shape (m,).
        y_ : np.ndarray
            Predicted probabilities of shape (m,).
        W : np.ndarray
            Current weights of shape (n,). Not modified.
        b : float
            Current bias.
        alpha : float
            Learning rate.
        m : int
            Number of leading samples to use; a value <= 0 leaves W and b as is.

        Returns
        -------
        tuple
            Updated ``(W, b)``; ``W`` is always a new float array.

        Raises
        ------
        IndexError
            If ``m`` exceeds the number of residuals.
        """

        residual = np.asarray(y, dtype=float) - np.asarray(y_, dtype=float)
        # Float copy: the caller's array stays untouched and integer-typed
        # initial weights are accepted.
        W = np.array(W, dtype=float)
        if m <= 0:
            return W, b
        if m > residual.shape[0]:
            raise IndexError(
                f"m={m} is out of bounds for {residual.shape[0]} samples"
            )

        W += alpha * (np.dot(residual[:m], X[:m]) / m)
        # The mean is taken over all residuals and applied m times (see NOTE).
        b = b + alpha * m * np.mean(residual)
        return W, b

    def cost(self, y, y_):
        """Return the mean binary cross-entropy between labels and predictions.

        Predictions are clipped to ``[1e-15, 1 - 1e-15]`` before the logarithm,
        so a prediction of exactly 0 or 1 yields a finite cost instead of
        ``nan`` (``0 * log(0)``).

        Parameters
        ----------
        y : np.ndarray
            True labels of shape (m,).
        y_ : np.ndarray
            Predicted probabilities of shape (m,).

        Raises
        ------
        ValueError
            If ``y`` is empty.
        """

        y = np.asarray(y, dtype=float)
        y_ = np.asarray(y_, dtype=float)
        if y.shape[0] == 0:
            raise ValueError("cost is undefined for an empty set of labels")

        eps = 1e-15
        p = np.clip(y_, eps, 1 - eps)
        return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))

    def fit(self, X, y, iterations=1000, alpha=0.000001, verbose=True, random_state=None):
        """
        Fits the logistic regression model to the training data.

        Parameters
        ----------
        X : np.ndarray
            Input data of shape (m, n).
        y : np.ndarray
            True labels of shape (m,).
        iterations : int, optional
            Number of iterations for training. Default is 1000.
        alpha : float, optional
            Learning rate. Default is 0.000001
        verbose : bool, optional
            Print cost and accuracy after every iteration. Default is True.
        random_state : int or None, optional
            Seed for the initial weights. With None (default) the weights are
            drawn from NumPy's global RNG, so ``np.random.seed`` still applies.

        Returns
        -------
        tuple
            Trained model parameters W and b.
        """

        m, n = X.shape
        if random_state is None:
            W = np.random.rand(n)
        else:
            W = np.random.default_rng(random_state).random(n)
        b = 0

        for k in range(iterations):
            y_ = self.sigmoid_dot(X, W, b)
            W, b = self.update(X, y, y_, W, b, alpha, m)
            if verbose:
                # Cost and accuracy describe the predictions made before this
                # iteration's update.
                print(f"Iteration: {k}",
                      f"Cost: {self.cost(y, y_)}",
                      f"Acc: {accuracy_score(y, y_.round())}")
        return W, b

    def predict(self, X, W, b):
        """Generates hard predictions for X using trained model parameters W and b.

        Returns the sigmoid outputs rounded to 0.0 or 1.0. NumPy rounds halves
        to the nearest even value, so a probability of exactly 0.5 becomes 0.0.
        """

        s = self.sigmoid_dot(X, W, b)
        return s.round()

# Usage

Multiclass dataset. The model is binary, so each experiment below uses two of the target classes at a time.

In [21]:
from sklearn.datasets import load_iris
from sklearn.datasets import make_classification

In [22]:
# Iris rows 50-149, feature column 3 only. Subtracting 1 shifts the two class
# ids in that slice (1 and 2 in scikit-learn's ordering) onto the 0/1 labels
# the binary model expects.
iris = load_iris()  # load once; both fields are sliced from the same object
X = iris["data"][50:150, 3:4]
y = iris["target"][50:150] - 1

# fit() draws its initial weights from NumPy's global RNG; seeding here makes
# the printed training log repeatable on a fresh kernel.
np.random.seed(0)
m = LogisticRegression()
W, b = m.fit(X, y, iterations=100, alpha=0.009)

Iteration: 0 Cost: 0.6810392254190654 Acc: 0.5
Iteration: 1 Cost: 0.6605455546683778 Acc: 0.5
Iteration: 2 Cost: 0.6477584497702533 Acc: 0.5
Iteration: 3 Cost: 0.6398759412138559 Acc: 0.5
Iteration: 4 Cost: 0.6350331499062908 Acc: 0.57
Iteration: 5 Cost: 0.6320429055822937 Acc: 0.6
Iteration: 6 Cost: 0.6301700023566245 Acc: 0.65
Iteration: 7 Cost: 0.6289667439033088 Acc: 0.78
Iteration: 8 Cost: 0.6281632868653658 Acc: 0.84
Iteration: 9 Cost: 0.6275979907278775 Acc: 0.84
Iteration: 10 Cost: 0.6271743917338936 Acc: 0.92
Iteration: 11 Cost: 0.6268350521855562 Acc: 0.92
Iteration: 12 Cost: 0.6265458184323885 Acc: 0.92
Iteration: 13 Cost: 0.6262863987389538 Acc: 0.92
Iteration: 14 Cost: 0.626044743145763 Acc: 0.94
Iteration: 15 Cost: 0.6258136980751462 Acc: 0.94
Iteration: 16 Cost: 0.6255890174666678 Acc: 0.94
Iteration: 17 Cost: 0.625368181274303 Acc: 0.94
Iteration: 18 Cost: 0.6251496939054877 Acc: 0.94
Iteration: 19 Cost: 0.6249326677584581 Acc: 0.94
Iteration: 20 Cost: 0.624716576041199

In [23]:
# Iris rows 0-99, feature column 3 only. The targets in this slice are used
# as they are (no shift), so they must already be 0/1.
iris = load_iris()  # load once; both fields are sliced from the same object
X = iris["data"][0:100, 3:4]
y = iris["target"][0:100]

# fit() draws its initial weights from NumPy's global RNG; seeding here makes
# the printed training log repeatable on a fresh kernel.
np.random.seed(0)
m = LogisticRegression()
W, b = m.fit(X, y, iterations=100, alpha=0.009)

Iteration: 0 Cost: 0.5398749316241829 Acc: 0.5
Iteration: 1 Cost: 0.5191350329038569 Acc: 0.55
Iteration: 2 Cost: 0.5056580955125702 Acc: 0.84
Iteration: 3 Cost: 0.49696960110357324 Acc: 0.91
Iteration: 4 Cost: 0.49136823833242893 Acc: 0.98
Iteration: 5 Cost: 0.4877284152282435 Acc: 0.99
Iteration: 6 Cost: 0.4853234461873965 Acc: 0.99
Iteration: 7 Cost: 0.48369137159280134 Acc: 1.0
Iteration: 8 Cost: 0.4825413485077577 Acc: 1.0
Iteration: 9 Cost: 0.48169136705289545 Acc: 1.0
Iteration: 10 Cost: 0.4810279496052282 Acc: 1.0
Iteration: 11 Cost: 0.4804805084009422 Acc: 1.0
Iteration: 12 Cost: 0.4800051960136771 Acc: 1.0
Iteration: 13 Cost: 0.4795747979787126 Acc: 1.0
Iteration: 14 Cost: 0.47917243124267855 Acc: 1.0
Iteration: 15 Cost: 0.4787876246223995 Acc: 1.0
Iteration: 16 Cost: 0.4784138843386407 Acc: 1.0
Iteration: 17 Cost: 0.4780471832295688 Acc: 1.0
Iteration: 18 Cost: 0.4776850236430354 Acc: 1.0
Iteration: 19 Cost: 0.47732585632304575 Acc: 1.0
Iteration: 20 Cost: 0.4769687200973137

In [24]:
# Synthetic classification problem: 1000 samples, 5 features, fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=5,
    random_state=0,
)

In [25]:
# fit() draws its initial weights from NumPy's global RNG; seeding here makes
# the printed training log repeatable on a fresh kernel.
np.random.seed(0)
m = LogisticRegression()
W, b = m.fit(X, y, iterations=100, alpha=0.008)

Iteration: 0 Cost: 0.28777558158298233 Acc: 0.921
Iteration: 1 Cost: 0.286943454229056 Acc: 0.924
Iteration: 2 Cost: 0.2866625538369118 Acc: 0.924
Iteration: 3 Cost: 0.2863898674221144 Acc: 0.924
Iteration: 4 Cost: 0.2861185693981546 Acc: 0.924
Iteration: 5 Cost: 0.2858485648927204 Acc: 0.924
Iteration: 6 Cost: 0.28557984358863686 Acc: 0.924
Iteration: 7 Cost: 0.2853123963023754 Acc: 0.925
Iteration: 8 Cost: 0.28504621394571694 Acc: 0.925
Iteration: 9 Cost: 0.2847812875129227 Acc: 0.925
Iteration: 10 Cost: 0.2845176080796554 Acc: 0.925
Iteration: 11 Cost: 0.2842551668021377 Acc: 0.927
Iteration: 12 Cost: 0.2839939549163109 Acc: 0.927
Iteration: 13 Cost: 0.28373396373700455 Acc: 0.927
Iteration: 14 Cost: 0.28347518465711774 Acc: 0.927
Iteration: 15 Cost: 0.28321760914680594 Acc: 0.927
Iteration: 16 Cost: 0.2829612287526797 Acc: 0.927
Iteration: 17 Cost: 0.28270603509701125 Acc: 0.927
Iteration: 18 Cost: 0.28245201987695084 Acc: 0.927
Iteration: 19 Cost: 0.2821991748637516 Acc: 0.927
Ite

In [26]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
# Score the fitted parameters on the same data they were trained on.
y_true = y
y_pred = m.predict(X, W, b)

# When labels and predictions are both 0/1, every error has magnitude 1, so
# MAE and MSE both equal the fraction of misclassified samples.

# Calculate MAE
mae = mean_absolute_error(y_true, y_pred)
print(f'Mean Absolute Error (MAE): {mae}')

# Calculate MSE
mse = mean_squared_error(y_true, y_pred)
print(f'Mean Squared Error (MSE): {mse}')

# Calculate RMSE
rmse = np.sqrt(mse)
print(f'Root Mean Squared Error (RMSE): {rmse}')

# Calculate R-squared
r_squared = r2_score(y_true, y_pred)
print(f'R-squared: {r_squared}')

Mean Absolute Error (MAE): 0.063
Mean Squared Error (MSE): 0.063
Root Mean Squared Error (RMSE): 0.25099800796022265
R-squared: 0.747998991995968
