<b>Logistic-Regression (LogsReg) - Scratch</b> <br>
<i>Implementing logistic regression using only NumPy, step-by-step. </i>

<b>requirements</b>

In [None]:
# example:- pip install numpy

<b>imports</b>

In [2]:
# imports
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import datasets

<b>(1) DATA PRE-PROCESSING</b>

In [3]:
# Generate Synthetic Data
# Features (X) & Dependent-Variable(y)
X, y = datasets.make_blobs(centers=3, n_samples=500, n_features=2, shuffle=True, random_state=42, cluster_std = 1.3)

# Data-Splitting 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.3, random_state = 134)

# Data Pre-Processing
# Normalize (if needed)
# Data Visualization

<b>(2) ML ALGORITHM - SCRATCH</b>

In [4]:
class LogisticRegression: 
    
    # (2.1) Initiaize Model-Parameters
    def __init__(self, learning_rate = 0.001, iters = 1000) -> None:
        """
        Initialize Model-Parameters
        
        Parameters-Variables: 
            learning_rate : [param](int) learning_rate of the model
            iters         : [param](int) Number of Training Iterations , default = 1000
            weights       : [vars](int) weights of the model, initially None
            bias          : [vars](int) bais of the model, initially None
            
        Returns:
            Nothing
        """

        self.iters= iters
        self.learning_rate = learning_rate
        self.weights = None
        self.bias = None
        
    
    # (2.2) Calculate Sigmoid Function 
    def sigmoid(self,z) -> np.ndarray:
        """
        Calculates the sigmoid , fx_wb = g(wx+b) = g(z) = 1/(1+e^-z), given z
        
        Parameters: 
            z : (np.array) depenent-Variables , z=wx+b
            
        Returns: 
            The sigmoid function g(z) for values z
        
        """
        gz = 1 / (1 + np.exp(-z))
        return gz
        
    
    
    # (2.3) Calculate Cost/Loss Function
    def cross_entropy_loss(self,X,y) -> np.ndarray:
        """
        The loss function for Logistic Regression is log-loss / Cross-Entropy. 
        Calculate loss for each datapoints. So its a cost calculation
            
        Parameters: 
            X : (np.array) Indepenent-Variables (Features-Matrix)
            y : (np.array) True labels 
            
        Returns: 
           The computed cost
            
        """
        
        n = X.shape[0] # rows, needed when used for loop
        cost = 0
        
        # can also be done using for loop, but vectorization is faster : 
        
        # calculate z
        z = np.dot(X, self.weights) + self.bias
        
        # calculate sigmoid, fx_wb  i.e gz
        gz = self.sigmoid(z)
        
        # calculate cost
        cost = -np.dot(y, np.log(gz)) - np.dot((1 - y), np.log(1 - gz))
        cost /= n
        
        return cost
    
    
    # (2.4) Fit-Model 
    def fit(self, X, y) -> None:
        """
        Fits andd Trains Model to Data X. After Training gives us the learned
        Calculates Gradients and apply gradient descent algorithm
        
        Parameter :
            X : (np.array) Independent-Variable (Features-Matrix) 
        
        Returns : 
            Nothing
        """
        
        n_data, n_features = X.shape
        
        # init params
        self.weights = np.zeros(n_features)
        self.bias = 0
        history = {}
        
        for _ in range(self.iters):
            
            z = np.dot(X, self.weights) + self.bias
            y_preds = self.sigmoid(z)

            # Calc Gradients/derivs
            dw = (1/n_data) * np.dot(X.T, (y_preds-y))
            db = (1/n_data) * np.sum((y_preds-y))
            
            # GD algo / backprop : update Params based on derivs
            self.weights -= self.learning_rate * dw
            self.bias -= self.learning_rate * db
            
            # printing : 
            if self.iters % 100 == 0:
                cost = self.cross_entropy_loss(X, y)
                history[self.iters] = cost
                print(f"Iter\t{self.iters}\tCost\t{cost}")

        return history, self.weights, self.bias
    
    
    # (2.5) Predicted labels ŷ 
    def predict(self, X) -> np.ndarray:
        """
        Predictions ŷ of the model, calculates the probabilites and the classes based on those probs
        
        Parameters: 
            X : (np.array) Indepenent-Variables (Features-Matrix), X_test in this case
        
        Returns: 
             ŷ , the pred
        """
        
        z = np.dot(X, self.weights) + self.bias
        y_probs = self.sigmoid(z) # the probabilities of being in a particular class
        y_preds = [1 if i > 0.5 else 0 for i in y_probs] # separate to classes 0 or 1 based on probs if > or < than 0.5
        return np.array(y_probs), np.array(y_preds)

<b>(3) MODEL TRAINING</b>

In [5]:
# Fit (Training) 
def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1234)

regressor = LogisticRegression(learning_rate=0.0001, iters=1000)
regressor.fit(X_train, y_train)

Iter	1000	Cost	3.8874640795530597
Iter	1000	Cost	nan
Iter	1000	Cost	11.755729554804551
Iter	1000	Cost	inf
Iter	1000	Cost	19.520611465734046
Iter	1000	Cost	2.4472468836620296
Iter	1000	Cost	26.58548803562892
Iter	1000	Cost	4.40601072991484
Iter	1000	Cost	nan
Iter	1000	Cost	12.246322169082463
Iter	1000	Cost	inf
Iter	1000	Cost	20.011128320071222
Iter	1000	Cost	0.9107432285519179
Iter	1000	Cost	15.658220441206938
Iter	1000	Cost	inf
Iter	1000	Cost	23.408687051921007
Iter	1000	Cost	1.370359634515185
Iter	1000	Cost	nan
Iter	1000	Cost	12.566502253166421
Iter	1000	Cost	inf
Iter	1000	Cost	20.33126386809753
Iter	1000	Cost	0.44526823388843084
Iter	1000	Cost	4.083204488097816
Iter	1000	Cost	nan
Iter	1000	Cost	12.040410971918215
Iter	1000	Cost	inf
Iter	1000	Cost	19.731699725322557
Iter	1000	Cost	0.55093030752049
Iter	1000	Cost	4.919010191143007
Iter	1000	Cost	nan
Iter	1000	Cost	12.797294469072574
Iter	1000	Cost	inf
Iter	1000	Cost	19.260144094933747
Iter	1000	Cost	0.6306710371598686
Iter	1000	Cost	4.

  cost = -np.dot(y, np.log(gz)) - np.dot((1 - y), np.log(1 - gz))


Iter	1000	Cost	inf
Iter	1000	Cost	2.606697452880154
Iter	1000	Cost	inf
Iter	1000	Cost	1.828579567068404
Iter	1000	Cost	2.333280966306359
Iter	1000	Cost	1.3127115081355212
Iter	1000	Cost	1.5556973099949483
Iter	1000	Cost	0.8650049861573496
Iter	1000	Cost	0.9257164622045887
Iter	1000	Cost	0.547667589937366
Iter	1000	Cost	0.5728942152055544
Iter	1000	Cost	0.4384263385618243
Iter	1000	Cost	0.4593486099757817
Iter	1000	Cost	0.43242155305367597
Iter	1000	Cost	0.45186696628948475
Iter	1000	Cost	0.4316077278733555
Iter	1000	Cost	0.45087062207322764
Iter	1000	Cost	0.4312527482905536
Iter	1000	Cost	0.4504564834322183
Iter	1000	Cost	0.4309584102669583
Iter	1000	Cost	0.45011836715516473
Iter	1000	Cost	0.43067557389010497
Iter	1000	Cost	0.44979382916401367
Iter	1000	Cost	0.43039781366987045
Iter	1000	Cost	0.4494748089344726
Iter	1000	Cost	0.43012426150743704
Iter	1000	Cost	0.44916031655884403
Iter	1000	Cost	0.42985479347721905
Iter	1000	Cost	0.4488503058817796
Iter	1000	Cost	0.4295893889368491
Iter

({1000: 0.47049688534289885},
 array([ 3.15267538e-02,  4.38592690e-02,  1.82394637e-01,  7.27657289e-02,
         2.81683690e-04, -1.58921860e-04, -5.94869592e-04, -2.47270611e-04,
         5.51932783e-04,  2.26761495e-04,  1.51071202e-04,  3.05608006e-03,
        -1.13197589e-03, -8.16912730e-02,  1.44483806e-05, -4.65383514e-05,
        -7.39608956e-05, -1.01074526e-05,  5.54703739e-05,  1.84514942e-06,
         3.33636766e-02,  5.58146134e-02,  1.82897269e-01, -9.88462272e-02,
         3.48754542e-04, -6.74411296e-04, -1.27433782e-03, -2.94192307e-04,
         7.76822709e-04,  1.96607498e-04]),
 0.004111914763563522)

<b>(4) PREDICTION</b>

In [6]:
predictions = regressor.predict(X_test)


<b>(5) EVALUATION-VISUALIZATION</b>

In [7]:
print("LR classification accuracy:", accuracy(y_test, predictions))

# Predicting on Test-Set using the trained  model (by the learned '')
#print(f"\nPredicted Class-Label : {predictions}")


LR classification accuracy: 0.9298245614035088


<b>CONCLUSION</b>
- The model is performing well for classification