**Author**: Rajan Adhikari

**Program**: Logistic Regression on Toy Dataset

**Objective**: To illustrate a from-scratch implementation of logistic regression

**Prepared for**: MDS | School of Mathematical Science

**Date**: 8/21/2023

---

In [6]:
import numpy as np
import pandas as pd

# Fixed seed shared by the weight initialisation and the synthetic data
# generation so that repeated runs of the notebook are reproducible.
SEED = 11

# Seed NumPy's global random number generator.
np.random.seed(SEED)

In [7]:
class LogisticRegression:
    """
    From-scratch binary logistic regression trained with full-batch
    gradient descent.

    Attributes
    ----------
    param : dict
        Model parameters: ``param['W']`` has shape ``(n, 1)`` and
        ``param['b']`` has shape ``(1,)``.
    m, n : int
        Number of samples and number of features taken from ``X.shape``.
    X, y
        Training features and labels as passed to the constructor.
    result : pandas.DataFrame
        Column ``0`` holds the labels, column ``1`` the most recent
        predicted probabilities (filled in by :meth:`train`).
    y_pred : numpy.ndarray
        Most recent predicted probabilities, shape ``(m, 1)`` (set by
        :meth:`train`).
    """

    def __init__(self, X, y):
        """
        Store the training data and initialise the parameters.

        Parameters
        ----------
        X : array-like with a two-element ``shape``
            Feature matrix of shape ``(m, n)``.
        y : array-like
            Labels with ``m`` entries (reshaped to ``(m, 1)`` in training).
        """
        self.param = {}
        self.m, self.n = X.shape
        # Small random weights drawn from NumPy's global RNG; zero bias.
        self.param['W'] = np.random.randn(self.n, 1) * 0.001
        self.param['b'] = np.zeros(1)

        self.X = X
        self.y = y
        self.result = pd.DataFrame()

    def train(self, alpha=0.001, epochs=10):
        """
        Run ``epochs`` steps of full-batch gradient descent.

        Prints the loss after each epoch and the final loss and
        coefficients at the end. With ``epochs=0`` no update is made and
        the reported final loss is that of the current parameters.

        Parameters
        ----------
        alpha : float
            Learning rate.
        epochs : int
            Number of gradient-descent steps.
        """
        # Loop-invariant pieces are computed once.
        y_col = np.reshape(self.y, (self.m, 1))
        self.result[0] = self.y

        # Forward pass with the current parameters. Computing the loss here
        # keeps it defined even when the loop body never runs.
        self.y_pred = self.predict(self.X)
        loss = self.loss(self.y, self.y_pred)

        for epoch in range(epochs):
            print("Epoch: ", epoch, end="")

            # Both updates use the residual from the same forward pass.
            residual = self.y_pred - y_col
            step = alpha / self.m
            self.param['W'] = self.param['W'] - step * np.dot(self.X.transpose(), residual)
            self.param['b'] = self.param['b'] - step * np.sum(residual)

            # This forward pass yields the reported loss and also serves as
            # the input to the next epoch's update.
            self.y_pred = self.predict(self.X)
            loss = self.loss(self.y, self.y_pred)

            self.result[1] = np.ravel(self.y_pred)
            print(", loss = ", loss)

        print("\nFinal Loss is ", loss)
        print("Coefficients are: \n W: {}, b = {}".format(self.param['W'], self.param['b']))

    @staticmethod
    def loss(y, y_pred):
        """
        Return the mean binary cross-entropy between labels and
        predicted probabilities.

        Both inputs are flattened, so ``(m,)`` and ``(m, 1)`` shapes may
        be mixed. Probabilities are clipped away from exactly 0 and 1 so
        that saturated predictions give a finite loss instead of ``nan``.
        """
        eps = 1e-15
        labels = np.asarray(y, dtype=float).reshape(-1)
        probs = np.clip(np.asarray(y_pred, dtype=float).reshape(-1), eps, 1 - eps)

        positive_term = labels.dot(np.log(probs))            # contributes where y == 1
        negative_term = (1 - labels).dot(np.log(1 - probs))  # contributes where y == 0

        return -(positive_term + negative_term) / len(labels)

    @staticmethod
    def sigmoid(z):
        """Return the element-wise logistic function ``1 / (1 + exp(-z))``."""
        # For very negative z, exp(-z) overflows to inf and the quotient is
        # 0.0, which is the correct limit; only the warning is suppressed.
        with np.errstate(over="ignore"):
            return 1.0 / (1 + np.exp(-z))

    def predict(self, X):
        """
        Return the predicted probability of the positive class for each
        row of ``X`` (probabilities, not thresholded class labels).
        """
        return self.sigmoid(np.dot(X, self.param['W']) + self.param['b'])

In [8]:
from sklearn.datasets import make_classification

def generate_data(no_of_data = 100, no_of_features = 4):
    """
        Build a synthetic two-class dataset with make_classification.

        no_of_data     : number of samples to generate
        no_of_features : number of feature columns per sample

        Returns the (features, target) pair produced by
        make_classification, seeded with the module-level SEED.
    """
    settings = {
        "n_samples": no_of_data,
        "n_features": no_of_features,
        "n_classes": 2,
        "random_state": SEED,
    }
    return make_classification(**settings)

In [9]:
# Dataset size and training hyper-parameters for this run.
no_of_data = 100000
no_of_features = 4
alpha = 0.1
epochs = 100

# Generate the toy dataset, then fit the model with gradient descent.
X, y = generate_data(no_of_data=no_of_data, no_of_features=no_of_features)
log_model = LogisticRegression(X, y)
log_model.train(alpha=alpha, epochs=epochs)

Epoch:  0, loss =  0.6369515532007411
Epoch:  1, loss =  0.5938916895497949
Epoch:  2, loss =  0.5607616106735371
Epoch:  3, loss =  0.5343849052409058
Epoch:  4, loss =  0.5127474809149306
Epoch:  5, loss =  0.49455704392542293
Epoch:  6, loss =  0.4789600311882209
Epoch:  7, loss =  0.4653735203317431
Epoch:  8, loss =  0.45338620460843626
Epoch:  9, loss =  0.4426990131263279
Epoch:  10, loss =  0.43308854288069726
Epoch:  11, loss =  0.424383879358419
Epoch:  12, loss =  0.4164514811530867
Epoch:  13, loss =  0.40918505582204906
Epoch:  14, loss =  0.40249860853714237
Epoch:  15, loss =  0.3963215591280728
Epoch:  16, loss =  0.39059523964723425
Epoch:  17, loss =  0.3852703336467681
Epoch:  18, loss =  0.3803049709000468
Epoch:  19, loss =  0.375663286878504
Epoch:  20, loss =  0.3713143174777097
Epoch:  21, loss =  0.36723113944315794
Epoch:  22, loss =  0.36339019353206153
Epoch:  23, loss =  0.3597707454472173
Epoch:  24, loss =  0.35635445196505405
Epoch:  25, loss =  0.353125