# Logistic Regression

- <https://towardsdatascience.com/logistic-regression-from-scratch-69db4f587e17>
- <https://aihubprojects.com/logistic-regression-from-scratch/>
- <https://thelaziestprogrammer.com/sharrington/math-of-machine-learning/solving-logreg-newtons-method>

In [None]:
import matplotlib.pyplot as plot
import numpy as np
from utility import display, load_dataset, sigmoid, split_dataset
from sklearn.linear_model import LogisticRegression as ScikitLogisticRegression

The following class implements Logistic Regression model fitting and prediction,
trained by [Gradient Descent](https://en.wikipedia.org/wiki/Gradient_descent) with the [sigmoid](https://en.wikipedia.org/wiki/Sigmoid_function) function and the [Log Loss](https://en.wikipedia.org/wiki/Log_loss) function.

In [None]:
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Predictions are ``sigmoid(np.dot(x, weights))``; there is no separate
    intercept term, so a bias has to be supplied as a constant column of
    ``x`` if one is wanted.
    """

    def __init__(self, verbose=False):
        """Create an untrained model.

        Args:
            verbose: When true, ``fit`` prints the loss every 1000 epochs.
        """
        self.verbose = verbose
        # Empty placeholder; fit() resizes it to the number of features.
        self.weights = np.zeros(0)

    @staticmethod
    def log_loss(yp, y, eps=1e-15):
        """Return the mean log loss of the predictions ``yp`` against ``y``.

        Args:
            yp: Predicted probabilities.
            y: Target values, same length as ``yp`` (the formula is the
                binary cross-entropy, so 0/1 labels are expected).
            eps: Predictions are clipped to ``[eps, 1 - eps]`` before the
                logarithm is taken.

        Returns:
            The mean of ``-y*log(yp) - (1-y)*log(1-yp)``, always finite.
        """
        # A prediction of exactly 0 or 1 would feed 0 into np.log and turn the
        # whole mean into inf or nan (0 * -inf); clipping keeps it finite.
        yp = np.clip(yp, eps, 1 - eps)
        return (-y * np.log(yp) - (1 - y) * np.log(1 - yp)).mean()

    def fit(self, x, y, lr=0.05, epochs=10000):
        """Fit the weights to ``x`` and ``y`` by gradient descent on the log loss.

        Args:
            x: Feature matrix; ``x.shape[1]`` determines the number of weights.
            y: Target values, one per row of ``x`` (must expose ``.size``).
            lr: Learning rate applied to every gradient step.
            epochs: Number of gradient steps to perform.

        Returns:
            A list with the loss measured after each weight update, one entry
            per epoch.
        """
        # weights initialization
        self.weights = np.zeros(x.shape[1])
        losses = []

        # Forward pass for the initial weights. Inside the loop the prediction
        # computed for the loss is reused for the next gradient, so only one
        # forward pass is needed per epoch.
        yp = sigmoid(np.dot(x, self.weights))

        for i in range(epochs):
            # gradient of the mean log loss with respect to the weights
            gradient = np.dot(x.T, (yp - y)) / y.size

            # update weights
            self.weights -= lr * gradient

            # predictions for the updated weights
            yp = sigmoid(np.dot(x, self.weights))

            # loss after this update
            loss = self.log_loss(yp, y)

            # display loss
            if self.verbose and i % 1000 == 0:
                print(f'loss in iteration {i} -> {loss} \t')

            # collect loss values
            losses.append(loss)

        return losses

    def predict(self, x):
        """Return the rounded sigmoid output for each row of ``x``.

        Args:
            x: Feature matrix with the same column layout used in ``fit``.

        Returns:
            ``sigmoid(np.dot(x, weights))`` rounded to the nearest integer.
        """
        z = np.dot(x, self.weights)
        # Rounding the probability yields the binary class label.
        return sigmoid(z).round()

Auxiliary function to display summary statistics

In [None]:
def display_statistics(x_values, y_values):
    """Print summary statistics for two samples.

    Prints the mean, median, standard deviation and variance of each input,
    followed by their covariance matrix and correlation-coefficient matrix.

    Args:
        x_values: First sample of numeric values.
        y_values: Second sample, the same length as ``x_values``.
    """
    print(f"Mean(x)={np.mean(x_values)} Mean(Y)={np.mean(y_values)}")
    print(f"Median(x)={np.median(x_values)} Median(Y)={np.median(y_values)}")
    print(f"StdDev(x)={np.std(x_values)} StdDev(Y)={np.std(y_values)}")
    print(f"Var(x)={np.var(x_values)} Var(Y)={np.var(y_values)}")
    print(f"Cov(x,y)={np.cov(x_values, y_values)}")
    # np.corrcoef returns the Pearson correlation matrix. np.correlate is the
    # signal-processing cross-correlation (a plain dot product for inputs of
    # equal length) and is not a correlation coefficient.
    print(f"Cor(x,y)={np.corrcoef(x_values, y_values)}")

Load the dataset from a CSV file and split it into training and test sets

In [None]:
# Read the banknote CSV through the project's load_dataset helper; its two
# return values are bound as the dataset and its target.
dataset, target = load_dataset("data/banknote.csv")
# Split into train/test parts. 0.75 is presumably the training fraction --
# split_dataset lives in utility, confirm there.
train_x, train_y, test_x, test_y = split_dataset(dataset, target, 0.75)
# Report how many samples ended up in each part.
print(f"Training set size: {len(train_x)}, Testing set size: {len(test_x)}")

In [None]:
# Instantiate the from-scratch LogisticRegression class with its defaults
# (verbose=False, so fit() prints no progress).
model = LogisticRegression()

Train the model on the training data and plot the loss values

In [None]:
# fit() runs with its defaults (lr=0.05, epochs=10000) and returns one loss
# value per epoch; plotting that list shows the training curve.
plot.plot(model.fit(train_x, train_y))

Predict using the test data

In [None]:
# predict() returns the rounded sigmoid output for every test row.
predictions = model.predict(test_x)

Display the results of the test data predictions

In [None]:
# Hand the expected targets and the predictions to the project's display
# helper (defined in utility; its output format is not shown here).
display(test_y, predictions)

Let's try scikit-learn [Logistic Regression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LogisticRegression.html)

In [None]:
# Rebind `model` to scikit-learn's LogisticRegression with its default
# hyperparameters, for comparison with the from-scratch implementation.
model = ScikitLogisticRegression()
# Train on the same training split used for the from-scratch model.
model.fit(train_x, train_y)
# Overwrites the earlier `predictions` with scikit-learn's output.
predictions = model.predict(test_x)
# Show the results with the same display helper used for the first model.
display(test_y, predictions)