In [None]:
import numpy as np
import pandas as pd
import math
import matplotlib.pyplot as plt

In this notebook I apply logistic regression to two toy examples. My objective is to have a better understanding of the Newton-Raphson method for optimisation, used to fit a logistic regression model. 

## 1D example

In [None]:
# Load the 1D toy dataset; the cells below read its `x` and `y` columns.
data = pd.read_csv(filepath_or_buffer='../datasets/logistic_regression_1d.csv')

In [None]:
fig, ax = plt.subplots()
# One series per class so that each class gets its own colour and legend entry.
for label in (0, 1):
    subset = data[data.y == label]
    ax.plot(subset.x, subset.y, 'o', label='y = {}'.format(label))
ax.set(xlabel='x', ylabel='y', title='1D toy dataset')
ax.legend();

We have to fit a model in the following form:

$\text{logit}(y) = \beta_0 + \beta_1 x$

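where $\text{logit}(y) = \text{ln}\left(\frac{y}{1 - y}\right)$ and $y$ is the probability of the positive class. Inverting the logit gives, for observation $i$ in the general case of $k$ features (here $k = 1$), $y_i = \frac{e^{\beta_0 + \beta_1 x_{1,i} + \ldots + \beta_{k} x_{k,i}}}{1 + e^{\beta_0 + \beta_1 x_{1,i} + \ldots + \beta_{k} x_{k,i}}}$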

We do this by applying iteratively reweighted least squares, whose update rule is derived from the Newton-Raphson method (see https://www.cs.cmu.edu/~mgormley/courses/10701-f16/slides/lecture5.pdf):

$\vec{\beta}_{n+1} = \vec{\beta}_n - \left(X^TSX\right)^{-1}\left(X^T(\mu - y)\right)$

where $S = \text{diag}(\mu_i(1 - \mu_i))$ is a diagonal matrix of weights, and $\mu_i = 1/(1+e^{-\left(\beta_0 + \beta_1 x_{1,i} + \ldots + \beta_{k} x_{k,i}\right)})$ is the probability currently predicted by the model for observation $i$, so that each weight equals $\mu_i(1 - \mu_i)$

In [None]:
def fit_logistic(x, y, tol=0.001, max_iter=100, verbose=True):
    '''
    Fits a logistic regression model with the Newton-Raphson method
    (iteratively reweighted least squares).

    Parameters
    ----------
    x : array-like, shape (m, n) or (m,)
        Features, with m observations and n features. A 1-D input is treated
        as a single feature. The intercept column is added by this function.
    y : array-like, shape (m, 1) or (m,)
        Binary labels (0 or 1), one per observation.
    tol : float
        The iteration stops once the sum of absolute changes in the
        coefficients produced by a Newton step is <= tol.
    max_iter : int
        Maximum number of Newton steps. Guards against looping forever when
        the method does not converge (e.g. perfectly separable classes).
    verbose : bool
        If True, the coefficients are printed after every step.

    Returns
    -------
    numpy.ndarray, shape (n + 1, 1)
        Fitted coefficients; the first entry is the intercept.

    Raises
    ------
    ValueError
        If x and y have a different number of observations, or if y does not
        contain both classes (the starting intercept is undefined then).
    numpy.linalg.LinAlgError
        If the weighted normal matrix X^T S X is singular.
    '''
    import warnings

    x = np.asarray(x, dtype=float)
    if x.ndim == 1:
        x = x.reshape(-1, 1)
    # Force a column vector: a 1-D y would broadcast `mu - y` to an (m, m) matrix.
    y = np.asarray(y, dtype=float).reshape(-1, 1)
    if x.shape[0] != y.shape[0]:
        raise ValueError('x and y must have the same number of observations')

    y_mean = y.mean() if y.size else float('nan')
    if not 0 < y_mean < 1:
        raise ValueError('y must contain both classes (0 and 1)')

    X = np.hstack((np.ones((x.shape[0], 1)), x))
    B = np.zeros((X.shape[1], 1))
    # Start from the intercept-only model: logit of the observed class frequency.
    B[0, 0] = math.log(y_mean / (1 - y_mean))

    for _ in range(max_iter):
        mu = 1 / (1 + np.exp(-np.dot(X, B)))
        # Diagonal of S kept as a column; `weights * X` equals S X without
        # materialising the m x m diagonal matrix.
        weights = mu * (1 - mu)
        hessian = np.dot(X.T, weights * X)
        gradient = np.dot(X.T, mu - y)
        # Solve the linear system rather than forming the explicit inverse.
        step = np.linalg.solve(hessian, gradient)
        B = B - step
        if verbose:
            print(B)
        if np.sum(np.abs(step)) <= tol:
            break
    else:
        warnings.warn(
            'fit_logistic did not converge within {} iterations'.format(max_iter),
            RuntimeWarning,
        )

    return B

In [None]:
# Double-bracket selection yields (m, 1) column arrays for both the feature and the labels.
B = fit_logistic(data[['x']].values, data[['y']].values)

In [None]:
fig, ax = plt.subplots()
# One series per class so that each class gets its own colour and legend entry.
for label in (0, 1):
    subset = data[data.y == label]
    ax.plot(subset.x, subset.y, 'o', label='y = {}'.format(label))

# Evaluate the fitted sigmoid on a grid extending one unit beyond the data range.
# B is a column vector, so B[0, 0] / B[1, 0] extract plain scalars; the whole grid
# is evaluated at once instead of calling math.exp on size-1 arrays in a loop.
xs = np.arange(data.x.min() - 1, data.x.max() + 1, 0.1)
ys = 1 / (1 + np.exp(-(B[0, 0] + B[1, 0] * xs)))
ax.plot(xs, ys, label='fitted probability')

ax.set(xlabel='x', ylabel='y', title='1D logistic regression fit')
ax.legend();

In [None]:
# Making predictions to show a confusion matrix.
# `p` is the fitted probability of class 1, computed for the whole column at once;
# B is a column vector, so B[0, 0] / B[1, 0] extract plain scalars.
data['p'] = 1 / (1 + np.exp(-(B[0, 0] + B[1, 0] * data['x'])))
# Classify as 1 when the predicted probability exceeds 0.5.
data['y_pred'] = (data['p'] > 0.5).astype(int)

In [None]:
# Confusion matrix: rows are the true labels, columns the predicted labels.
# crosstab reports 0 (not NaN) for label/prediction combinations that never occur.
pd.crosstab(data['y'], data['y_pred'])

## 2D example

In [None]:
# Load the 2D toy dataset; note that this rebinds `data`, replacing the 1D frame.
data = pd.read_csv(filepath_or_buffer='../datasets/logistic_regression_2d.csv')

In [None]:
# Two features in, one column of labels out; both are passed as 2-D arrays.
# NOTE(review): the features are passed unscaled; if Age and EstimatedSalary differ
# by orders of magnitude the Newton step may be ill-conditioned, so confirm.
B = fit_logistic(
    data.loc[:, ['Age', 'EstimatedSalary']].values,
    data[['Purchased']].values,
)