In [275]:
import pandas as pd
import numpy as np

In [280]:
default=pd.read_csv("../datasets/Default.csv")

In [282]:
default.head()

Unnamed: 0,default,student,balance,income
0,0,0,729.526495,44361.625074
1,0,1,817.180407,12106.1347
2,0,0,1073.549164,31767.138947
3,0,0,529.250605,35704.493935
4,0,0,785.655883,38463.495879


In [281]:
default['default']=default['default'].apply(lambda x: 0 if x=='No' else 1)
default['student']=default['student'].apply(lambda x: 1 if x=='Yes' else 0)

# Logistic Regression

Note that linear regression won't work on categorical response. Main reasons are (1) the difference between one category to another may not be fixed (2) linear regression outputs value outside of $[0,1]$, making it hard to interpret as probabilities.

Thus, we use a logistic function (aka sigmoid function) to squeeze output values into a valid probability 
distribution

$\sigma(z_i)=\frac{1}{1+e^{-z_i}}$ where $z_i=\beta_{0}+\beta_{1}x_i$

Then $\hat{p}_i=P(y=1|x_i)$

We use Maximum Likelihood Estimate (MLE) to estimate the parameters

$L(\beta_0,\beta_1)=\prod_{i=1}^{n}\hat{p}_i^{y_i}(1-\hat{p}_i)^{1-y_i}$

Then the log-likelihood function becomes

$l(\beta_0,\beta_1)=\sum_{i=1}^{n}y_ilog(\hat{p}_i)+(1-y_i)log(1-\hat{p}_i)$

With some algebra and partial derivatives, we arrive at

$\frac{dl(\beta_0,\beta_1)}{d\beta_0}=\sum_{i=1}^{n}(y_i-\hat{p}_i)$ and
$\frac{dl(\beta_0,\beta_1)}{d\beta_1}=\sum_{i=1}^{n}(y_i-\hat{p}_i)x_i$

Now, update the gradient as follows:

$\beta_0=\beta_0+\eta\sum_{i=1}^{n}(y_i-\hat{p}_i)$

$\beta_1=\beta_1+\eta\sum_{i=1}^{n}(y_i-\hat{p}_i)x_i$, where $\eta$ is the learning rate.

In [283]:
X=default['balance'].values.reshape(-1)

In [284]:
X = (X - np.mean(X)) / np.std(X)

In [285]:
y=default['default'].values

In [286]:
X.shape, y.shape

((10000,), (10000,))

In [287]:
beta_0 = np.random.normal(0, 0.01)
beta_1 = np.random.normal(0, 0.01)
lr=0.0001
n_iter=10000
tol=1e-6
prev_loss=float('inf')

In [288]:
for _ in range(n_iter):
    p_hat=1/(1+np.exp(-(beta_0+beta_1*X)))
    
    loss = -np.mean(y * np.log(p_hat + 1e-15) + (1 - y) * np.log(1 - p_hat + 1e-15))
    # Check convergence
    if abs(prev_loss - loss) < tol:
        print(f"Converged at iteration {i}, loss = {loss:.6f}")
        break
    
    beta_0+=lr*np.sum(y-p_hat)
    beta_1+=lr*np.sum((y-p_hat) * X)

In [289]:
print("b_0 from scratch:",beta_0)
print("b_1 from scratch:",beta_1)

b_0 from scratch: -6.05767351529054
b_1 from scratch: 2.6597755249632526


In [290]:
from sklearn.linear_model import LogisticRegression

In [291]:
lr = LogisticRegression(penalty=None)

In [292]:
lr.fit(X.reshape(-1,1),y)

In [293]:
print("b_0 from sklearn:",lr.intercept_)
print("b_1 from sklearn:",lr.coef_)

b_0 from sklearn: [-6.05769037]
b_1 from sklearn: [[2.65978348]]


# Logistic Regression (Multiple Variables)

In [384]:
default.head()

Unnamed: 0,default,student,balance,income
0,0,0,729.526495,44361.625074
1,0,1,817.180407,12106.1347
2,0,0,1073.549164,31767.138947
3,0,0,529.250605,35704.493935
4,0,0,785.655883,38463.495879


In [456]:
X=default.drop('default',axis=1).values
y=default['default'].values

In [457]:
X.shape ,y.shape

((10000, 3), (10000,))

In [458]:
X = (X - X.mean(axis=0)) / X.std(axis=0)
X = np.hstack([np.ones((X.shape[0], 1)), X])

In [453]:
beta=np.zeros(X.shape[1])  # one beta per feature, including intercept
lr=0.0001
n_iter=10000
tol=1e-6
prev_loss=float('inf')

In [461]:
for i in range(n_iter):
    z=X@beta
    p_hat=1/(1+np.exp(-z))
    loss = -np.mean(y * np.log(p_hat + 1e-15) + (1 - y) * np.log(1 - p_hat + 1e-15))
    
    # Check convergence
    if abs(prev_loss - loss) < tol:
        print(f"Converged at iteration {i}, loss = {loss:.6f}")
        break
    
    grad = X.T@(y-p_hat)  # shape: (n_features + 1,)
    beta += lr * grad

In [463]:
print("beta from scratch:",beta)

beta from scratch: [-6.16565149 -0.29478268  2.77469481  0.04045401]


In [466]:
lr = LogisticRegression(penalty=None,fit_intercept=False)

In [467]:
lr.fit(X,y)

In [469]:
print("beta from sklearn:",lr.coef_.ravel())

beta from sklearn: [-6.1656557  -0.29478494  2.774699    0.04045078]
