# Logistic Regression, Different Loss Functions

## Data

In [1]:
import numpy as np
from numpy.random import binomial, normal
from scipy.stats import bernoulli, binom

# Seed NumPy's global generator so the draws below are repeatable.
np.random.seed(37)

n = 10_000

# Design matrix: a constant column (intercept) followed by two
# standard-normal features. The draws happen left to right.
X = np.column_stack((
    np.ones(n),
    normal(0.0, 1.0, n),
    normal(0.0, 1.0, n),
))

# Linear predictor with coefficients (1, 2, 3) plus standard-normal noise
# added on the logit scale.
z = X.dot(np.array([1.0, 2.0, 3.0])) + normal(0.0, 1.0, n)

# Sigmoid of the noisy logit gives one success probability per row.
p = 1.0 / (1.0 + np.exp(-z))

# One Bernoulli trial per row (binomial with a single trial).
y = binom.rvs(1, p)

In [2]:
# Design matrix: constant column followed by the two sampled features.
X

array([[ 1.        , -0.05446361,  0.13388209],
       [ 1.        ,  0.67430807, -0.96145276],
       [ 1.        ,  0.34664703, -0.103717  ],
       ...,
       [ 1.        , -0.72555704, -0.91534393],
       [ 1.        ,  0.33369825, -1.25826271],
       [ 1.        ,  0.77013718,  0.38102387]])

In [3]:
# Binary labels sampled from the per-row probabilities p.
y

array([0, 1, 1, ..., 0, 0, 1])

In [4]:
# Per-row success probabilities (sigmoid of the noisy linear predictor z).
p

array([0.81982517, 0.56877323, 0.87879833, ..., 0.00978107, 0.03430839,
       0.98321842])

## Learn weights with Scikit-Learn

In [5]:
from sklearn.linear_model import LogisticRegression

# X already carries a constant column, so no separate intercept is fitted.
# NOTE(review): scikit-learn's LogisticRegression applies an L2 penalty by
# default; confirm whether a penalised fit is the intended baseline.
lr = LogisticRegression(fit_intercept=False, solver='lbfgs').fit(X, y)

# coef_ holds one row of coefficients here; print that row.
print(lr.coef_[0])

[0.89312519 1.71445566 2.59091944]


## Loss functions

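Both losses are averaged over all samples; $\hat{y} = Xw$ denotes the linear predictor (the logit), not a probability.

- `loss1` (log-loss, i.e. the negative Bernoulli log-likelihood): $-\hat{y} y + \log{(1 + \exp \hat{y})}$
- `loss2` (squared error on the predicted probability): $(y - \sigma(\hat{y}))^2$, where $\sigma(t) = 1 / (1 + \exp(-t))$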

In [6]:
import autograd.numpy as np
from autograd import grad
from autograd.numpy import exp, log, sqrt

def loss1(w, X, y):
    """Mean logistic log-loss, evaluated on the logit scale.

    For every row the loss is ``-z * y + log(1 + exp(z))`` with
    ``z = np.dot(X, w)``; the per-row values are averaged.

    Parameters
    ----------
    w : array
        Coefficient vector, one entry per column of ``X``.
    X : array
        Design matrix with one row per sample.
    y : array
        Target per sample. The notebook passes either the 0/1 labels or
        the underlying probabilities.

    Returns
    -------
    float
        The mean loss over all samples.
    """
    logits = np.dot(X, w)
    # logaddexp(0, z) equals log(1 + exp(z)) but stays finite for large z,
    # where exp(z) on its own would overflow to inf.
    return np.mean(np.logaddexp(0.0, logits) - (logits * y), axis=None)

def loss2(w, X, y):
    """Mean squared error between ``y`` and the sigmoid of ``np.dot(X, w)``.

    Parameters
    ----------
    w : array
        Coefficient vector, one entry per column of ``X``.
    X : array
        Design matrix with one row per sample.
    y : array
        Target per sample.

    Returns
    -------
    float
        The mean of the squared residuals over all samples.
    """
    logits = np.dot(X, w)
    probabilities = 1 / (1 + exp(-logits))
    residuals = y - probabilities
    return np.mean(residuals ** 2.0, axis=None)

# Gradient functions for both losses. autograd's grad differentiates with
# respect to the first positional argument by default, which is w here.
loss1_grad, loss2_grad = grad(loss1), grad(loss2)

## Learn weights with autograd

In [7]:
def learn_weights(X, y, loss_grad, alpha=0.05, max_iter=10_000, tol=None):
    """Fit a weight vector by plain (full-batch) gradient descent.

    Parameters
    ----------
    X : array
        Design matrix; ``X.shape[1]`` determines the number of weights.
    y : array
        Targets handed through to ``loss_grad`` unchanged.
    loss_grad : callable
        ``loss_grad(w, X, y)`` must return the gradient of the loss with
        respect to ``w``.
    alpha : float, optional
        Step size applied to every gradient.
    max_iter : int, optional
        Maximum number of gradient steps.
    tol : float or None, optional
        When given, iteration stops as soon as every gradient component is
        smaller than ``tol`` in absolute value. ``None`` (the default)
        always runs exactly ``max_iter`` steps.

    Returns
    -------
    array
        The weight vector after the last step.
    """
    # Every weight starts at the same small constant.
    w = np.full(X.shape[1], 0.01)

    for _ in range(max_iter):
        gradient = loss_grad(w, X, y)

        # Optional early exit once the gradient has effectively vanished.
        if tol is not None and np.all(np.abs(gradient) < tol):
            break

        w = w - (gradient * alpha)

    return w

### Binary y

In [8]:
# Gradient descent on loss1 using the sampled 0/1 labels y.
learn_weights(X, y, loss1_grad)

array([0.89701086, 1.7217453 , 2.60190376])

In [9]:
# Gradient descent on loss2 using the sampled 0/1 labels y.
learn_weights(X, y, loss2_grad)

array([0.84558344, 1.6619042 , 2.51455477])

### Probabilistic p

In [10]:
# Gradient descent on loss1 using the probabilities p as targets instead of labels.
learn_weights(X, p, loss1_grad)

array([0.86917161, 1.74138012, 2.60448694])

In [11]:
# Gradient descent on loss2 using the probabilities p as targets instead of labels.
learn_weights(X, p, loss2_grad)

array([0.82699915, 1.66435487, 2.49538807])

## Links

- [How to understand logistic regression cost function formula?](https://stats.stackexchange.com/questions/473426/how-to-understand-logistic-regression-cost-function-formula)
- [Loss function for Logistic Regression](https://stats.stackexchange.com/questions/268430/loss-function-for-logistic-regression)
- [Can logistic regression take continuous probability as label?](https://stats.stackexchange.com/questions/549910/can-logistic-regression-take-continuous-probability-as-label)