# Logistic regression

## Generate data

In [1]:
import numpy as np
from numpy.random import binomial, normal
from scipy.stats import bernoulli, binom

np.random.seed(37)

n = 10000

X = np.hstack([
    np.array([1 for _ in range(n)]).reshape(n, 1), 
    normal(0.0, 1.0, n).reshape(n, 1), 
    normal(0.0, 1.0, n).reshape(n, 1)
])

z = np.dot(X, np.array([1.0, 2.0, 3.0])) + normal(0.0, 0.2, n)
p = 1.0 / (1.0 + np.exp(-z))
y = binom.rvs(1, p)

## Types of logistic regression

### Logistic regression with L1 penalty

In [2]:
from sklearn import linear_model

model = linear_model.LogisticRegression(penalty='l1', solver='liblinear', fit_intercept=False)
model.fit(X, y)

coefs = ' + '.join([f'{c:.5f} * X_{i-1}' if i > 0 else f'{c:.5f}' for i, c in enumerate(model.coef_[0])])
print(f'y = {coefs}')

y = 0.97987 + 1.90850 * X_0 + 2.91831 * X_1


### Logistic regression with L2 penalty

In [3]:
model = linear_model.LogisticRegression(penalty='l2', solver='liblinear')
model.fit(X, y)

coefs = ' + '.join([f'{c:.5f} * X_{i-1}' if i > 0 else f'{c:.5f}' for i, c in enumerate(model.coef_[0])])
print(f'y = {coefs}')

y = 0.48904 + 1.90329 * X_0 + 2.90994 * X_1


## Performance