# Logistic Regression from Scratch

This notebook implements binary and multiclass logistic regression from scratch.
The focus is on understanding assumptions, loss functions, and optimization behavior,
not performance or abstractions.

In [None]:
import numpy as np

In [None]:
# Logistic regression: discriminative model
# - Directly models P(y | x), not the data distribution P(x | y)
# - Uses a linear score in x; sigmoid/softmax only map scores to (0, 1)
# - Trains by minimizing log-loss (penalizes confident wrong predictions)
# - Makes no explicit assumptions about how x is generated

# Loss intuition
# - For each sample, we want the predicted probability of the true class to be high
# - Log-loss is just the negative log-likelihood over all samples
# - Using log turns products into sums and magnifies large errors

In [None]:
# Binary logistic regression setup (data + helper)
# Two small 2-D clusters: the first four rows are class 0, the last four class 1.
X = np.array([
    [1.0, 1.0],
    [1.2, 0.8],
    [0.8, 1.3],
    [1.1, 1.4],
    [3.0, 3.1],
    [2.8, 2.9],
    [3.2, 2.7],
    [3.1, 3.3],
])
Y = np.repeat([0, 1], 4)

# Shape bookkeeping: number of rows (samples) and columns (features) of X.
n_samples = X.shape[0]
n_features = X.shape[1]


def sigmoid(z):
    """Logistic function: map real-valued scores to probabilities between 0 and 1.

    Numerically stable: only ``exp(-|z|)`` is ever evaluated, so very large
    positive or negative scores cannot overflow (the naive
    ``1 / (1 + exp(-z))`` overflows in ``exp`` for large negative ``z``).

    Parameters
    ----------
    z : scalar or array-like of real numbers
        Linear score(s).

    Returns
    -------
    NumPy scalar for scalar input, otherwise an ndarray with the shape of ``z``.
    """
    z = np.asarray(z)
    decay = np.exp(-np.abs(z))  # exponent is never positive, so this cannot overflow
    # z >= 0: 1 / (1 + e^-z).  z < 0: the algebraically equal e^z / (1 + e^z).
    probs = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
    # Indexing with () unwraps a 0-d result to a scalar and leaves arrays as they are.
    return probs[()]

In [None]:
# Binary logistic regression (gradient descent)
# - Linear score z = w·x + b, sigmoid maps z to probability
# - We update weights to minimize average log-loss over the data

In [None]:
# Batch gradient descent for binary logistic regression.
cycles = 200  # fixed number of gradient steps
step_size = 0.1
w0 = 0.0  # bias term
w1 = np.zeros(n_features)  # one weight per feature

for i in range(cycles):
    z = np.dot(X, w1) + w0
    y_pred = sigmoid(z)
    # Gradient of the mean log-loss with respect to the score z is (y_pred - Y).
    # The sign matters: the updates below subtract the gradient, so using
    # (Y - y_pred) here would climb the loss instead of descending it.
    err = y_pred - Y
    dw1 = (1 / n_samples) * np.dot(X.T, err)
    dw0 = (1 / n_samples) * np.sum(err)
    w0 -= step_size * dw0
    w1 -= step_size * dw1
    if i % 50 == 0:
        # Loss of the predictions made before this step's update.
        # The 1e-9 keeps log() finite if a probability is exactly 0 or 1.
        loss = -np.mean(Y * np.log(y_pred + 1e-9) + (1 - Y) * np.log(1 - y_pred + 1e-9))
        print(f"iter {i}, loss = {loss:.4f}")

# Sanity check: X_new repeats the training points, so this prints the
# fitted model's labels for the training data.
X_new = np.array([[1.0, 1.0], [1.2, 0.8], [0.8, 1.3], [1.1, 1.4],
                  [3.0, 3.1], [2.8, 2.9], [3.2, 2.7], [3.1, 3.3]])

print("binary LR predictions (0/1):")
for x in X_new:
    z = np.dot(x, w1) + w0
    p1 = sigmoid(z)
    # Threshold the class-1 probability at 0.5.
    print(1 if p1 >= 0.5 else 0)

In [None]:
# Multiclass logistic regression (softmax)
# - One linear score per class
# - Softmax turns scores into a probability distribution over classes
# - Trained with cross-entropy loss via gradient descent

# Multiclass setup: hyperparameters, toy data, parameters, one-hot targets.
cycles = 1000  # fixed number of gradient steps
step_size = 0.1

X = np.array([[1.0, 1.0], [1.2, 0.8], [0.8, 1.3], [1.1, 1.4],
              [3.0, 3.1], [2.8, 2.9], [3.2, 2.7], [3.1, 3.3]])
Y = np.array([0, 0, 1, 1, 1, 2, 2, 2])

# Derive the shape from this cell's X rather than reusing values computed for
# an earlier dataset, so the parameter and target shapes always match X.
n_samples, n_features = X.shape

classes = np.unique(Y)
w0_mult = np.zeros((1, len(classes)))           # one bias per class
w_mult = np.zeros((n_features, len(classes)))   # one weight column per class

# One-hot targets: Y_multi[i, k] is 1.0 when sample i has label classes[k].
Y_multi = (Y[:, None] == classes[None, :]).astype(float)


def softmax(z):
    """Row-wise softmax of a 2-D score matrix.

    Each row's maximum is subtracted before exponentiating; this leaves the
    result unchanged but keeps the exponentials from overflowing.

    Parameters
    ----------
    z : 2-D array of scores, one row per sample and one column per class.

    Returns
    -------
    Array of the same shape whose rows are non-negative and sum to 1.
    """
    row_peak = np.max(z, axis=1, keepdims=True)
    weights = np.exp(z - row_peak)
    row_total = np.sum(weights, axis=1, keepdims=True)
    return weights / row_total


# Batch gradient descent on the mean cross-entropy loss.
for i in range(cycles):
    scores = np.dot(X, w_mult) + w0_mult
    dominance = softmax(scores)
    # Gradient of the cross-entropy with respect to the scores: predicted
    # probabilities minus the one-hot targets.
    err = dominance - Y_multi
    # Average over the rows of the X actually being trained on.
    dw1 = (1 / X.shape[0]) * np.dot(X.T, err)
    dw0 = (1 / X.shape[0]) * np.sum(err, axis=0, keepdims=True)
    w0_mult -= step_size * dw0
    w_mult -= step_size * dw1

# Recompute the probabilities with the final parameters. The value left over
# from the loop predates the last update, and would not exist at all if
# cycles were 0.
dominance = softmax(np.dot(X, w_mult) + w0_mult)

print("softmax class probabilities:")
print(dominance)
print("multiclass LR predictions:")
# Column index of the highest probability in each row.
print(np.argmax(dominance, axis=1))

## Known limitations

- Fixed learning rate (no scheduling or adaptivity).
- No regularization (L1/L2 not implemented).
- Stopping is based on a fixed iteration count, not a convergence test.
- No explicit train/validation split or evaluation metrics beyond basic sanity checks.