In [2]:
import numpy as np

class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    The model is ``p = sigmoid(X @ w + b)`` and is fitted by minimising the
    mean binary cross-entropy between ``p`` and the 0/1 labels.

    Parameters
    ----------
    num_iters : int
        Maximum number of gradient-descent steps.
    lr : float
        Step size applied to both the weight and the bias gradient.
    tol : float
        Training stops as soon as the loss decreases by less than this
        amount (or increases) between two consecutive iterations.
    verbose : bool
        When true, print the loss every 100 iterations and on the last
        scheduled iteration.

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector of shape ``(F,)``; ``None`` until ``fit`` is called.
    b : float
        Bias term.
    """

    def __init__(self, num_iters=1000, lr=1e-2, tol=1e-6, verbose=False):
        self.num_iters = num_iters
        self.lr = lr
        self.tol = tol
        self.verbose = verbose
        self.w = None
        self.b = 0.0

    @staticmethod
    def _sigmoid(z):
        """Return the logistic function of ``z``, element-wise."""
        # Clipping keeps np.exp from overflowing for very negative logits.
        z = np.clip(z, -40, 40)
        return 1.0 / (1.0 + np.exp(-z))

    @staticmethod
    def _bce_loss(y, p):
        """Return the mean binary cross-entropy of probabilities ``p`` vs ``y``."""
        # Keep p strictly inside (0, 1) so neither log() sees zero.
        eps = 1e-12
        p = np.clip(p, eps, 1 - eps)
        return -np.mean(y * np.log(p) + (1 - y) * np.log(1 - p))

    def fit(self, X, y):
        """Fit the weights and bias to the training data.

        Parameters
        ----------
        X : array-like of shape (N, F)
            Training features; anything ``np.asarray`` can turn into a
            2-D float array (nested lists included).
        y : array-like with N elements
            Binary targets. Column vectors such as ``(N, 1)`` are flattened.

        Returns
        -------
        LogisticRegression
            ``self``, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, contains no samples, or ``y`` does not have
            exactly one label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D with shape (N, F); got {X.ndim} dimension(s)"
            )

        # Flatten so that an (N, 1) target cannot broadcast against the
        # (N,) prediction vector into an (N, N) error matrix.
        y = np.asarray(y, dtype=float).ravel()

        N, F = X.shape
        if N == 0:
            raise ValueError("cannot fit on an empty training set")
        if y.shape[0] != N:
            raise ValueError(
                f"X has {N} samples but y has {y.shape[0]} labels"
            )

        self.w = np.zeros(F)
        self.b = 0.0

        prev_loss = np.inf
        for t in range(self.num_iters):
            z = X @ self.w + self.b
            p = self._sigmoid(z)

            # Loss of the parameters *before* this iteration's update.
            loss = self._bce_loss(y, p)

            # gradients of BCE wrt w,b
            err = p - y  # (N,)
            dw = (X.T @ err) / N
            db = np.mean(err)

            # gradient descent
            self.w -= self.lr * dw
            self.b -= self.lr * db

            if self.verbose and (t % 100 == 0 or t == self.num_iters - 1):
                print(f"iter {t}  loss={loss:.6f}")

            # Early stopping when improvement is tiny. An increase in loss
            # gives a negative difference and therefore also stops training.
            if prev_loss - loss < self.tol:
                break
            prev_loss = loss

        return self

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for each row of ``X``.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """
        if self.w is None:
            raise ValueError("model is not fitted yet; call fit() first")
        X = np.asarray(X, dtype=float)
        return self._sigmoid(X @ self.w + self.b)

    def predict(self, X, threshold=0.5):
        """Return 0/1 labels: 1 where the probability is >= ``threshold``."""
        return (self.predict_proba(X) >= threshold).astype(int)

# Demo: fit the classifier on a three-sample toy set, then report the learned
# parameters together with its probabilities and labels for a few test rows.
X_train = np.array(
    [
        [1, 2],
        [3, 5],
        [5, 6],
    ]
)
y_train = np.array([1, 1, 0])

X_test = np.array(
    [
        [1, 3],
        [3, 8],
        [5, 6],
    ]
)

# verbose=True makes fit() print the training loss as it goes.
model = LogisticRegression(num_iters=2000, lr=0.1, verbose=True)
model.fit(X_train, y_train)

print("w:", model.w, "b:", model.b)
print("proba:", model.predict_proba(X_test))
print("pred:", model.predict(X_test))

iter 0  loss=0.693147
iter 100  loss=0.285145
iter 200  loss=0.167200
iter 300  loss=0.116095
iter 400  loss=0.088294
iter 500  loss=0.071000
iter 600  loss=0.059265
iter 700  loss=0.050804
iter 800  loss=0.044427
iter 900  loss=0.039454
iter 1000  loss=0.035471
iter 1100  loss=0.032211
iter 1200  loss=0.029494
iter 1300  loss=0.027197
iter 1400  loss=0.025229
iter 1500  loss=0.023524
iter 1600  loss=0.022034
iter 1700  loss=0.020721
iter 1800  loss=0.019554
iter 1900  loss=0.018511
iter 1999  loss=0.017582
w: [-5.46847428  3.31519672] b: 3.5642522460820723
proba: [0.99967824 0.99999886 0.0200959 ]
pred: [1 1 0]
