In [1]:
import numpy as np
import pandas as pd

In [1]:
class MyLogReg():
    """Binary logistic regression trained with batch gradient descent.

    A bias (intercept) column of ones is prepended to the feature matrix,
    so ``W[0]`` is the intercept and ``W[1:]`` are the feature weights.

    Attributes:
        n_iter: Number of gradient-descent steps performed by ``fit``.
        learning_rate: Step size multiplied by the gradient on each update.
        W: Weight vector (bias first); ``None`` until ``fit`` is called.
        eps: Small constant used to keep ``log`` arguments away from zero.
        score: Log-loss on the training data measured with the final
            weights; ``None`` until ``fit`` is called.
    """

    def __init__(
            self,
            n_iter,
            learning_rate
    ):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.W = None
        self.eps = 1e-15
        # Initialised here so get_best_score() never hits a missing attribute.
        self.score = None

    @staticmethod
    def _add_bias(X):
        """Return *X* as a float ndarray with a leading column of ones."""
        features = np.asarray(X, dtype=float)
        return np.column_stack([np.ones(features.shape[0]), features])

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` of *z*."""
        return 1 / (1 + np.exp(-z))

    def log_loss(self, y_true, y_pred):
        """Return the mean binary cross-entropy of *y_pred* against *y_true*.

        Predictions are clipped to ``[eps, 1 - eps]`` and ``eps`` is added
        again inside each logarithm, so the logarithms never receive zero.
        """
        y_pred = np.clip(y_pred, self.eps, 1 - self.eps)
        return -np.mean(y_true * np.log(y_pred + self.eps) + (1 - y_true) * np.log(1 - y_pred + self.eps))

    def fit(self, X, y, verbose=False):
        """Fit the weights by gradient descent on the log-loss.

        Args:
            X: 2-D feature table (DataFrame or array-like), one row per sample.
            y: Target values, one per row of *X*.
            verbose: Falsy for silent training. Otherwise the starting loss
                is printed and then the loss at every ``verbose``-th
                iteration (``True`` behaves like ``1``).

        Raises:
            ValueError: If *X* and *y* contain a different number of samples.
        """
        X_bias = self._add_bias(X)
        # Flatten so a column-shaped target cannot broadcast into a matrix.
        target = np.asarray(y, dtype=float).ravel()
        n_samples = target.shape[0]
        if X_bias.shape[0] != n_samples:
            raise ValueError(
                f"X has {X_bias.shape[0]} samples but y has {n_samples}"
            )

        self.W = np.ones(X_bias.shape[1])

        if verbose:
            print(f"start | loss: {self.log_loss(target, self.sigmoid(np.dot(X_bias, self.W)))}")

        for i in range(self.n_iter):
            y_pred = self.sigmoid(np.dot(X_bias, self.W))

            # The reported loss belongs to the weights *before* this step's
            # update; it is only evaluated when a line is actually printed.
            if verbose and (i + 1) % verbose == 0:
                print(f"{i + 1} | loss: {self.log_loss(target, y_pred)}")

            gradient = np.dot(X_bias.T, (y_pred - target)) / n_samples
            self.W -= self.learning_rate * gradient

        # No evaluation metric is configurable on this class, so the stored
        # score is the training log-loss of the final weights.
        self.score = float(
            self.log_loss(target, self.sigmoid(np.dot(X_bias, self.W)))
        )

    def get_coef(self):
        """Return the feature weights, i.e. ``W`` without the bias term."""
        return self.W[1:]

    def predict_proba(self, X):
        """Return the sigmoid of the linear score for every row of *X*."""
        return self.sigmoid(np.dot(self._add_bias(X), self.W))

    def predict(self, X, threshold=0.5):
        """Return 1 where the predicted probability exceeds *threshold*, else 0."""
        proba = self.predict_proba(X)
        return (proba > threshold).astype(int)

    def get_best_score(self):
        """Return the score stored by the last ``fit`` call, or ``None``."""
        return self.score

    def __str__(self):
        return f"{__class__.__name__} class: n_iter={self.n_iter}, learning_rate={self.learning_rate}"