In [1]:
import numpy as np

from sklearn import datasets
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm

In [2]:
learning_rate = 1e-2
n_iters = 1000
test_size = 0.2
random_state = 42

In [3]:
class LogisticRegression:
    """Linear regression with sigmoid function: binary classification"""
    def __init__(self, learning_rate=0.001, n_iters=1000) -> None:
        self.lr = learning_rate
        self.n_iters = n_iters
        self.betas = None
        self.intercept = None
    
    def fit(self, X: np.array, y: np.array) -> None:
        n_samples, n_features = X.shape

        self.betas = np.zeros(n_features)
        self.intercept = 0

        for _ in tqdm(range(self.n_iters)):
            linear_model = np.dot(X, self.betas) + self.intercept
            y_pred = self._sigmoid(linear_model)

            db = np.dot(X.T, (y_pred - y)) / n_samples
            di = np.sum(y_pred - y)

            self.betas -= self.lr * db
            self.intercept -= self.lr * di

    def predict(self, X) -> np.array:
        linear_model = np.dot(self.betas, X.T) + self.intercept
        y_pred_sigmoid = self._sigmoid(linear_model)
        return (y_pred_sigmoid >= 0.5).astype(int)
    
    def _sigmoid(self, x: np.array) -> np.array:
        return 1 / (1 + np.exp(-x, dtype=np.float128))

In [4]:
def tp(y_true, y_pred):
    return np.sum((y_true == 1) & (y_pred == 1))

def fp(y_true, y_pred):
    return np.sum((y_true == 0) & (y_pred == 1))

def fn(y_true, y_pred):
    return np.sum((y_true == 1) & (y_pred == 0))

def tn(y_true, y_pred):
    return np.sum((y_true == 0) & (y_pred == 0))

def accuracy(y_true, y_pred):
    tp_ = tp(y_true, y_pred)
    tn_ = tn(y_true, y_pred)
    fp_ = fp(y_true, y_pred)
    fn_ = fn(y_true, y_pred)
    return (tp_ + tn_) / (tp_ + fp_ + tn_ + fn_)

def precision(y_true, y_pred):
    tp_ = tp(y_true, y_pred)
    fp_ = fp(y_true, y_pred)
    return tp_ / (tp_ + fp_)

def recall(y_true, y_pred):
    tp_ = tp(y_true, y_pred)
    fn_ = fn(y_true, y_pred)
    return tp_ / (tp_ + fn_)

def f1_score(y_true, y_pred):
    return 2 / ((1 / recall(y_true, y_pred)) + (1 / precision(y_true, y_pred)))

In [5]:
X, y = datasets.make_classification(n_samples=1000,
                                    n_features=10,
                                    n_classes=2,
                                    random_state=random_state)

In [6]:
X_train, X_test, y_train, y_test = train_test_split(X, y,
                                                    test_size=0.2,
                                                    random_state=random_state)

In [7]:
log_reg = LogisticRegression(learning_rate=learning_rate,
                             n_iters=n_iters)
log_reg.fit(X_train, y_train)

  0%|          | 0/1000 [00:00<?, ?it/s]

In [8]:
preds = log_reg.predict(X_test)

In [9]:
accuracy(y_test, preds)

0.825

In [10]:
precision(y_test, preds)

0.8877551020408163

In [11]:
recall(y_test, preds)

0.7837837837837838

In [12]:
f1_score(y_test, preds)

0.832535885167464

In [13]:
print(classification_report(y_test, preds))

              precision    recall  f1-score   support

           0       0.76      0.88      0.82        89
           1       0.89      0.78      0.83       111

    accuracy                           0.82       200
   macro avg       0.83      0.83      0.82       200
weighted avg       0.83      0.82      0.83       200

