In [1]:
import numpy as np

In [None]:
class LogisticRegression():
    """Binary logistic regression trained with per-sample stochastic gradient descent.

    A bias column of ones is prepended to the features, so the learned weight
    vector ``w`` has shape ``(d + 1, 1)`` with the intercept in row 0.

    Parameters
    ----------
    tol : float
        Minimum decrease of the training loss (relative to the best loss seen
        so far) for an epoch to count as an improvement.
    epochs : int
        Maximum number of passes over the training data.
    eta : float
        Learning rate of the SGD update.
    patience : int
        Number of consecutive non-improving epochs tolerated before training
        is stopped early.
    verbose : bool
        When True (the default) the loss is printed after every epoch and a
        message is printed when early stopping triggers.

    Attributes set by ``fit``
    -------------------------
    w : ndarray of shape (d + 1, 1), learned weights (bias first).
    n_iter_ : int, number of epochs actually run.
    best_lost : float, best (lowest) training loss that counted as an improvement.
    """

    def __init__(self, tol = 1e-3, epochs = 10000, eta = 1e-3, patience=5, verbose=True):
        self.tol = tol
        self.epochs = epochs
        self.eta = eta
        self.patience = patience
        self.verbose = verbose
        # Early-stopping state; (re)initialised at the start of every fit().
        self.patience_counter = patience
        self.best_lost = np.inf

    @staticmethod
    def _add_bias(X):
        """Return X as a float array with a leading column of ones (the intercept term)."""
        X = np.asarray(X, dtype=float)
        return np.concatenate((np.ones((X.shape[0], 1)), X), axis=1)

    def sigmoid(self, z):
        """Logistic function 1 / (1 + exp(-z)), evaluated without overflow.

        Accepts a scalar or an array; a scalar input yields a NumPy scalar,
        an array input yields an array of the same shape.
        """
        z = np.asarray(z, dtype=float)
        # exp(-|z|) lies in (0, 1], so it can never overflow; pick the algebraic
        # form of the sigmoid that matches the sign of z.
        e = np.exp(-np.abs(z))
        out = np.where(z >= 0, 1.0 / (1.0 + e), e / (1.0 + e))
        # Indexing with () turns a 0-d result back into a scalar and leaves
        # real arrays untouched.
        return out[()]

    def cal_loss(self):
        """Return the summed binary cross-entropy over the training set as a float.

        Uses the identity  -[y*log(s) + (1-y)*log(1-s)] = log(1 + exp(z)) - y*z
        with s = sigmoid(z), which stays finite even when predictions saturate
        at exactly 0 or 1.
        """
        z = self.X_train @ np.asarray(self.w, dtype=float).ravel()
        y = np.asarray(self.y_train, dtype=float).ravel()
        return float(np.sum(np.logaddexp(0.0, z) - y * z))

    def fit(self, X_train, y_train):
        """Train on ``X_train`` (n, d) with 0/1 labels ``y_train`` and return the weights.

        Weights are initialised from ``np.random.rand`` and samples are visited
        in a fresh ``np.random.permutation`` order every epoch, so seeding the
        global NumPy RNG makes the run repeatable. Training stops early once
        ``patience`` consecutive epochs fail to lower the best loss by more
        than ``tol``.

        Returns
        -------
        ndarray of shape (d + 1, 1)
            The learned weight vector (also stored as ``self.w``).
        """
        X_train = np.asarray(X_train, dtype=float)
        self.n, self.d = X_train.shape
        self.X_train = self._add_bias(X_train)
        # Flatten so that both (n,) and (n, 1) label arrays are accepted.
        self.y_train = np.asarray(y_train).ravel()
        self.w = np.random.rand(self.d + 1, 1)

        # Reset early-stopping state so that calling fit() again starts clean.
        self.best_lost = np.inf
        self.patience_counter = self.patience
        self.n_iter_ = 0

        for epoch in range(self.epochs):
            for i in np.random.permutation(self.n):
                x = self.X_train[i]
                error = self.sigmoid(x @ self.w[:, 0]) - self.y_train[i]
                self.w[:, 0] -= self.eta * error * x
            self.n_iter_ = epoch + 1

            loss = self.cal_loss()
            if self.verbose:
                print(f'Epoch: {epoch} / {self.epochs}. Loss = {loss}')

            if self.best_lost - loss > self.tol:
                self.best_lost = loss
                self.patience_counter = self.patience
            else:
                self.patience_counter -= 1
                if self.patience_counter <= 0:
                    if self.verbose:
                        print('Early Stopping')
                    # Actually leave the training loop; without this the
                    # remaining epochs would still be executed.
                    break
        return self.w

    def predict(self, X_test):
        """Return a list of 0/1 labels for ``X_test`` (m, d), thresholding the probability at 0.5."""
        proba = self.sigmoid(self._add_bias(X_test) @ self.w)
        return (proba.ravel() > 0.5).astype(int).tolist()

    def evaluate(self, X_test, y_test):
        """Return the accuracy (fraction of correct predictions) on ``X_test`` / ``y_test``."""
        pred = np.asarray(self.predict(X_test))
        # Flatten the labels so a column vector is compared element-wise
        # instead of being broadcast against the predictions.
        y_true = np.asarray(y_test).ravel()
        return np.mean(pred == y_true)


In [78]:
from sklearn.datasets import load_breast_cancer
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

# Seed NumPy's global RNG: LogisticRegression.fit draws its initial weights and
# its per-epoch sample order from np.random, so this makes the run repeatable.
np.random.seed(42)

# Load the breast-cancer dataset shipped with scikit-learn: X holds the features, y the labels.
breast_cancer = load_breast_cancer()
X = breast_cancer.data
y = breast_cancer.target
# Hold out 20% of the samples for testing; stratify=y keeps the class proportions equal in both splits.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Standardise the features: the scaler is fitted on the training split only and
# the same statistics are then applied to the test split.
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Train the from-scratch model; fit() returns the weight vector, which is shown
# as the cell output because it is the last expression.
lg = LogisticRegression(epochs=2000, eta=1e-4)
lg.fit(X_train, y_train)

Epoch: 0 / 2000. Loss = [[2837.95029512]]
Epoch: 1 / 2000. Loss = [[2669.1745717]]
Epoch: 2 / 2000. Loss = [[2501.67260069]]
Epoch: 3 / 2000. Loss = [[2335.74936225]]
Epoch: 4 / 2000. Loss = [[2171.67866198]]
Epoch: 5 / 2000. Loss = [[2009.90253974]]
Epoch: 6 / 2000. Loss = [[1850.79247371]]
Epoch: 7 / 2000. Loss = [[1694.94664962]]
Epoch: 8 / 2000. Loss = [[1543.02229724]]
Epoch: 9 / 2000. Loss = [[1395.70767464]]
Epoch: 10 / 2000. Loss = [[1253.92930887]]
Epoch: 11 / 2000. Loss = [[1118.23300537]]
Epoch: 12 / 2000. Loss = [[989.94457657]]
Epoch: 13 / 2000. Loss = [[869.80418892]]
Epoch: 14 / 2000. Loss = [[759.27125064]]
Epoch: 15 / 2000. Loss = [[659.44546002]]
Epoch: 16 / 2000. Loss = [[571.7949362]]
Epoch: 17 / 2000. Loss = [[496.51813986]]
Epoch: 18 / 2000. Loss = [[433.24899607]]
Epoch: 19 / 2000. Loss = [[380.91617295]]
Epoch: 20 / 2000. Loss = [[338.0289819]]
Epoch: 21 / 2000. Loss = [[303.16090689]]
Epoch: 22 / 2000. Loss = [[274.6184542]]
Epoch: 23 / 2000. Loss = [[251.14215

array([[ 0.51716049],
       [-0.08016596],
       [-0.55828521],
       [-0.42069351],
       [-0.87513582],
       [-0.54525931],
       [-0.27187463],
       [-0.23557236],
       [-0.61697524],
       [-0.03357409],
       [ 0.26481024],
       [-0.36872975],
       [ 0.29347594],
       [-0.84124121],
       [-0.94827316],
       [-0.22471357],
       [ 0.42487307],
       [ 0.19663762],
       [-0.36719285],
       [ 0.31487376],
       [ 0.69071022],
       [-1.12895586],
       [-1.26604711],
       [-0.81929321],
       [-0.75492834],
       [-0.38409146],
       [-0.27243278],
       [-0.60497461],
       [-0.75525593],
       [-0.84056954],
       [ 0.16152089]])

In [79]:
# Predict hard 0/1 labels for the held-out test split and print them as a plain list.
predict = lg.predict(X_test)
print(predict)

[0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1]


In [80]:
# Accuracy on the test split: the fraction of samples whose predicted label equals y_test.
lg.evaluate(X_test, y_test)

np.float64(0.9649122807017544)

# Logistic Regression from sklearn