In [13]:
import numpy as np
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_iris

In [14]:
# Load the iris dataset and pull out the feature matrix and label vector.
dataset = load_iris()
X = dataset.data
y = dataset.target
# Last expression of the cell: display both shapes as a sanity check.
(X.shape, y.shape)


((150, 4), (150,))

In [15]:
# Keep only the samples whose label is below 2, turning the task into a
# two-class problem; the same boolean mask filters features and labels.
mask = y < 2
X, y = X[mask], y[mask]
(X.shape, y.shape)

((100, 4), (100,))

In [16]:
# Stratify on the labels so both splits keep the 50:50 class balance.
stratify = y
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=stratify,
)

In [19]:
class LogisticRegression:
    """
    Binary logistic regression fitted with full-batch gradient descent.

    ``weights`` and ``bias`` are ``None`` until ``train`` has been called.
    ``loss`` holds the losses recorded during the most recent ``train`` call.
    """

    def __init__(self, lr=1e-2, epoch=10000):
        """
        Initialize the logistic regression model
        Args:
            lr: float, learning rate
            epoch: int, number of epochs
        """
        self.lr = lr
        self.epoch = epoch
        self.weights = None
        self.bias = None
        self.loss = []

    @staticmethod
    def _prepare(X, y):
        """
        Coerce inputs to float arrays and check that they are compatible
        Args:
            X (array-like): input features of shape (n_samples, n_features)
            y (array-like): target values; flattened to shape (n_samples, )
        Returns:
            tuple: (X, y) as numpy float arrays
        Raises:
            ValueError: if X is empty or X and y disagree on n_samples
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column vector (n_samples, 1) cannot broadcast against
        # the (n_samples, ) prediction vector into an (n, n) matrix.
        y = np.asarray(y, dtype=float).ravel()
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample.")
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X and y have inconsistent sample counts: {X.shape[0]} != {y.shape[0]}"
            )
        return X, y

    def sigmoid(self, z):
        """
        Compute sigmoid function
        Args:
            z (array): input value of shape (n_samples, )
        Returns:
            array: sigmoid of input value of shape (n_samples, )
        """
        # Clip first so np.exp never receives a magnitude that overflows.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def predict_proba(self, X):
        """
        Compute the sigmoid of the linear model for each sample
        Args:
            X (array-like): input features of shape (n_samples, n_features)
        Returns:
            array: values in (0, 1) of shape (n_samples, )
        Raises:
            RuntimeError: if the model has not been trained yet
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("Model is not trained yet; call train() first.")
        X = np.asarray(X, dtype=float)
        return self.sigmoid(np.dot(X, self.weights) + self.bias)

    def compute_cost(self, X, y):
        """
        Compute binary cross-entropy loss
        Args:
            X (array): input features of shape (n_samples, n_features)
            y (array): target values of shape (n_samples, )
        Returns:
            float: binary cross-entropy loss
        """
        X, y = self._prepare(X, y)
        num_samples = X.shape[0]
        s = self.predict_proba(X)
        s = np.clip(s, 1e-15, 1 - 1e-15)  # avoid log(0)
        cost = (-1 / num_samples) * np.sum(y * np.log(s) + (1 - y) * np.log(1 - s))
        return cost

    def compute_gradient(self, X, y):
        """
        Compute gradient of the cost function
        Args:
            X (array): input features of shape (n_samples, n_features)
            y (array): target values of shape (n_samples, )
        Returns:
            tuple: gradient of weights and bias
        """
        X, y = self._prepare(X, y)
        num_samples = X.shape[0]
        error = self.predict_proba(X) - y
        dw = (1 / num_samples) * np.dot(X.T, error)
        db = (1 / num_samples) * np.sum(error)
        return dw, db

    def train(self, X, y, log_every=100, verbose=True):
        """
        Train the logistic regression model
        Args:
            X (array): input features of shape (n_samples, n_features)
            y (array): target values of shape (n_samples, )
            log_every (int): record the loss every this many iterations;
                0 or None disables recording
            verbose (bool): also print each recorded loss
        """
        X, y = self._prepare(X, y)

        # Initialize parameters
        self.weights = np.zeros(X.shape[1])
        self.bias = 0.0
        # Start a fresh history so retraining (or re-running the training
        # cell) does not append to the losses of an earlier run.
        self.loss = []

        # Gradient descent. The loop covers indices 0..epoch inclusive, i.e.
        # epoch + 1 parameter updates.
        for i in range(self.epoch + 1):
            # Compute gradient
            dw, db = self.compute_gradient(X, y)
            # Update parameters
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

            # The loss is measured after the update of iteration i.
            if log_every and i % log_every == 0:
                loss = self.compute_cost(X, y)
                self.loss.append(loss)
                if verbose:
                    print(f"Epoch {i}, Loss: {loss}")

    def predict(self, X, threshold=0.5):
        """
        Make predictions
        Args:
            X (array): input features of shape (n_samples, n_features)
            threshold (float): minimum sigmoid output counted as positive
        Returns:
            array: boolean array of shape (n_samples, ); True where the
                sigmoid output is >= threshold
        """
        return self.predict_proba(X) >= threshold

In [22]:
# Fit the from-scratch model on the training split (prints the loss log).
model = LogisticRegression(epoch=10_000)
model.train(X_train, y_train)

Epoch 0, Loss: 0.6869227525215216
Epoch 100, Loss: 0.3919876807139968
Epoch 200, Loss: 0.26335082695983625
Epoch 300, Loss: 0.1955577248572467
Epoch 400, Loss: 0.15477202865907513
Epoch 500, Loss: 0.12783999732921023
Epoch 600, Loss: 0.10883257419317963
Epoch 700, Loss: 0.0947419283602128
Epoch 800, Loss: 0.08389660805250809
Epoch 900, Loss: 0.07529942770733478
Epoch 1000, Loss: 0.0683209302810859
Epoch 1100, Loss: 0.06254519147114798
Epoch 1200, Loss: 0.057686821487391016
Epoch 1300, Loss: 0.05354371547375815
Epoch 1400, Loss: 0.04996886059294166
Epoch 1500, Loss: 0.04685282850677332
Epoch 1600, Loss: 0.04411252386105618
Epoch 1700, Loss: 0.04168373471708385
Epoch 1800, Loss: 0.03951606990875181
Epoch 1900, Loss: 0.03756943841052399
Epoch 2000, Loss: 0.03581155042624604
Epoch 2100, Loss: 0.03421611091756375
Epoch 2200, Loss: 0.03276149201271555
Epoch 2300, Loss: 0.03142974270686969
Epoch 2400, Loss: 0.030205840094625964
Epoch 2500, Loss: 0.029077116189499055
Epoch 2600, Loss: 0.028032

In [24]:
# Score the held-out split and print per-class precision / recall / F1.
y_pred = model.predict(X_test)
print(
    classification_report(y_true=y_test, y_pred=y_pred)
)

              precision    recall  f1-score   support

           0       1.00      1.00      1.00        10
           1       1.00      1.00      1.00        10

    accuracy                           1.00        20
   macro avg       1.00      1.00      1.00        20
weighted avg       1.00      1.00      1.00        20

