In [2]:
import numpy as np

class LogisticRegressionSGD:
    """Binary logistic regression trained with mini-batch SGD.

    The model is ``sigmoid(X @ weights)``; no intercept term is fitted, so
    append a constant column to ``X`` if a bias is required.

    Args:
        learning_rate: Step size applied to every mini-batch gradient.
        batch_size: Number of samples per mini-batch. The last batch of an
            epoch is smaller when the sample count is not a multiple of it.
        max_iters: Number of full passes (epochs) over the training data.

    Attributes:
        weights: 1-D array with one coefficient per feature, or ``None``
            until ``fit`` has been called.
    """

    # Logits are clipped to +/- this bound before exponentiation so that
    # np.exp never overflows. Beyond it the sigmoid is within 1e-200 of 0 or 1.
    _MAX_LOGIT = 500.0

    def __init__(self, learning_rate=0.01, batch_size=32, max_iters=1000):
        self.learning_rate = learning_rate
        self.batch_size = batch_size
        self.max_iters = max_iters
        self.weights = None

    def sigmoid(self, z):
        """Return the element-wise logistic function ``1 / (1 + exp(-z))``.

        The input is clipped to ``[-_MAX_LOGIT, _MAX_LOGIT]`` first, which
        avoids floating-point overflow in ``np.exp`` for large negative ``z``
        while leaving results for ordinary inputs unchanged.
        """
        z = np.clip(z, -self._MAX_LOGIT, self._MAX_LOGIT)
        return 1 / (1 + np.exp(-z))

    def compute_loss(self, X, y):
        """Return the mean binary cross-entropy of the current weights on (X, y).

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of 0/1 labels; flattened to shape (n_samples,).
        """
        y = np.asarray(y).ravel()
        y_pred = self.sigmoid(np.dot(X, self.weights))
        # A small epsilon keeps log() finite when a prediction saturates at 0 or 1.
        epsilon = 1e-10
        loss = -np.mean(y * np.log(y_pred + epsilon) + (1 - y) * np.log(1 - y_pred + epsilon))
        return loss

    def compute_gradient(self, X, y):
        """Return the gradient of the mean cross-entropy w.r.t. the weights.

        Args:
            X: Array of shape (n_samples, n_features).
            y: Array-like of 0/1 labels; flattened to shape (n_samples,).
        """
        y = np.asarray(y).ravel()
        y_pred = self.sigmoid(np.dot(X, self.weights))
        gradient = np.dot(X.T, (y_pred - y)) / len(y)
        return gradient

    def fit(self, X, y):
        """Train the weights with mini-batch SGD, starting from zeros.

        Shuffling uses the global ``np.random`` state, so seed it with
        ``np.random.seed`` for reproducible runs. The loss on the full
        training set is printed every 100 epochs. The caller's arrays are
        never modified.

        Args:
            X: Array-like of shape (n_samples, n_features).
            y: Array-like of 0/1 labels with n_samples entries. A column
                vector of shape (n_samples, 1) is accepted and flattened.

        Raises:
            ValueError: If ``X`` is not 2-D, if ``X`` and ``y`` disagree on
                the number of samples, or if ``batch_size`` is below 1.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so an (n, 1) label column cannot broadcast against the
        # (n,) prediction vector into an (n, n) matrix.
        y = np.asarray(y).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got {X.ndim}-D input"
            )
        if X.shape[0] != y.shape[0]:
            raise ValueError(
                f"X has {X.shape[0]} samples but y has {y.shape[0]} labels"
            )
        if self.batch_size < 1:
            raise ValueError("batch_size must be a positive integer")

        n_samples = X.shape[0]
        self.weights = np.zeros(X.shape[1])

        for i in range(self.max_iters):
            # Reshuffle every epoch so mini-batches differ between passes.
            # Fancy indexing returns copies, leaving the caller's data intact.
            indices = np.arange(n_samples)
            np.random.shuffle(indices)
            X = X[indices]
            y = y[indices]

            for batch_start in range(0, n_samples, self.batch_size):
                batch_end = batch_start + self.batch_size
                gradient = self.compute_gradient(
                    X[batch_start:batch_end], y[batch_start:batch_end]
                )
                self.weights -= self.learning_rate * gradient

            # Progress report every 100 epochs.
            if i % 100 == 0:
                loss = self.compute_loss(X, y)
                print(f"Iteration {i}, Loss: {loss}")

    def predict(self, X):
        """Return 0/1 integer labels for X, thresholding the probability at 0.5.

        ``fit`` must have been called first so that ``weights`` is set.
        """
        y_pred = self.sigmoid(np.dot(X, self.weights))
        return (y_pred >= 0.5).astype(int)

# Demo: fit the classifier on a toy separable problem and report training accuracy
if __name__ == "__main__":
    # Seed the global RNG so the synthetic data and the SGD shuffling are reproducible
    np.random.seed(42)

    # 100 samples with 2 features; the positive class is where the features sum above zero
    X = np.random.randn(100, 2)
    y = (X[:, 0] + X[:, 1] > 0).astype(int)

    # Build the model and train it on the full synthetic set
    model = LogisticRegressionSGD(learning_rate=0.01, batch_size=10, max_iters=1000)
    model.fit(X, y)

    # Score on the same data used for training (there is no held-out split here)
    predictions = model.predict(X)
    accuracy = (predictions == y).mean()
    print(f"Accuracy: {accuracy * 100:.2f}%")


Iteration 0, Loss: 0.678071322953969
Iteration 100, Loss: 0.27541501519448713
Iteration 200, Loss: 0.2102191347994054
Iteration 300, Loss: 0.17999393020274776
Iteration 400, Loss: 0.16148696951164646
Iteration 500, Loss: 0.1485784134805005
Iteration 600, Loss: 0.13886616460488407
Iteration 700, Loss: 0.13118763415156043
Iteration 800, Loss: 0.12490137054229311
Iteration 900, Loss: 0.11961990147443118
Accuracy: 99.00%
