<a href="https://colab.research.google.com/github/Savvythelegend/MLE/blob/main/logistic_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np

class LogisticRegression:
    """
    Binary logistic regression classifier trained with batch gradient descent.

    Parameters:
    ----------
    learning_rate : float
        The step size for gradient descent updates.
    num_iter : int
        Number of full-batch gradient descent iterations run by ``fit``.

    Attributes:
    ----------
    w : ndarray of shape (n_features,) or None
        Weight vector; ``None`` until ``fit`` has been called.
    b : float or None
        Bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, num_iter=1000):
        self.learning_rate = learning_rate
        self.num_iter = num_iter
        self.w = None
        self.b = None

    def sigmoid(self, z):
        """
        Numerically stable sigmoid activation function.

        Parameters:
        ----------
        z : float or array-like
            Linear combination of weights and inputs.

        Returns:
        -------
        float or ndarray
            1 / (1 + exp(-z)) applied element-wise, with the same shape as ``z``.
        """
        z = np.asarray(z, dtype=float)
        # exp() is only ever evaluated on non-positive values, so it can
        # underflow to 0 but never overflow (no RuntimeWarning for large |z|).
        decay = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + exp(-z));  z < 0: exp(z) / (1 + exp(z)) -- the same
        # function, written so that the exponent is never positive.
        return np.where(z >= 0, 1.0, decay) / (1.0 + decay)

    def fit(self, X, y):
        """
        Train the logistic regression model using gradient descent.

        Weights and bias are reset to zero on every call, so calling ``fit``
        again restarts training from scratch.

        Parameters:
        ----------
        X : array-like of shape (n_samples, n_features)
            Training data.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary target labels (0 or 1).

        Raises:
        ------
        ValueError
            If ``X`` is not 2-dimensional, is empty, or if ``y`` does not
            contain exactly one label per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten y so a column vector (n, 1) cannot broadcast against the
        # (n,) prediction vector into an (n, n) matrix.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-dimensional (n_samples, n_features); got shape {X.shape}."
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset (n_samples == 0).")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X and y disagree on the number of samples: {n_samples} vs {y.shape[0]}."
            )

        self.w = np.zeros(n_features)
        self.b = 0.0

        for _ in range(self.num_iter):
            # Linear model prediction
            linear_model = np.dot(X, self.w) + self.b
            y_predicted = self.sigmoid(linear_model)

            # Residual shared by both gradients
            error = y_predicted - y

            # Gradients of the mean log-loss with respect to w and b
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # Update parameters
            self.w -= self.learning_rate * dw
            self.b -= self.learning_rate * db

    def predict_proba(self, X):
        """
        Predict class probabilities for input data.

        Parameters:
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns:
        -------
        ndarray
            Probabilities for class 1.

        Raises:
        ------
        RuntimeError
            If the model has no weights yet (``fit`` was never called).
        """
        if self.w is None or self.b is None:
            raise RuntimeError(
                "This LogisticRegression instance is not fitted yet; call fit(X, y) first."
            )
        linear_model = np.dot(np.asarray(X, dtype=float), self.w) + self.b
        return self.sigmoid(linear_model)

    def predict(self, X):
        """
        Predict binary class labels for input data.

        Parameters:
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns:
        -------
        ndarray
            Predicted class labels (0 or 1); a probability of exactly 0.5
            is assigned to class 1.
        """
        y_probs = self.predict_proba(X)
        return np.where(y_probs >= 0.5, 1, 0)

## LogisticRegression implementation:

### Init:
Store the learning rate and the number of iterations. The weights `w` and bias `b` start as `None` and are set to zeros at the beginning of `fit`.

### Sigmoid:
Squash the input $z$ into the range $(0, 1)$ using $\sigma(z) = \frac{1}{1 + e^{-z}}$.

### Fit:
Repeat `num_iter` times: compute the linear model $Xw + b$ → apply the sigmoid → compute the gradients (`dw`, `db`) → update the weights and bias with a gradient-descent step.

### Predict_proba:
Return the sigmoid of $Xw + b$ as the probability of class 1.

### Predict:
Threshold the probabilities at 0.5 (a probability of at least 0.5 becomes 1, anything lower becomes 0) and return the resulting 0/1 labels.

In [2]:
# Toy training set: six 2-D points, one row per sample
X = np.array([[1, 2], [2, 1], [2, 3], [3, 2], [3, 4], [4, 3]])

# Binary targets aligned with the rows of X: three of class 0, then three of class 1
y = np.array([0] * 3 + [1] * 3)

In [3]:
# Train the from-scratch classifier on the toy data
model = LogisticRegression(num_iter=1000, learning_rate=0.01)
model.fit(X, y)

In [5]:
# Show the parameters learned by gradient descent, one per line
for label, value in (("Weights:", model.w), ("Bias:", model.b)):
    print(label, value)


Weights: [ 0.777979   -0.21385172]
Bias: -0.9945904172925308


In [8]:
# Hard 0/1 labels for the training points, printed next to the ground truth
preds = model.predict(X=X)
for caption, labels in (("Predictions:", preds), ("True labels:", y)):
    print(caption, labels)

Predictions: [0 1 0 1 1 1]
True labels: [0 0 0 1 1 1]


In [9]:
# Class-1 probabilities for the training points (the values that predict() thresholds)
probs = model.predict_proba(X=X)
print("Predicted probabilities:", probs)

Predicted probabilities: [0.34427181 0.58601505 0.47996384 0.71333628 0.61867775 0.81393288]


In [13]:
# Fraction of training points whose predicted label matches the true label
accuracy = np.mean(preds == y)
# The "%" presentation type multiplies by 100 and rounds in a single step, so the
# output cannot show float artefacts such as 56.99999999999999 (which
# np.round(x, 2) * 100 can produce); the stray comma after the label is removed.
print(f"Accuracy: {accuracy:.1%}")

Accuracy:, 83.0%


In [14]:
# Two hand-picked points that are not part of the training data:
# row 0 ([1, 1]) should lean towards class 0, row 1 ([4, 4]) towards class 1
X_test = np.array([[1, 1], [4, 4]])

print("Test predictions:", model.predict(X=X_test))
print("Test probabilities:", model.predict_proba(X=X_test))

Test predictions: [0 1]
Test probabilities: [0.39401574 0.77935623]


In [15]:
## Example: using the LogisticRegression class on a dataset from scikit-learn's make_classification

In [16]:
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Synthetic two-class problem: 1000 samples with 10 features, seeded for repeatability
X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)

# Hold out 20% of the rows for evaluation (seeded so the split is repeatable)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42, test_size=0.2)

# Fit the from-scratch classifier on the training portion only
model = LogisticRegression(num_iter=1000, learning_rate=0.1)
model.fit(X_train, y_train)

# Score the hard 0/1 predictions on the held-out rows
y_pred = model.predict(X_test)
acc = accuracy_score(y_test, y_pred)

print("Test Accuracy:", acc)

Test Accuracy: 0.825
