In [6]:
import numpy as np 
import pandas as pd 

In [1]:
def sigmoid(z):
    """Return the logistic sigmoid ``1 / (1 + exp(-z))`` of ``z``, element-wise.

    The textbook formula overflows in ``np.exp(-z)`` when ``z`` is a large
    negative number (NumPy emits a RuntimeWarning and produces ``inf``).
    This version only ever exponentiates a non-positive value, so it cannot
    overflow, while returning the same mathematical result.

    Parameters
    ----------
    z : scalar or array-like
        Input value(s). Non-floating input is promoted to float.

    Returns
    -------
    NumPy floating scalar for scalar input, otherwise an ``ndarray`` with the
    same shape as ``z``; every value lies in ``[0, 1]``.
    """
    z = np.asarray(z)
    if not np.issubdtype(z.dtype, np.floating):
        # Integer/bool input: promote so the arithmetic below is floating point.
        z = z.astype(float)

    # exp(-|z|) is always in (0, 1], so it can never overflow.
    decay = np.exp(-np.abs(z))

    # z >= 0: 1 / (1 + e^-z)      (the textbook form, safe on this side)
    # z <  0: e^z / (1 + e^z)     (algebraically identical, safe on this side)
    result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))

    # Indexing with () unwraps a 0-d array into a NumPy scalar and leaves
    # higher-dimensional arrays untouched.
    return result[()]

In [13]:
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default 0.01
        Step size applied to each gradient update.
    max_iter : int, default 10000
        Number of gradient-descent iterations run by ``fit``.
    verbose : bool, default True
        When true, ``fit`` prints the loss every 1000 iterations.

    Attributes
    ----------
    W : ndarray of shape (n_features,) or None
        Learned weights; ``None`` until ``fit`` has been called.
    b : float
        Learned bias term.
    loss_history : list of float
        Cross-entropy loss recorded at every iteration of the latest ``fit``.
    """

    def __init__(self, learning_rate=0.01, max_iter=10000, verbose=True):
        self.learning_rate = learning_rate
        self.max_iter = max_iter
        self.verbose = verbose
        self.W = None
        self.b = 0
        self.loss_history = []  # To track loss over iterations

    def compute_loss(self, y_true, y_pred):
        """Return the mean binary cross-entropy between labels and probabilities.

        A small epsilon is added inside both logarithms so that predictions
        of exactly 0 or 1 do not produce ``log(0)``.
        """
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        epsilon = 1e-8  # Small value to avoid log(0)
        loss = -np.mean(
            y_true * np.log(y_pred + epsilon)
            + (1 - y_true) * np.log(1 - y_pred + epsilon)
        )
        return loss

    def fit(self, X, y):
        """Learn ``W`` and ``b`` from scratch by gradient descent.

        Every call restarts from zero weights, zero bias and an empty
        ``loss_history``, so fitting the same instance twice on the same data
        gives the same result.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like with n_samples entries
            Target values; flattened to 1-D, so a column vector is accepted.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, has no rows, or ``y`` does not have one entry
            per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so an (n, 1) target cannot broadcast against the (n,)
        # predictions into an (n, n) matrix.
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got {X.ndim} dimension(s)"
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} entries"
            )

        # Initialize parameters (all of them, so a refit does not inherit the
        # previous bias or loss curve).
        self.W = np.zeros(n_features)
        self.b = 0
        self.loss_history = []

        # Gradient Descent
        for i in range(self.max_iter):
            # Forward pass
            linear_model = np.dot(X, self.W) + self.b
            predictions = sigmoid(linear_model)

            # Compute loss (cross-entropy)
            loss = self.compute_loss(y, predictions)
            self.loss_history.append(loss)  # Track loss

            # Compute gradients of the mean cross-entropy
            residual = predictions - y
            dw = (1 / n_samples) * np.dot(X.T, residual)
            db = (1 / n_samples) * np.sum(residual)

            # Update parameters
            self.W -= self.learning_rate * dw
            self.b -= self.learning_rate * db

            # Print loss every 1000 iterations
            if self.verbose and i % 1000 == 0:
                print(f"Iteration {i}, Loss: {loss}")

    def predict_prob(self, X):
        """Return ``sigmoid(X @ W + b)`` for each row of ``X``."""
        return sigmoid(np.dot(X, self.W) + self.b)

    def predict(self, X, threshold=0.5):
        """Return 1 where the predicted probability is >= ``threshold``, else 0."""
        return (self.predict_prob(X) >= threshold).astype(int)

In [14]:
# Example usage with synthetic data
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

In [15]:
# Build a seeded synthetic classification data set with two informative
# features, then hold out 20% of the rows for testing.
X, y = make_classification(
    n_samples=1000,
    n_features=2,
    n_informative=2,
    n_redundant=0,
    random_state=42,
)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [20]:
# Fit the from-scratch logistic regression on the training split
model = LogisticRegression(
    learning_rate=0.1,
    max_iter=10_000,
)
model.fit(X_train, y_train)

Iteration 0, Loss: 0.6931471605599454
Iteration 1000, Loss: 0.3427010354319957
Iteration 2000, Loss: 0.34270092509694755
Iteration 3000, Loss: 0.3427009250963203
Iteration 4000, Loss: 0.34270092509632016
Iteration 5000, Loss: 0.3427009250963202
Iteration 6000, Loss: 0.3427009250963202
Iteration 7000, Loss: 0.3427009250963202
Iteration 8000, Loss: 0.3427009250963202
Iteration 9000, Loss: 0.3427009250963202


In [21]:
# Class labels for the test split, using the default 0.5 probability cutoff
y_pred = model.predict(X_test, threshold=0.5)

In [22]:
# Accuracy = fraction of test labels the model predicted correctly
accuracy = (y_pred == y_test).mean()
print(f"Accuracy: {accuracy:.2f}")

Accuracy: 0.88
