In [2]:
import numpy as np

class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to every gradient update.
    epochs : int, default=1000
        Number of gradient descent iterations performed by ``fit``.

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector with one entry per feature; ``None`` until ``fit`` runs.
    b : float or None
        Bias term; ``None`` until ``fit`` runs.
    """

    def __init__(self, learning_rate=0.01, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.w = None  # set by fit()
        self.b = None  # set by fit()

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        The exponential is only ever taken of a non-positive number, so large
        negative inputs no longer overflow ``np.exp`` (which previously
        produced a RuntimeWarning).

        Parameters
        ----------
        z : scalar or array-like
            Input value(s).

        Returns
        -------
        numpy.float64 or numpy.ndarray
            Values in ``[0, 1]`` with the same shape as ``z``.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))  # always in (0, 1], cannot overflow
        result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # Indexing with () turns a 0-d array into a scalar and leaves
        # higher-dimensional arrays unchanged.
        return result[()]

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from training data using gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary targets (0 or 1). Column vectors are flattened so the
            residual is not broadcast into an (n_samples, n_samples) matrix.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``y`` does not contain one value per sample.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got shape {X.shape}"
            )
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"y has {y.shape[0]} values but X has {n_samples} samples"
            )

        self.w = np.zeros(n_features)  # Initialize weights
        self.b = 0.0  # Initialize bias

        # Gradient Descent
        for _ in range(self.epochs):
            # Predicted probabilities under the current parameters
            y_pred = self.sigmoid(np.dot(X, self.w) + self.b)
            error = y_pred - y

            # Gradients of the mean log-loss with respect to w and b
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # Update weights and bias
            self.w -= self.learning_rate * dw
            self.b -= self.learning_rate * db

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to score.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Sigmoid of the linear model ``X @ w + b``.
        """
        X = np.asarray(X, dtype=float)
        return self.sigmoid(np.dot(X, self.w) + self.b)

    def predict(self, X):
        """Return binary class labels (0 or 1) for each sample.

        A sample is labelled 1 when its predicted probability is strictly
        greater than 0.5.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        list of int
            Predicted labels, one per sample.
        """
        return (self.predict_proba(X) > 0.5).astype(int).tolist()

# Demo on a toy problem
# Five two-feature samples: the first three are class 0, the last two class 1
X = np.array([[1, 2], [2, 3], [3, 4], [4, 5], [5, 6]])
y = np.array([0, 0, 0, 1, 1])

# Build the classifier and fit it to the toy data
model = LogisticRegression(learning_rate=0.1, epochs=1000)
model.fit(X, y)

# Score the training samples: probabilities first, then hard labels
probabilities = model.predict_proba(X)
predictions = model.predict(X)

# Report the learned parameters and the model outputs
print(f"Weight (w): {model.w}")
print(f"Bias (b): {model.b}")
print(f"Predicted probabilities: {probabilities}")
print(f"Predicted classes: {predictions}")


Weight (w): [ 3.4212929  -1.51821301]
Bias (b): -4.939505917232352
Predicted probabilities: [0.01040881 0.06589298 0.32115275 0.76035024 0.95511296]
Predicted classes: [0, 0, 0, 1, 1]


In [8]:
import numpy as np
from sklearn.datasets import make_classification, load_iris
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Logistic Regression class (from previous implementation)
class LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    learning_rate : float, default=0.1
        Step size applied to every gradient update.
    epochs : int, default=1000
        Number of gradient descent iterations performed by ``fit``.

    Attributes
    ----------
    w : numpy.ndarray or None
        Weight vector with one entry per feature; ``None`` until ``fit`` runs.
    b : float or None
        Bias term; ``None`` until ``fit`` runs.
    """

    def __init__(self, learning_rate=0.1, epochs=1000):
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.w = None  # set by fit()
        self.b = None  # set by fit()

    def sigmoid(self, z):
        """Return the logistic function ``1 / (1 + exp(-z))`` element-wise.

        The exponential is only ever taken of a non-positive number, so large
        negative inputs no longer overflow ``np.exp`` (which previously
        produced a RuntimeWarning).

        Parameters
        ----------
        z : scalar or array-like
            Input value(s).

        Returns
        -------
        numpy.float64 or numpy.ndarray
            Values in ``[0, 1]`` with the same shape as ``z``.
        """
        z = np.asarray(z, dtype=float)
        decay = np.exp(-np.abs(z))  # always in (0, 1], cannot overflow
        result = np.where(z >= 0, 1.0 / (1.0 + decay), decay / (1.0 + decay))
        # Indexing with () turns a 0-d array into a scalar and leaves
        # higher-dimensional arrays unchanged.
        return result[()]

    def fit(self, X, y):
        """Learn ``w`` and ``b`` from training data using gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary targets (0 or 1). Column vectors are flattened so the
            residual is not broadcast into an (n_samples, n_samples) matrix.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``y`` does not contain one value per sample.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features), got shape {X.shape}"
            )
        n_samples, n_features = X.shape
        if y.shape[0] != n_samples:
            raise ValueError(
                f"y has {y.shape[0]} values but X has {n_samples} samples"
            )

        self.w = np.zeros(n_features)
        self.b = 0.0

        for _ in range(self.epochs):
            # Predicted probabilities under the current parameters
            y_pred = self.sigmoid(np.dot(X, self.w) + self.b)
            error = y_pred - y

            # Gradients of the mean log-loss with respect to w and b
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            self.w -= self.learning_rate * dw
            self.b -= self.learning_rate * db

    def predict_proba(self, X):
        """Return the predicted probability of class 1 for each sample.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to score.

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
            Sigmoid of the linear model ``X @ w + b``.
        """
        X = np.asarray(X, dtype=float)
        return self.sigmoid(np.dot(X, self.w) + self.b)

    def predict(self, X):
        """Return binary class labels (0 or 1) for each sample.

        A sample is labelled 1 when its predicted probability is strictly
        greater than 0.5.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        list of int
            Predicted labels, one per sample.
        """
        return (self.predict_proba(X) > 0.5).astype(int).tolist()

# Function to compare custom and sklearn logistic regression models
def compare_models(X, y):
    """Fit the custom and scikit-learn classifiers on one split and print test metrics.

    The data is split 80/20 with a fixed seed; both models are trained on the
    training part and scored on the held-out part with accuracy, precision,
    recall and F1. Results are printed; nothing is returned.

    Parameters
    ----------
    X : array-like
        Feature matrix passed to ``train_test_split``.
    y : array-like
        Binary target vector passed to ``train_test_split``.
    """
    # Hold out 20% of the samples for evaluation
    features_train, features_test, target_train, target_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Custom gradient-descent implementation
    custom_model = LogisticRegression(learning_rate=0.1, epochs=1000)
    custom_model.fit(features_train, target_train)
    y_pred_custom = custom_model.predict(features_test)

    # scikit-learn reference implementation with default settings
    sklearn_model = SklearnLogisticRegression()
    sklearn_model.fit(features_train, target_train)
    y_pred_sklearn = sklearn_model.predict(features_test)

    # Printed label paired with the scorer that produces its value
    scorers = (
        ("Accuracy:", accuracy_score),
        ("Precision:", precision_score),
        ("Recall:", recall_score),
        ("F1 Score:", f1_score),
    )
    model_outputs = (
        ("Custom Model Metrics:", y_pred_custom),
        ("\nScikit-Learn Model Metrics:", y_pred_sklearn),
    )

    # Evaluate every metric for both models before printing anything
    reports = [
        (heading, [(label, scorer(target_test, y_pred)) for label, scorer in scorers])
        for heading, y_pred in model_outputs
    ]

    for heading, rows in reports:
        print(heading)
        for label, score in rows:
            print(label, score)

# Benchmark 1: generated binary problem (1000 samples, 20 features, fixed seed)
print("Synthetic Dataset:")
X_synthetic, y_synthetic = make_classification(
    n_samples=1000,
    n_features=20,
    n_classes=2,
    random_state=42,
)
compare_models(X_synthetic, y_synthetic)

# Benchmark 2: Iris reduced to a binary problem
print("\nIris Dataset:")
iris = load_iris()
not_class_zero = iris.target != 0  # drop class 0 so only two classes remain
X_iris = iris.data[not_class_zero]
y_iris = iris.target[not_class_zero] - 1  # shift the remaining labels to 0 and 1
compare_models(X_iris, y_iris)


Synthetic Dataset:
Custom Model Metrics:
Accuracy: 0.855
Precision: 0.9148936170212766
Recall: 0.8037383177570093
F1 Score: 0.8557213930348259

Scikit-Learn Model Metrics:
Accuracy: 0.855
Precision: 0.9148936170212766
Recall: 0.8037383177570093
F1 Score: 0.8557213930348259

Iris Dataset:
Custom Model Metrics:
Accuracy: 0.85
Precision: 0.7272727272727273
Recall: 1.0
F1 Score: 0.8421052631578947

Scikit-Learn Model Metrics:
Accuracy: 0.95
Precision: 0.8888888888888888
Recall: 1.0
F1 Score: 0.9411764705882353
