<a href="https://colab.research.google.com/github/priyansuapk/ml-lab-exam/blob/main/logistic_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import KFold
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler

# Load the Breast Cancer dataset and pull out the feature matrix and labels
data = load_breast_cancer()
X = data.data
y = data.target

# Standardize the features. The scaler is fitted on every row of X, so
# X_scaled is normalized with statistics computed from the full dataset.
scaler = StandardScaler().fit(X)
X_scaled = scaler.transform(X)

# Logistic Regression from Scratch
class LogisticRegressionScratch:
    """Binary logistic regression trained with full-batch gradient descent.

    Parameters
    ----------
    lr : float, default=0.01
        Step size applied to each gradient update.
    n_iter : int, default=1000
        Number of gradient-descent passes over the whole training set.

    Attributes
    ----------
    weights : ndarray of shape (n_features,)
        Learned coefficients; created by ``fit``.
    bias : float
        Learned intercept; created by ``fit``.
    """

    def __init__(self, lr=0.01, n_iter=1000):
        self.lr = lr
        self.n_iter = n_iter

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) element-wise.

        The naive formula overflows in ``exp`` for large negative ``z``.
        Here the exponent is always non-positive, so ``exp`` can only
        underflow to 0, which is harmless.
        """
        z = np.asarray(z, dtype=float)
        exp_neg_abs = np.exp(-np.abs(z))
        # z >= 0: 1 / (1 + e^-z);  z < 0: e^z / (1 + e^z) -- the same value,
        # written so that only exp of a non-positive number is ever taken.
        return np.where(
            z >= 0,
            1.0 / (1.0 + exp_neg_abs),
            exp_neg_abs / (1.0 + exp_neg_abs),
        )

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from training data.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary targets encoded as 0 / 1.

        Returns
        -------
        self : LogisticRegressionScratch
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or its row count differs from
            the number of targets.
        """
        X = np.asarray(X, dtype=float)
        # Flatten so a column-vector y cannot broadcast the residual to (n, n).
        y = np.asarray(y, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError("X must be a 2-D array of shape (n_samples, n_features)")
        n_samples = X.shape[0]
        if n_samples == 0:
            raise ValueError("Cannot fit on an empty dataset")
        if y.shape[0] != n_samples:
            raise ValueError(
                "X and y have inconsistent lengths: {} vs {}".format(n_samples, y.shape[0])
            )

        self.weights = np.zeros(X.shape[1])
        self.bias = 0.0

        for _ in range(self.n_iter):
            # Residual between predicted probabilities and the 0/1 targets.
            error = self.sigmoid(np.dot(X, self.weights) + self.bias) - y
            dw = np.dot(X.T, error) / n_samples
            db = np.sum(error) / n_samples

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

        return self

    def predict(self, X):
        """Return 0/1 class labels for ``X`` using a 0.5 probability cut-off.

        Requires ``fit`` to have been called first so that ``weights`` and
        ``bias`` exist.
        """
        linear_model = np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
        return (self.sigmoid(linear_model) >= 0.5).astype(int)

# Perform 5-Fold Cross-Validation
# The feature scaler is fitted inside each fold on the training rows only and
# then applied to the held-out rows, so no statistics from the test fold leak
# into training. KFold splits depend only on the number of samples, so the
# folds are the same as when splitting any array with the same row count.
kf = KFold(n_splits=5, shuffle=True, random_state=42)
scratch_accuracies = []
builtin_accuracies = []

for train_index, test_index in kf.split(X):
    # Fit the scaler on this fold's training data; reuse it for the test data
    fold_scaler = StandardScaler()
    X_train = fold_scaler.fit_transform(X[train_index])
    X_test = fold_scaler.transform(X[test_index])
    y_train, y_test = y[train_index], y[test_index]

    # Logistic Regression from Scratch
    logreg_scratch = LogisticRegressionScratch(lr=0.01, n_iter=1000)
    logreg_scratch.fit(X_train, y_train)
    y_pred_scratch = logreg_scratch.predict(X_test)
    scratch_accuracies.append(accuracy_score(y_test, y_pred_scratch))

    # Logistic Regression using Scikit-learn
    logreg_builtin = LogisticRegression(max_iter=1000)
    logreg_builtin.fit(X_train, y_train)
    y_pred_builtin = logreg_builtin.predict(X_test)
    builtin_accuracies.append(accuracy_score(y_test, y_pred_builtin))

# Mean accuracy across the folds for each model
scratch_avg_accuracy = np.asarray(scratch_accuracies).mean()
builtin_avg_accuracy = np.asarray(builtin_accuracies).mean()

# Report both averages, one per line, to four decimal places
print(
    "Logistic Regression from Scratch - Average Accuracy: {:.4f}".format(scratch_avg_accuracy),
    "Scikit-learn Logistic Regression - Average Accuracy: {:.4f}".format(builtin_avg_accuracy),
    sep="\n",
)


Logistic Regression from Scratch - Average Accuracy: 0.9771
Scikit-learn Logistic Regression - Average Accuracy: 0.9771
