 # Logistic Regression with Batch Gradient Descent (Scikit-learn)

 ## Introduction

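 This notebook trains a logistic regression classifier with batch gradient descent (Batch GD) using the project's scikit-learn-based `LogisticRegressionSK` wrapper, then evaluates it on the breast cancer test split.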

 ## Data Loading 

 Load the preprocessed breast cancer train/test arrays from `data/processed`.

In [None]:
import os
import sys

import numpy as np

# Set the project root directory (three levels above the current working
# directory) and add it to the system path so the project-local `src`
# package can be imported below.
project_root = os.path.abspath(os.path.join(os.getcwd(), "..", "..", ".."))
# Guard the append so re-running this cell does not keep adding duplicate
# entries to sys.path.
if project_root not in sys.path:
    sys.path.append(project_root)


from src.scratch.utils.viz_utils import plot_scatter_for_classification

# Load the four pre-split arrays in one pass; the order of the paths matches
# the order of the names being unpacked.
X_train, X_test, y_train, y_test = (
    np.load(npy_path)
    for npy_path in (
        "../../../data/processed/breast_cancer_X_train.npy",
        "../../../data/processed/breast_cancer_X_test.npy",
        "../../../data/processed/breast_cancer_y_train.npy",
        "../../../data/processed/breast_cancer_y_test.npy",
    )
)

# Report the shape of every loaded array as a quick sanity check.
for _label, _arr in (
    ("Training features shape:", X_train),
    ("Test features shape:", X_test),
    ("Training target shape:", y_train),
    ("Test target shape:", y_test),
):
    print(_label, _arr.shape)

 ## Exploratory Data Analysis

 Plot the first two training features against each other.

In [None]:
# Scatter the first two feature columns of the training set; the plot is
# skipped when the data has fewer than two features.
if X_train.shape[1] >= 2:
    plot_scatter_for_classification(
        X_train,
        y_train,
        feature_indices=(0, 1),
        title="Feature 1 vs Feature 2",
        filename="feature1_vs_feature2_batch_sk.png",
    )


 ## Model Initialization

 Initialize the model with the batch gradient descent method, a small learning rate, and early stopping enabled.

In [None]:
from src.sklearn_impl.logistic_regression_sk import LogisticRegressionSK

# Batch gradient descent with a very large iteration cap and a tiny step size.
# NOTE(review): early stopping is enabled, presumably so training ends well
# before the cap is reached — confirm against LogisticRegressionSK.
model = LogisticRegressionSK(
    method="batch_gd",
    n_iterations=20_000_000_000,
    learning_rate=1e-6,
    early_stopping=True,
    verbose=True,
)


 ## Training

 Fit the model on the training set and report the elapsed training time.

In [None]:
import time

# Time the fit with time.perf_counter(): it is a monotonic, high-resolution
# clock meant for measuring elapsed intervals, whereas time.time() follows the
# system clock and can jump if that clock is adjusted mid-run.
start_time = time.perf_counter()
model.fit(X_train, y_train)
training_time = time.perf_counter() - start_time
print(f"Training Time: {training_time:.4f} seconds")


 ## Evaluation

 Compute accuracy, precision, recall, F1 score, and ROC AUC on the test set.

In [None]:
from src.scratch.utils.metrics import accuracy, precision_score, recall_score, f1_score, roc_auc_score

# Predictions and column-1 probability scores for the test set.
# NOTE(review): column 1 is presumably the positive class — confirm against
# LogisticRegressionSK.predict_proba.
y_pred = model.predict(X_test)
y_scores = model.predict_proba(X_test)[:, 1]

# The label-based metrics all take (y_test, y_pred), so evaluate them in one
# pass; the tuple order matches the names being unpacked.
acc, prec, rec, f1 = (
    metric(y_test, y_pred)
    for metric in (accuracy, precision_score, recall_score, f1_score)
)
# ROC AUC is computed from the probability scores rather than the predictions.
roc_auc = roc_auc_score(y_test, y_scores)

print(f"Accuracy: {acc:.4f}")
print(f"Precision: {prec:.4f}")
print(f"Recall: {rec:.4f}")
print(f"F1 Score: {f1:.4f}")
print(f"ROC AUC: {roc_auc:.4f}")


 ## Visualizations

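 Plot the learning curve, confusion matrix, ROC curve, and precision-recall curve, plus the decision boundary when the test data has exactly two features.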

In [None]:
from src.scratch.utils.viz_utils import (
    plot_confusion_matrix,
    plot_decision_boundary,
    plot_learning_curve,
    plot_roc_curve,
    plot_precision_recall_curve
)

# Learning curve from the loss history exposed by the fitted model.
plot_learning_curve(
    model.get_loss_history(),
    title="Learning Curve (Batch GD SK)",
    filename="learning_curve_batch_sk_log.png",
)

# The decision boundary is only plotted when the test data has exactly two
# feature columns.
if X_test.shape[1] == 2:
    plot_decision_boundary(
        model, X_test, y_test,
        title="Decision Boundary (Batch GD SK)",
        filename="decision_boundary_batch_sk.png",
    )

# Confusion matrix from the test-set predictions.
plot_confusion_matrix(
    y_test, y_pred,
    title="Confusion Matrix (Batch GD SK)",
    filename="confusion_matrix_batch_sk.png",
)

# Score-based curves use the probability scores rather than the predictions.
plot_roc_curve(
    y_test,
    y_scores,
    title="ROC Curve (Batch GD SK)",
    filename="roc_curve_batch_sk.png",
)
# NOTE(review): unlike the other plots, this call passes no title or filename —
# confirm whether plot_precision_recall_curve accepts them.
plot_precision_recall_curve(y_test, y_scores)

 ## Conclusion

 The scikit-learn Batch GD model was evaluated on the held-out test set; its accuracy, F1 score, and ROC AUC are the values printed in the Evaluation section above.