In [64]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression as SKLogistic
from mlf.linear_model.logistic_regression import LogisticRegression

In [65]:
# Load Breast Cancer dataset
breast_cancer = load_breast_cancer()
breast_cancer_df = pd.DataFrame(data=breast_cancer.data, columns=breast_cancer.feature_names)
breast_cancer_df['target'] = breast_cancer.target

In [66]:
# Split Breast Cancer dataset into training and testing sets
X_train_bc, X_test_bc, y_train_bc, y_test_bc = train_test_split(
    breast_cancer_df.drop(columns=['target']), breast_cancer_df['target'], test_size=0.2, random_state=42)

In [67]:
# Standardize features
scaler = StandardScaler()
X_train_bc_scaled = scaler.fit_transform(X_train_bc)
X_test_bc_scaled = scaler.fit_transform(X_test_bc)

In [68]:
# --- 2. Train your Logistic Regression ---
my_model = LogisticRegression(lr=0.01, epochs=50)
my_model.fit(X_train_bc_scaled, y_train_bc)
my_acc = my_model.score(X_test_bc_scaled, y_test_bc)
print("My Logistic Regression Accuracy:", my_acc)
print("My weights:", my_model.w)

My Logistic Regression Accuracy: 0.9736842105263158
My weights: [-0.10345801 -0.06541895 -0.10465306 -0.09902202 -0.04934657 -0.07432937
 -0.09049464 -0.10868211 -0.04420108  0.01334502 -0.0736874   0.00272501
 -0.0703121  -0.06957169  0.01159938 -0.02092692 -0.01569981 -0.04309822
  0.00496367  0.00918343 -0.11162443 -0.07509931 -0.11156328 -0.10367599
 -0.06366386 -0.07923339 -0.08830153 -0.11169322 -0.06639774 -0.03929776]


In [69]:
# --- 3. Train scikit-learn Logistic Regression ---
sk_model = SKLogistic(max_iter=50)
sk_model.fit(X_train_bc_scaled, y_train_bc)
sk_acc = sk_model.score(X_test_bc_scaled, y_test_bc)
print("Sklearn Logistic Regression Accuracy:", sk_acc)
print("Sklearn weights:", sk_model.coef_)

Sklearn Logistic Regression Accuracy: 0.9824561403508771
Sklearn weights: [[-0.43190368 -0.38732553 -0.39343248 -0.46521006 -0.07166728  0.54016395
  -0.8014581  -1.11980408  0.23611852  0.07592093 -1.26817815  0.18887738
  -0.61058302 -0.9071857  -0.31330675  0.68249145  0.17527452 -0.3112999
   0.50042502  0.61622993 -0.87984024 -1.35060559 -0.58945273 -0.84184594
  -0.54416967  0.01611019 -0.94305313 -0.77821726 -1.20820031 -0.15741387]]


In [70]:
# --- 4. Compare weights (optional, allow some tolerance) ---
weights_close = np.allclose(my_model.w, sk_model.coef_.flatten(), atol=0.5)
print("Weights close to sklearn?", weights_close)

Weights close to sklearn? False


In [71]:
# --- 5. Assert accuracy is close ---
assert abs(my_acc - sk_acc) < 0.05, "Your model accuracy differs too much from sklearn!"

In [72]:
print(sk_acc - my_acc)

0.00877192982456132
