In [2]:
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [3]:
# Load the scikit-learn breast-cancer dataset and pull out the feature matrix
# and the label vector.
cancer = load_breast_cancer()
X, y = cancer.data, cancer.target

In [4]:
# Hold out a fixed fraction of the samples for evaluation; the seed keeps the
# split identical from run to run.
TEST_FRACTION = 0.2
SPLIT_SEED = 42

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=TEST_FRACTION,
    random_state=SPLIT_SEED,
)

In [13]:
# Baseline model: library-default logistic regression with only the iteration
# cap raised (presumably so the solver can converge on the unscaled
# features -- TODO confirm).
logistic_model = LogisticRegression(max_iter=10_000)
logistic_model.fit(X=X_train, y=y_train)

In [14]:
# Show the fitted baseline parameters (one weight per feature plus the bias).
baseline_params = (
    ("Coefficients:", logistic_model.coef_),
    ("Intercept:", logistic_model.intercept_),
)

print("-------Baseline Logistic Regression -------")
for label, value in baseline_params:
    print(label, value)

-------Baseline Logistic Regression -------
Coefficients: [[ 1.0274368   0.22145051 -0.36213488  0.0254667  -0.15623532 -0.23771256
  -0.53255786 -0.28369224 -0.22668189 -0.03649446 -0.09710208  1.3705667
  -0.18140942 -0.08719575 -0.02245523  0.04736092 -0.04294784 -0.03240188
  -0.03473732  0.01160522  0.11165329 -0.50887722 -0.01555395 -0.016857
  -0.30773117 -0.77270908 -1.42859535 -0.51092923 -0.74689363 -0.10094404]]
Intercept: [28.64871395]


In [15]:
# Predict with the baseline model on both partitions so train and test
# performance can be compared.
y_train_pred, y_test_pred = (
    logistic_model.predict(features) for features in (X_train, X_test)
)

In [16]:
# Compare baseline accuracy on seen vs. held-out data, then show where the
# held-out predictions go wrong.
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)
test_confusion = confusion_matrix(y_test, y_test_pred)

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)
print("Confusion Matrix:\n", test_confusion)

Train Accuracy: 0.9582417582417583
Test Accuracy: 0.956140350877193
Confusion Matrix:
 [[39  4]
 [ 1 70]]


In [18]:
# Hyperparameter search over regularisation strength and penalty type.
# The liblinear solver is fixed because it supports both the L1 and the L2
# penalty listed in the grid.
param_grid = {
    "C": np.logspace(-3, 3, 7),  # 1e-3 ... 1e3, one candidate per decade
    "penalty": ["l1", "l2"],
    "solver": ["liblinear"],
}

# liblinear shuffles the data internally, so the estimator's random_state is
# pinned; without it the selected parameters and CV score are not guaranteed
# to be identical across runs.
logistic_cv = GridSearchCV(
    LogisticRegression(max_iter=10000, random_state=42),
    param_grid,
    cv=5,                # 5-fold cross-validation on the training split only
    scoring="accuracy",
    n_jobs=-1,           # use all available cores
)

logistic_cv.fit(X_train, y_train)

In [19]:
# Report the winning grid point and the cross-validated score it achieved.
tuning_summary = (
    ("Best Parameters:", logistic_cv.best_params_),
    ("Best CV Accuracy:", logistic_cv.best_score_),
)

print("\n---------Logistic Regression Hyperparameter Tuning---------")
for label, value in tuning_summary:
    print(label, value)


---------Logistic Regression Hyperparameter Tuning---------
Best Parameters: {'C': np.float64(100.0), 'penalty': 'l1', 'solver': 'liblinear'}
Best CV Accuracy: 0.9670329670329672


In [21]:
# GridSearchCV is used with its default refit behaviour, so best_estimator_ is
# the winning configuration already fitted and ready to predict.
best_model = logistic_cv.best_estimator_

# Score the tuned model on the held-out split.
y_test_best = best_model.predict(X=X_test)

In [20]:
# Final evaluation of the tuned model on the held-out test split.
final_accuracy = accuracy_score(y_test, y_test_best)
final_report = classification_report(y_test, y_test_best)

print("Final Test Accuracy:", final_accuracy)
print("Classification Report:\n", final_report)

Final Test Accuracy: 0.9824561403508771
Classification Report:
               precision    recall  f1-score   support

           0       0.98      0.98      0.98        43
           1       0.99      0.99      0.99        71

    accuracy                           0.98       114
   macro avg       0.98      0.98      0.98       114
weighted avg       0.98      0.98      0.98       114

