In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sb
%matplotlib inline

In [3]:
# Implementing Logistic Regression
# Importing the dataset

from sklearn.datasets import make_classification

In [None]:
# Create dataset
# X = Independent feature
# y = Dependent feature
# n_samples = Total number of samples
# n_features = Total number of features
# n_classes = Total number of classes
# random_state = Random state for reproducibility
X, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)

In [13]:
# Train test Split
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

In [14]:
# Implementing Logistic Regression
# Model Training
from sklearn.linear_model import LogisticRegression
log_reg_model = LogisticRegression()
log_reg_model.fit(X_train, y_train)

In [15]:
y_pred = log_reg_model.predict(X_test)

In [24]:
# Peformance Metrics
# Accuracy
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
score = accuracy_score(y_test, y_pred)
print("Accuracy: %.3f%%" % (score*100.0))
# Confusion Matrix
matrix = confusion_matrix(y_test, y_pred)
print(matrix)
# Classification Report
report = classification_report(y_test, y_pred)
print(report)

Accuracy: 84.667%
[[118  17]
 [ 29 136]]
              precision    recall  f1-score   support

           0       0.80      0.87      0.84       135
           1       0.89      0.82      0.86       165

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300



# Hyperparameters Tuning and Cross Validation


In [36]:
model_hpt = LogisticRegression()
penalty = ['l1', 'l2', 'elasticnet']
C = [0.0001, 0.001, 0.01, 0.1, 1, 10, 100, 1000]
solver = ['newton-cg', 'lbfgs','liblinear', 'saga', 'sag']

hyperparameters = dict(penalty=penalty, C=C, solver=solver)

In [37]:
# Grid Search CV Logistic Regression Model
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold 

cv = StratifiedKFold()
grid = GridSearchCV(estimator=model_hpt, param_grid=hyperparameters, scoring='accuracy',cv=cv)

In [38]:
grid

In [39]:
grid.fit(X_train, y_train)

320 fits failed out of a total of 600.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
40 fits failed with the following error:
Traceback (most recent call last):
  File "/opt/anaconda3/envs/myenv/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/opt/anaconda3/envs/myenv/lib/python3.12/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/opt/anaconda3/envs/myenv/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1193, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^^^^

In [40]:
grid.best_params_

{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}

In [41]:
grid.best_score_

np.float64(0.8785714285714287)

In [42]:
y_pred = grid.predict(X_test)

In [44]:
score = accuracy_score(y_pred, y_test)
print(score)
print(classification_report(y_pred, y_test))
print(confusion_matrix(y_pred, y_test))

0.8533333333333334
              precision    recall  f1-score   support

           0       0.92      0.79      0.85       157
           1       0.80      0.92      0.86       143

    accuracy                           0.85       300
   macro avg       0.86      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300

[[124  33]
 [ 11 132]]
