In [65]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as nlp
import seaborn as sns

In [66]:
from sklearn.datasets import make_classification

In [67]:
x, y = make_classification(n_samples = 1000, n_classes=2, n_features=10, random_state=42)

In [68]:
from sklearn.model_selection import train_test_split

In [69]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.20, random_state=15)

In [70]:
from sklearn.linear_model import LogisticRegression

In [71]:
reg = LogisticRegression()
reg.fit(x_train, y_train)
y_pred = reg.predict(x_test)
print(y_pred)

[0 0 1 0 0 0 0 0 0 0 0 1 1 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 1 1 0 0 0 0 0 0
 0 0 0 1 1 0 1 0 0 0 1 0 0 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 0 1 1 1 0 0 0 0 1
 1 1 1 1 0 0 0 0 1 0 0 1 0 1 1 0 0 1 0 0 0 1 0 1 1 0 1 1 0 0 1 0 1 0 0 1 0
 0 1 0 1 0 1 0 0 0 0 0 1 0 1 0 0 1 1 1 1 1 1 1 1 1 0 1 1 1 0 0 1 0 1 1 0 0
 1 0 0 0 1 1 0 1 0 0 1 0 0 0 0 1 1 0 1 1 1 1 1 0 1 1 1 0 0 0 1 0 0 0 1 0 1
 0 0 1 1 0 1 0 0 1 0 0 1 0 0 0]


In [72]:

from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [73]:
score = accuracy_score(y_pred, y_test)
print(score)
print(classification_report(y_pred, y_test))
print(confusion_matrix(y_pred, y_test))

0.84
              precision    recall  f1-score   support

           0       0.85      0.86      0.85       108
           1       0.83      0.82      0.82        92

    accuracy                           0.84       200
   macro avg       0.84      0.84      0.84       200
weighted avg       0.84      0.84      0.84       200

[[93 15]
 [17 75]]


In [74]:
# Hyperparameter Tuning

In [75]:
model = LogisticRegression()
penalty = ['l1', 'l2', 'elasticnet']
c_values = [100, 10, 1, 0.1, 0.01, 0.001]
solver = ['lbfgs', 'newton-cg', 'newton-cholesky', 'sag', 'saga']


In [76]:
params = [
    # l1 → only with liblinear or saga
    {'penalty': ['l1'], 'C': c_values, 'solver': ['liblinear', 'saga']},

    # l2 → with all solvers
    {'penalty': ['l2'], 'C': c_values, 'solver': solver},

    # elasticnet → only with saga and must include l1_ratio
    {'penalty': ['elasticnet'], 'C': c_values, 'solver': ['saga'], 'l1_ratio': [0.5, 0.7, 0.9]}
]


In [77]:
from sklearn.model_selection import StratifiedKFold
cv=StratifiedKFold()

In [78]:
## GridSearchCV
from sklearn.model_selection import GridSearchCV

In [79]:
grid = GridSearchCV(estimator=model, param_grid=params, n_jobs=-1, cv=cv, scoring = "accuracy", verbose=3, error_score='raise')

In [80]:
grid.fit(x_train, y_train)

Fitting 5 folds for each of 60 candidates, totalling 300 fits
[CV 3/5] END C=100, penalty=l1, solver=liblinear;, score=0.850 total time=   0.0s
[CV 2/5] END C=100, penalty=l1, solver=liblinear;, score=0.869 total time=   0.0s
[CV 5/5] END C=100, penalty=l1, solver=liblinear;, score=0.900 total time=   0.0s
[CV 4/5] END C=100, penalty=l1, solver=liblinear;, score=0.875 total time=   0.0s
[CV 1/5] END ....C=100, penalty=l1, solver=saga;, score=0.825 total time=   0.0s
[CV 1/5] END C=100, penalty=l1, solver=liblinear;, score=0.825 total time=   0.0s
[CV 2/5] END ....C=100, penalty=l1, solver=saga;, score=0.869 total time=   0.0s
[CV 3/5] END ....C=100, penalty=l1, solver=saga;, score=0.850 total time=   0.0s
[CV 1/5] END C=10, penalty=l1, solver=liblinear;, score=0.825 total time=   0.0s
[CV 4/5] END ....C=100, penalty=l1, solver=saga;, score=0.875 total time=   0.0s
[CV 5/5] END ....C=100, penalty=l1, solver=saga;, score=0.900 total time=   0.0s
[CV 2/5] END C=10, penalty=l1, solver=libl



[CV 5/5] END ...C=100, penalty=l2, solver=lbfgs;, score=0.900 total time=   0.0s
[CV 3/5] END C=100, penalty=l2, solver=newton-cg;, score=0.850 total time=   0.0s
[CV 4/5] END C=100, penalty=l2, solver=newton-cg;, score=0.875 total time=   0.0s
[CV 1/5] END C=100, penalty=l2, solver=newton-cg;, score=0.825 total time=   0.0s
[CV 5/5] END C=100, penalty=l2, solver=newton-cg;, score=0.900 total time=   0.0s
[CV 1/5] END C=100, penalty=l2, solver=newton-cholesky;, score=0.825 total time=   0.0s
[CV 2/5] END C=100, penalty=l2, solver=newton-cg;, score=0.869 total time=   0.0s
[CV 2/5] END C=100, penalty=l2, solver=newton-cholesky;, score=0.869 total time=   0.0s
[CV 5/5] END C=100, penalty=l2, solver=newton-cholesky;, score=0.900 total time=   0.0s
[CV 3/5] END C=100, penalty=l2, solver=newton-cholesky;, score=0.850 total time=   0.0s
[CV 2/5] END .....C=100, penalty=l2, solver=sag;, score=0.869 total time=   0.0s
[CV 1/5] END ....C=100, penalty=l2, solver=saga;, score=0.825 total time=   



In [81]:
grid.best_params_

{'C': 0.01, 'l1_ratio': 0.5, 'penalty': 'elasticnet', 'solver': 'saga'}

In [82]:
grid.best_score_

np.float64(0.86875)

In [83]:
y_pred = grid.predict(x_test)

In [84]:

from sklearn.metrics import accuracy_score,classification_report,confusion_matrix

In [85]:
score = accuracy_score(y_pred, y_test)
print(score)
print(classification_report(y_pred, y_test))
print(confusion_matrix(y_pred, y_test))

0.875
              precision    recall  f1-score   support

           0       0.89      0.88      0.89       111
           1       0.86      0.87      0.86        89

    accuracy                           0.88       200
   macro avg       0.87      0.87      0.87       200
weighted avg       0.88      0.88      0.88       200

[[98 13]
 [12 77]]


In [86]:
## Randomized Search CV
from sklearn.model_selection import RandomizedSearchCV

In [87]:
model = LogisticRegression()
rcv = RandomizedSearchCV(estimator= model, param_distributions= params, cv = cv, scoring="accuracy")

In [88]:
rcv.fit(x_train, y_train)



In [89]:
rcv.best_score_

np.float64(0.865)

In [90]:
rcv.best_params_

{'solver': 'newton-cg', 'penalty': 'l2', 'C': 1}

In [91]:
y_pred = rcv.predict(x_test)

In [None]:
score=accuracy_score(y_pred,y_test)
print(score)
print(classification_report(y_pred,y_test))
print(confusion_matrix(y_pred,y_test))

0.84
              precision    recall  f1-score   support

           0       0.85      0.86      0.85       108
           1       0.83      0.82      0.82        92

    accuracy                           0.84       200
   macro avg       0.84      0.84      0.84       200
weighted avg       0.84      0.84      0.84       200

[[93 15]
 [17 75]]


Exception ignored in: <function ResourceTracker.__del__ at 0x10ee19b20>
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x110a6db20>
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 82, in __del__
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 91, in _stop
  File "/opt/anaconda3/lib/python3.13/multiprocessing/resource_tracker.py", line 116, in _stop_locked
ChildProcessError: [Errno 10] No child processes
Exception ignored in: <function ResourceTracker.__del__ at 0x103f3db20>
Traceback (most recent call last