In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
from sklearn.datasets import make_classification

# Synthetic two-class dataset: 1000 samples with 10 features each, seeded so
# that re-running the notebook regenerates the same data.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=42,
)


In [None]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the samples for evaluation; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.3,
    random_state=42,
)

In [None]:
# Baseline model: logistic regression with the library's default settings,
# fitted on the training split only.
from sklearn.linear_model import LogisticRegression

logreg = LogisticRegression()
logreg.fit(X=X_train, y=y_train)

In [None]:
# Predicted labels from the baseline model for the held-out test split.
y_pred = logreg.predict(X=X_test)

In [None]:
# Predicted probabilities from the baseline model for the test split.
# Only the first few rows are displayed: dumping the full array (one row per
# test sample) floods the notebook output without adding information.
logreg.predict_proba(X_test)[:5]

In [16]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

# Accuracy of the current predictions against the true test labels.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
score

0.8466666666666667

In [17]:
# Confusion matrix comparing the true test labels with the current predictions.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
cm

array([[118,  17],
       [ 29, 136]])

In [19]:
# Per-class metrics report; it is a multi-line string, so print() is used to
# render its line breaks instead of showing the raw repr.
class_rp = classification_report(y_true=y_test, y_pred=y_pred)
print(class_rp)

              precision    recall  f1-score   support

           0       0.80      0.87      0.84       135
           1       0.89      0.82      0.86       165

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300



## Hyperparameter tuning and cross-validation

In [20]:
# Base estimator handed to the grid search. The seed matters because the
# search includes the 'sag', 'saga' and 'liblinear' solvers, which shuffle the
# data internally; 42 matches the seed used for data generation and splitting.
model = LogisticRegression(random_state=42)
# Candidate values explored by the search.
penalty = ['l1', 'l2', 'elasticnet']
c_values = [0.001, 0.01, 0.1, 1, 10, 100]
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
l1_ratios = [0.25, 0.5, 0.75]  # L1/L2 mixing weights, required by 'elasticnet'

# Not every solver supports every penalty, so a single dict (a full cross
# product) would schedule many fits that can only fail. A list of sub-grids
# restricts each penalty to the solvers that implement it:
#   - 'l2'         : every solver
#   - 'l1'         : 'liblinear' and 'saga'
#   - 'elasticnet' : 'saga' only, and it needs an explicit l1_ratio
hyperparameters = [
    dict(penalty=['l2'], C=c_values, solver=solver),
    dict(penalty=['l1'], C=c_values, solver=['liblinear', 'saga']),
    dict(penalty=['elasticnet'], C=c_values, solver=['saga'], l1_ratio=l1_ratios),
]

In [21]:
## GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
# Stratified k-fold splitter with its default settings, used as the CV scheme.
cv = StratifiedKFold()

# Exhaustive search over `hyperparameters`, ranking candidates by accuracy and
# using all available CPU cores (n_jobs=-1).
grid_model = GridSearchCV(
    estimator=model,
    param_grid=hyperparameters,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
)

In [None]:
import warnings

from sklearn.exceptions import ConvergenceWarning

# Suppress only solver non-convergence warnings, and only while the search
# runs. A blanket warnings.filterwarnings("ignore") would stay active for the
# rest of the kernel session and would also hide reports of parameter
# combinations that failed to fit, which are worth seeing.
with warnings.catch_warnings():
    warnings.simplefilter("ignore", category=ConvergenceWarning)
    grid_model.fit(X_train, y_train)

# Best parameter combination found and its mean cross-validated score.
best_params = grid_model.best_params_
best_score = grid_model.best_score_

print("Best Parameters:", best_params)
print("Best Score:", best_score)

Best Parameters: {'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}
Best Score: 0.8785714285714287


In [25]:
# Predict the test split with the fitted grid search.
# Note: this rebinds y_pred, replacing the baseline model's predictions.
y_pred = grid_model.predict(X=X_test)

In [26]:
# Test-set accuracy of the current predictions (rebinds `score`).
score = accuracy_score(y_true=y_test, y_pred=y_pred)
score

0.8533333333333334

In [27]:
# Per-class metrics for the current predictions; printed so the multi-line
# string renders as a table.
class_rp = classification_report(y_true=y_test, y_pred=y_pred)
print(class_rp)

              precision    recall  f1-score   support

           0       0.79      0.92      0.85       135
           1       0.92      0.80      0.86       165

    accuracy                           0.85       300
   macro avg       0.86      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300



In [28]:
# Confusion matrix of true test labels against the current predictions.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[124,  11],
       [ 33, 132]])