In [1]:
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
x, y = make_classification(n_samples=1000, n_features=10, n_informative=5, n_redundant=5, n_classes=2, random_state=1)

In [12]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x,y,test_size=0.20,random_state=42)

# Model Training Hyperparameter Tuning
## GridSearchCV

In [13]:
from sklearn.model_selection import GridSearchCV
import warnings
warnings.filterwarnings('ignore')

In [14]:
parameters = {'penalty' : ('l1', 'l2', 'elasticnet'), 'C':[1,10,20,30]}

In [15]:
classifier=LogisticRegression()

In [16]:
clf=GridSearchCV(classifier, param_grid=parameters,cv=5)

In [17]:
clf.fit(x_train,y_train)

GridSearchCV(cv=5, estimator=LogisticRegression(),
             param_grid={'C': [1, 10, 20, 30],
                         'penalty': ('l1', 'l2', 'elasticnet')})

In [18]:
clf.best_params_

{'C': 1, 'penalty': 'l2'}

In [19]:
clf.best_score_

0.8087500000000001

In [20]:
classifier=LogisticRegression(C=1, penalty='l2')

In [21]:
classifier.fit(x_train,y_train)

LogisticRegression(C=1)

In [23]:
y_pred=classifier.predict(x_test)

In [25]:
from sklearn.metrics import accuracy_score, classification_report
print(accuracy_score(y_test,y_pred))
print(classification_report(y_test,y_pred))

0.79
              precision    recall  f1-score   support

           0       0.73      0.86      0.79        91
           1       0.86      0.73      0.79       109

    accuracy                           0.79       200
   macro avg       0.79      0.80      0.79       200
weighted avg       0.80      0.79      0.79       200



In [26]:
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold
cv=KFold(n_splits=5)
cross_val_score(classifier, x_train,y_train,cv=cv)

array([0.80625, 0.78125, 0.79375, 0.8125 , 0.85625])

In [27]:
import numpy as np
np.mean(cross_val_score(classifier, x_train,y_train,cv=cv))

0.8099999999999999

In [29]:
classifier.predict(x_test)

array([0, 1, 1, 1, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0,
       1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 0, 0,
       0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 0,
       0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0,
       0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0,
       0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 1,
       0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1,
       0, 1])

In [28]:
classifier.predict_proba(x_test)

array([[0.71649202, 0.28350798],
       [0.19508969, 0.80491031],
       [0.12418141, 0.87581859],
       [0.05045906, 0.94954094],
       [0.88775659, 0.11224341],
       [0.75067497, 0.24932503],
       [0.97980488, 0.02019512],
       [0.3921745 , 0.6078255 ],
       [0.59920135, 0.40079865],
       [0.39295203, 0.60704797],
       [0.20428696, 0.79571304],
       [0.80257879, 0.19742121],
       [0.86422932, 0.13577068],
       [0.92665682, 0.07334318],
       [0.00131743, 0.99868257],
       [0.04171096, 0.95828904],
       [0.56288536, 0.43711464],
       [0.89322764, 0.10677236],
       [0.29278211, 0.70721789],
       [0.00870994, 0.99129006],
       [0.71879454, 0.28120546],
       [0.5108267 , 0.4891733 ],
       [0.76230298, 0.23769702],
       [0.73170811, 0.26829189],
       [0.10155737, 0.89844263],
       [0.04046512, 0.95953488],
       [0.57926768, 0.42073232],
       [0.00526468, 0.99473532],
       [0.03101648, 0.96898352],
       [0.96093035, 0.03906965],
       [0.

In [30]:
from sklearn.model_selection import RandomizedSearchCV

In [33]:
rscv=RandomizedSearchCV(LogisticRegression(), param_distributions=parameters,n_iter=20,cv=5 )

In [36]:
rscv.fit(x_train,y_train)

RandomizedSearchCV(cv=5, estimator=LogisticRegression(), n_iter=20,
                   param_distributions={'C': [1, 10, 20, 30],
                                        'penalty': ('l1', 'l2', 'elasticnet')})

In [37]:
rscv.best_params_

{'penalty': 'l2', 'C': 1}