## Import Libraries

In [430]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [431]:
## creating dataset
from sklearn.datasets import make_classification

In [432]:
## Synthetic binary-classification data: 1000 rows, 10 features, fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=15,
)

## Train Test Split

In [434]:
from sklearn.model_selection import train_test_split

## Hold out 30% of the rows for testing; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

## Model Training

In [436]:
from sklearn.linear_model import LogisticRegression

## Baseline classifier built with the library's default arguments.
logistic = LogisticRegression()

In [437]:
## Bare expression: the notebook renders the estimator as the cell output.
logistic

In [438]:
## Train the baseline model on the training split only.
logistic.fit(X_train,y_train)

## Prediction values

In [440]:
## Predicted labels for the held-out rows; compared against y_test below.
y_pred = logistic.predict(X_test)

## Performance metrics

In [442]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [443]:
## Accuracy of the baseline predictions on the test split.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
print(score)

0.9166666666666666


In [444]:
## Confusion matrix of the baseline predictions against the true test labels.
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
print(cm)

[[146  11]
 [ 14 129]]


In [445]:
## Per-class precision / recall / f1 summary as plain text.
rp = classification_report(y_true=y_test, y_pred=y_pred)
print(rp)

              precision    recall  f1-score   support

           0       0.91      0.93      0.92       157
           1       0.92      0.90      0.91       143

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



## Hyperparameter Tuning and Cross Validation

In [447]:
## Fresh, unfitted estimator that the search objects below receive as `estimator`.
model = LogisticRegression()

In [448]:
## select the parameters and the values list
penalty = ['l1', 'l2', 'elasticnet']
c_values = [100,10,1.0,0.1,0.01]
solver=['lbfgs', 'liblinear','newton-cg', 'newton-cholesky', 'sag', 'sag']


In [449]:
## create a dict
params = dict(penalty=penalty,C=c_values,solver=solver)
params

{'penalty': ['l1', 'l2', 'elasticnet'],
 'C': [100, 10, 1.0, 0.1, 0.01],
 'solver': ['lbfgs',
  'liblinear',
  'newton-cg',
  'newton-cholesky',
  'sag',
  'sag']}

## Using GridSearchCV for Hyperparameter tuning

In [451]:
from sklearn.model_selection import StratifiedKFold

## Stratified fold splitter with default settings, handed to the grid search.
cv = StratifiedKFold()


In [452]:
from sklearn.model_selection import GridSearchCV

## Exhaustive search over every combination in `params`, scored by accuracy
## on the folds produced by `cv`; n_jobs=-1 runs the fits in parallel.
grid = GridSearchCV(
    estimator=model,
    param_grid=params,
    scoring='accuracy',
    n_jobs=-1,
    cv=cv,
)

In [453]:
## Bare expression: the notebook renders the configured search object.
grid

In [454]:
## Run the grid search on the training split. Penalty/solver pairs that the
## estimator rejects show up as failed fits and are scored nan (see the warning
## printed below this cell).
grid.fit(X_train,y_train)

275 fits failed out of a total of 450.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "/home/nikeshgamal/anaconda3/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/nikeshgamal/anaconda3/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/nikeshgamal/anaconda3/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1194, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
           

In [455]:
## Parameter combination selected by the grid search.
grid.best_params_

{'C': 0.1, 'penalty': 'l1', 'solver': 'liblinear'}

In [456]:
## Cross-validated score (accuracy, per `scoring`) of the selected combination.
grid.best_score_

0.9228571428571428

In [457]:
## Predict the test split through the fitted search object; this overwrites
## the baseline `y_pred` computed earlier.
y_pred = grid.predict(X_test)

In [458]:
## Evaluate the grid-search predictions: compute the three metrics first,
## then print them in the order accuracy, confusion matrix, report.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
rp = classification_report(y_true=y_test, y_pred=y_pred)

print(score)
print(cm)
print(rp)

0.9266666666666666
[[150   7]
 [ 15 128]]
              precision    recall  f1-score   support

           0       0.91      0.96      0.93       157
           1       0.95      0.90      0.92       143

    accuracy                           0.93       300
   macro avg       0.93      0.93      0.93       300
weighted avg       0.93      0.93      0.93       300



## RandomizedSearchCV

In [460]:
from sklearn.model_selection import RandomizedSearchCV

In [461]:
## Rebind `model` to a fresh, unfitted estimator for the randomized search.
model = LogisticRegression()

In [462]:
## Randomized search: samples candidate combinations from `params` instead of
## trying every one, scoring each by 5-fold cross-validated accuracy.
## random_state pins which candidates are sampled, so rerunning the notebook
## reproduces the same best_params_ / best_score_ instead of a different draw.
randomcv = RandomizedSearchCV(
    estimator=model,
    param_distributions=params,
    scoring='accuracy',
    cv=5,
    random_state=42,
)
randomcv

In [463]:
## Run the randomized search on the training split. Sampled penalty/solver
## pairs that the estimator rejects are reported as failed fits and scored nan
## (see the warning printed below this cell).
randomcv.fit(X_train,y_train)

30 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
5 fits failed with the following error:
Traceback (most recent call last):
  File "/home/nikeshgamal/anaconda3/lib/python3.12/site-packages/sklearn/model_selection/_validation.py", line 888, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/nikeshgamal/anaconda3/lib/python3.12/site-packages/sklearn/base.py", line 1473, in wrapper
    return fit_method(estimator, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/home/nikeshgamal/anaconda3/lib/python3.12/site-packages/sklearn/linear_model/_logistic.py", line 1194, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
             ^

In [464]:
## Cross-validated score (accuracy, per `scoring`) of the best sampled candidate.
randomcv.best_score_

0.9128571428571428

In [465]:
## Parameter combination selected by the randomized search.
randomcv.best_params_

{'solver': 'liblinear', 'penalty': 'l1', 'C': 100}

In [466]:
## Predict the test split through the fitted randomized search; this
## overwrites the previous `y_pred`.
y_pred = randomcv.predict(X_test)

In [467]:
## Evaluate the randomized-search predictions: compute the three metrics
## first, then print them in the order accuracy, confusion matrix, report.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)
rp = classification_report(y_true=y_test, y_pred=y_pred)

print(score)
print(cm)
print(rp)

0.9166666666666666
[[146  11]
 [ 14 129]]
              precision    recall  f1-score   support

           0       0.91      0.93      0.92       157
           1       0.92      0.90      0.91       143

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



## Logistic Regression For MultiClass Classification Problem

In [510]:
## creating dataset
from sklearn.datasets import make_classification
## creating dataset 
X,y = make_classification(n_samples=1000,n_features=10,n_informative=3,n_classes=3,random_state=15)

In [512]:
from sklearn.model_selection import train_test_split

## Same 70/30 split and seed as before, now applied to the 3-class data.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [514]:
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

## One-vs-rest multiclass strategy: one binary logistic model per class.
## LogisticRegression's `multi_class='ovr'` argument is deprecated in recent
## scikit-learn releases (1.5+), so the strategy is expressed with the
## OneVsRestClassifier wrapper, which is the documented replacement.
logistic = OneVsRestClassifier(LogisticRegression())
logistic.fit(X_train, y_train)

## Predicted class labels for the held-out rows.
y_pred = logistic.predict(X_test)



In [518]:
## Evaluate the multiclass predictions: compute the three metrics first,
## then print them in the order accuracy, report, confusion matrix.
score = accuracy_score(y_true=y_test, y_pred=y_pred)
rp = classification_report(y_true=y_test, y_pred=y_pred)
cm = confusion_matrix(y_true=y_test, y_pred=y_pred)

print(score)
print(rp)
print(cm)

0.79
              precision    recall  f1-score   support

           0       0.87      0.82      0.84       102
           1       0.81      0.73      0.77       102
           2       0.71      0.82      0.76        96

    accuracy                           0.79       300
   macro avg       0.79      0.79      0.79       300
weighted avg       0.80      0.79      0.79       300

[[84 10  8]
 [ 3 74 25]
 [10  7 79]]
