### Multiclass Logistic regression OVR

In [12]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [13]:
from sklearn.datasets import make_classification

In [14]:
## Making dataset
X, Y = make_classification(n_samples = 1000, n_features=10, n_classes=3, random_state=42)

ValueError: n_classes(3) * n_clusters_per_class(2) must be smaller or equal 2**n_informative(2)=4

In [None]:
from sklearn .model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X,Y, test_size=0.3, random_state=42)

In [None]:
from sklearn.linear_model import LogisticRegression
logistic = LogisticRegression(multi_class='ovr')
logistic.fit(X_train, Y_train)
pred = logistic.predict(X_test)



In [None]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report
print("Score: ", accuracy_score(Y_test, pred))
print("Confusion Matrix: \n", confusion_matrix(Y_test, pred))
print("Classification Report: \n", classification_report(Y_test, pred))

Score:  0.8466666666666667
Confusion Matrix: 
 [[118  17]
 [ 29 136]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.80      0.87      0.84       135
           1       0.89      0.82      0.86       165

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300



##### Tuning hyperparameters and cross validation

In [None]:
from sklearn.model_selection import GridSearchCV
model = LogisticRegression()
penalty = ['l1','l2','elasticnet']
c_values = [100,10,1,0.1,0.01]                                                                          
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
params = dict(penalty = penalty, C = c_values, solver = solver)
grid = GridSearchCV(estimator=model, param_grid=params, scoring='accuracy', cv = 5, n_jobs=-1)
grid.fit(X_train,Y_train)
pred=grid.predict(X_test)
print("Score: ", accuracy_score(Y_test, pred))
print("Confusion Matrix: \n", confusion_matrix(Y_test, pred))
print("Classification Report: \n", classification_report(Y_test, pred))

Score:  0.8533333333333334
Confusion Matrix: 
 [[124  11]
 [ 33 132]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.79      0.92      0.85       135
           1       0.92      0.80      0.86       165

    accuracy                           0.85       300
   macro avg       0.86      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300



200 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\linear_model\_logistic.py", line 1218, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users

##### Randomized search CV

In [None]:
from sklearn.model_selection import RandomizedSearchCV
model = LogisticRegression()
penalty = ['l1','l2','elasticnet']
c_values = [100,10,1,0.1,0.01]
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']

params = dict(penalty = penalty, C = c_values, solver = solver)
grid = RandomizedSearchCV(estimator=model, param_distributions=params, scoring='accuracy', cv = 5, n_jobs=-1, n_iter=2100)
grid.fit(X_train, Y_train)
grid_pred = grid.predict(X_test)

200 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "c:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\model_selection\_validation.py", line 859, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "c:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\base.py", line 1365, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "c:\Users\DELL\AppData\Local\Programs\Python\Python310\lib\site-packages\sklearn\linear_model\_logistic.py", line 1218, in fit
    solver = _check_solver(self.solver, self.penalty, self.dual)
  File "c:\Users

In [None]:
print("Score: ", accuracy_score(Y_test, grid_pred))
print("Confusion Matrix: \n", confusion_matrix(Y_test, grid_pred))
print("Classification Report: \n", classification_report(Y_test, grid_pred))

Score:  0.8533333333333334
Confusion Matrix: 
 [[124  11]
 [ 33 132]]
Classification Report: 
               precision    recall  f1-score   support

           0       0.79      0.92      0.85       135
           1       0.92      0.80      0.86       165

    accuracy                           0.85       300
   macro avg       0.86      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300

