In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [3]:
from sklearn.datasets import make_classification

In [27]:
# creating dataset

X,y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=15)

In [28]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [29]:
# model training

from sklearn.linear_model import LogisticRegression
logistics = LogisticRegression()

In [30]:
logistics.fit(X_train,y_train)

In [31]:
y_pred = logistics.predict(X_test)

In [32]:
from sklearn.metrics import confusion_matrix,accuracy_score,classification_report

In [33]:
score=accuracy_score(y_test,y_pred)
print("Accuracy of the model is: ", score)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy of the model is:  0.9166666666666666
Confusion Matrix:
 [[146  11]
 [ 14 129]]
Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.93      0.92       157
           1       0.92      0.90      0.91       143

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



### Hyperparamter Tuning and Cross Validation

### Grid SearchCV

In [34]:
model = LogisticRegression()
penalty = ['l1', 'l2', 'elasticnet']
c_values = [0.01,0.1,1,10,100]
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']


In [35]:
params = dict(penalty=penalty, C=c_values, solver=solver)

In [36]:
## grid search cv

from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
cv = StratifiedKFold()
grid = GridSearchCV(estimator = model,param_grid=params,scoring='accuracy',cv = cv, n_jobs=-1)

In [37]:
grid.fit(X_train, y_train)

200 fits failed out of a total of 375.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
25 fits failed with the following error:
Traceback (most recent call last):
  File "/home/mukulvyas/.config/jupyterlab-desktop/jlab_server/envs/protfeat_env/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/mukulvyas/.config/jupyterlab-desktop/jlab_server/envs/protfeat_env/lib/python3.10/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/home/mukulvyas/.config/jupyterlab-desktop/jlab_server/envs/protfeat_env/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py", line 1193

In [38]:
grid.best_params_

{'C': 0.01, 'penalty': 'l1', 'solver': 'saga'}

In [39]:
grid.best_score_

np.float64(0.9242857142857142)

In [40]:
y_pred = grid.predict(X_test)

In [41]:
score=accuracy_score(y_test,y_pred)
print("Accuracy of the model is: ", score)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy of the model is:  0.92
Confusion Matrix:
 [[149   8]
 [ 16 127]]
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.95      0.93       157
           1       0.94      0.89      0.91       143

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



### Randomized SearchCV

In [42]:
from sklearn.model_selection import RandomizedSearchCV

In [44]:
randomcv = RandomizedSearchCV(estimator=model, param_distributions=params, n_iter=10, scoring='accuracy', cv=5, n_jobs=-1)

In [None]:
randomcv.fit(X_train, y_train)
print("Best parameters from Randomized Search CV:", randomcv.best_params_)

Best parameters from Randomized Search CV: {'solver': 'saga', 'penalty': 'l1', 'C': 0.01}


20 fits failed out of a total of 50.
The score on these train-test partitions for these parameters will be set to nan.
If these failures are not expected, you can try to debug them by setting error_score='raise'.

Below are more details about the failures:
--------------------------------------------------------------------------------
10 fits failed with the following error:
Traceback (most recent call last):
  File "/home/mukulvyas/.config/jupyterlab-desktop/jlab_server/envs/protfeat_env/lib/python3.10/site-packages/sklearn/model_selection/_validation.py", line 866, in _fit_and_score
    estimator.fit(X_train, y_train, **fit_params)
  File "/home/mukulvyas/.config/jupyterlab-desktop/jlab_server/envs/protfeat_env/lib/python3.10/site-packages/sklearn/base.py", line 1389, in wrapper
    return fit_method(estimator, *args, **kwargs)
  File "/home/mukulvyas/.config/jupyterlab-desktop/jlab_server/envs/protfeat_env/lib/python3.10/site-packages/sklearn/linear_model/_logistic.py", line 1193, 

In [46]:
randomcv.best_score_

np.float64(0.9228571428571428)

In [47]:
randomcv.best_params_

{'solver': 'saga', 'penalty': 'l1', 'C': 0.01}

In [49]:
y_pred = randomcv.predict(X_test)

In [50]:
score=accuracy_score(y_test,y_pred)
print("Accuracy of the model is: ", score)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy of the model is:  0.92
Confusion Matrix:
 [[149   8]
 [ 16 127]]
Classification Report:
               precision    recall  f1-score   support

           0       0.90      0.95      0.93       157
           1       0.94      0.89      0.91       143

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



## Logistic Regression for MultiClass Classification 

In [51]:
X,y = make_classification(n_samples=1000, n_features=10, n_classes=3, n_informative = 3, random_state=15)

In [52]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

In [54]:
from sklearn.linear_model import LogisticRegression
logistic = LogisticRegression(multi_class='ovr')
logistic.fit(X_train, y_train)



In [57]:
y_pred = logistic.predict(X_test)

In [58]:
score=accuracy_score(y_test,y_pred)
print("Accuracy of the model is: ", score)
cm = confusion_matrix(y_test, y_pred)
print("Confusion Matrix:\n", cm)
print("Classification Report:\n", classification_report(y_test, y_pred))

Accuracy of the model is:  0.79
Confusion Matrix:
 [[84 10  8]
 [ 3 74 25]
 [10  7 79]]
Classification Report:
               precision    recall  f1-score   support

           0       0.87      0.82      0.84       102
           1       0.81      0.73      0.77       102
           2       0.71      0.82      0.76        96

    accuracy                           0.79       300
   macro avg       0.79      0.79      0.79       300
weighted avg       0.80      0.79      0.79       300

