In [120]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

In [121]:
from sklearn.datasets import make_classification

In [122]:
# Synthetic binary-classification problem: 1000 samples, 10 features, fixed seed.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_classes=2,
    random_state=42,
)

In [123]:
from sklearn.model_selection import train_test_split

# Hold out 30% of the rows for evaluation; the seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [124]:
# Model training: baseline logistic regression built with the library defaults
# (no hyperparameters are set here).
from sklearn.linear_model import LogisticRegression
logistic = LogisticRegression()

In [125]:
# Fit the baseline on the training split; the cell displays the fitted estimator.
logistic.fit(X_train, y_train)

In [126]:
# Predict class labels for the held-out test rows.
y_pred = logistic.predict(X_test)

In [127]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [128]:
# Baseline evaluation on the test split: compute accuracy and the confusion
# matrix first, then print them, followed by the per-class report, one per line.
score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(score, cm, classification_report(y_test, y_pred), sep="\n")

0.8466666666666667
[[118  17]
 [ 29 136]]
              precision    recall  f1-score   support

           0       0.80      0.87      0.84       135
           1       0.89      0.82      0.86       165

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300



#### Hyperparameter Tuning and Cross Validation

In [129]:
# Base estimator for the hyperparameter search. max_iter is raised to 5000 so
# the iterative solvers in the grid get more iterations than the library default.
# (The earlier bare LogisticRegression() assignment was dead code: it was
# overwritten immediately, so it has been removed.)
model = LogisticRegression(max_iter=5000)

# One sub-grid per penalty type, because each penalty is only paired with the
# solvers listed next to it. Passing a list of dicts makes the search expand
# each dict separately instead of taking the full cross-product.
param_grid = [

    # l1 penalty
    {
        'penalty': ['l1'],
        'solver': ['liblinear', 'saga'],
        'C': [100, 10, 1.0, 0.1, 0.01]
    },

    # l2 penalty
    {
        'penalty': ['l2'],
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
        'C': [100, 10, 1.0, 0.1, 0.01]
    },

    # elasticnet penalty (l1_ratio sets the mix between l1 and l2)
    {
        'penalty': ['elasticnet'],
        'solver': ['saga'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'l1_ratio': [0.2, 0.5, 0.8]
    }
]


In [130]:
from sklearn.model_selection import StratifiedKFold
# Five-fold stratified splitter, passed to the grid search below as its cv argument.
cv = StratifiedKFold(n_splits=5)

In [131]:
# Exhaustive search over every combination in param_grid, scored by accuracy
# on the folds produced by `cv`, running in a single process.
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=cv,
    n_jobs=1,
)

In [132]:
# Display the configured search object (it is fitted in the next cell).
grid

In [133]:
# Run the cross-validated search on the training split only; the test split is not used here.
grid.fit(X_train, y_train)

In [134]:
# Parameter combination with the highest mean cross-validated accuracy.
grid.best_params_

{'C': 0.01, 'penalty': 'l2', 'solver': 'newton-cg'}

In [135]:
# Mean cross-validated accuracy of the best combination (training folds, not the test split).
grid.best_score_

np.float64(0.8785714285714287)

In [136]:
# Predict on the test split with the fitted search object; this overwrites the baseline y_pred.
y_pred = grid.predict(X_test)

In [137]:
# Test-split evaluation of the grid-search predictions: accuracy, confusion
# matrix and per-class report, printed one after another.
score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(score, cm, classification_report(y_test, y_pred), sep="\n")

0.8533333333333334
[[124  11]
 [ 33 132]]
              precision    recall  f1-score   support

           0       0.79      0.92      0.85       135
           1       0.92      0.80      0.86       165

    accuracy                           0.85       300
   macro avg       0.86      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300



#### RandomizedSearchCV

In [138]:
from sklearn.model_selection import RandomizedSearchCV

In [139]:
# Use the same base estimator as the grid search (max_iter=5000): the sampled
# candidates include the sag/saga solvers, which may not converge within the
# library's default iteration budget.
model = LogisticRegression(max_iter=5000)

# Randomized search samples a fixed number of candidates from param_grid
# instead of trying them all. random_state pins which candidates are drawn, so
# best_params_ / best_score_ are the same on every Restart & Run All.
# cv=5 with a classifier uses stratified 5-fold splitting.
randomcv = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_grid,
    cv=5,
    scoring='accuracy',
    random_state=42,
)

In [140]:
# Fit the randomized search on the training split.
randomcv.fit(X_train, y_train)



In [141]:
# Mean cross-validated accuracy of the best sampled candidate.
randomcv.best_score_

np.float64(0.8728571428571428)

In [142]:
# Parameters of the best sampled candidate.
randomcv.best_params_

{'solver': 'lbfgs', 'penalty': 'l2', 'C': 0.1}

In [143]:
# Predict on the test split with the fitted randomized search; overwrites the previous y_pred.
y_pred = randomcv.predict(X_test)

In [144]:
# Test-split evaluation of the randomized-search predictions: accuracy,
# confusion matrix and per-class report, printed one after another.
score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(score, cm, classification_report(y_test, y_pred), sep="\n")

0.8533333333333334
[[120  15]
 [ 29 136]]
              precision    recall  f1-score   support

           0       0.81      0.89      0.85       135
           1       0.90      0.82      0.86       165

    accuracy                           0.85       300
   macro avg       0.85      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300



## Multiclass Classification

In [145]:
# Synthetic 3-class problem; this rebinds X and y from the binary section.
X, y = make_classification(
    n_samples=1000,
    n_features=10,
    n_informative=3,
    n_classes=3,
    random_state=15,
)

In [146]:
# 70/30 split of the 3-class data; rebinds the train/test names from the binary section.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [147]:
# One-vs-rest baseline for the 3-class problem.
# LogisticRegression's multi_class argument is deprecated in recent
# scikit-learn releases; wrapping the estimator in OneVsRestClassifier is the
# replacement the library documents for keeping a one-vs-rest scheme.
from sklearn.multiclass import OneVsRestClassifier

logistic = OneVsRestClassifier(LogisticRegression())
logistic.fit(X_train, y_train)
y_pred = logistic.predict(X_test)



In [148]:
# Test-split evaluation of the multiclass baseline: accuracy, 3x3 confusion
# matrix and per-class report, printed one after another.
score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(score, cm, classification_report(y_test, y_pred), sep="\n")

0.79
[[84 10  8]
 [ 3 74 25]
 [10  7 79]]
              precision    recall  f1-score   support

           0       0.87      0.82      0.84       102
           1       0.81      0.73      0.77       102
           2       0.71      0.82      0.76        96

    accuracy                           0.79       300
   macro avg       0.79      0.79      0.79       300
weighted avg       0.80      0.79      0.79       300



#### Hyperparameter Tuning (Multiclass)

In [149]:
# Base estimator for the multiclass search. max_iter is raised to 5000 so the
# iterative solvers in the grid get more iterations than the library default.
# (The earlier bare LogisticRegression() assignment was dead code: it was
# overwritten immediately, so it has been removed.)
model = LogisticRegression(max_iter=5000)

# Same search space as the binary section, redefined here so this cell does
# not rely on state from earlier cells. One sub-grid per penalty type.
# NOTE(review): recent scikit-learn releases emit a FutureWarning when the
# 'liblinear' solver is fitted on a target with more than two classes --
# confirm against the installed version before keeping it in this grid.
param_grid = [

    # l1 penalty
    {
        'penalty': ['l1'],
        'solver': ['liblinear', 'saga'],
        'C': [100, 10, 1.0, 0.1, 0.01]
    },

    # l2 penalty
    {
        'penalty': ['l2'],
        'solver': ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga'],
        'C': [100, 10, 1.0, 0.1, 0.01]
    },

    # elasticnet penalty (l1_ratio sets the mix between l1 and l2)
    {
        'penalty': ['elasticnet'],
        'solver': ['saga'],
        'C': [100, 10, 1.0, 0.1, 0.01],
        'l1_ratio': [0.2, 0.5, 0.8]
    }
]


In [150]:
from sklearn.model_selection import StratifiedKFold
# Five-fold stratified splitter for the multiclass grid search (same settings as the binary section).
cv = StratifiedKFold(n_splits=5)

In [151]:
# Exhaustive search over param_grid for the 3-class problem, scored by
# accuracy on the folds produced by `cv`, running in a single process.
from sklearn.model_selection import GridSearchCV

grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='accuracy',
    cv=cv,
    n_jobs=1,
)

In [152]:
# Display the configured search object (it is fitted in the next cell).
grid

In [153]:
# Run the cross-validated search on the multiclass training split.
grid.fit(X_train, y_train)

In [154]:
# Parameter combination with the highest mean cross-validated accuracy.
grid.best_params_

{'C': 0.1, 'penalty': 'l1', 'solver': 'saga'}

In [155]:
# Mean cross-validated accuracy of the best combination (training folds, not the test split).
grid.best_score_

np.float64(0.8042857142857143)

In [157]:
# Predict on the test split with the fitted search object; overwrites the baseline y_pred.
y_pred = grid.predict(X_test)

In [158]:
# Test-split evaluation of the tuned multiclass model: accuracy, 3x3 confusion
# matrix and per-class report, printed one after another.
score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(score, cm, classification_report(y_test, y_pred), sep="\n")

0.7733333333333333
[[84 11  7]
 [ 4 74 24]
 [ 8 14 74]]
              precision    recall  f1-score   support

           0       0.88      0.82      0.85       102
           1       0.75      0.73      0.74       102
           2       0.70      0.77      0.74        96

    accuracy                           0.77       300
   macro avg       0.78      0.77      0.77       300
weighted avg       0.78      0.77      0.77       300

