## Logistic Regression Implementation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [4]:
from sklearn.datasets import make_classification

In [6]:
## create the dataset
X, y=make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=42)

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

In [12]:
from sklearn.linear_model import LogisticRegression
logistic=LogisticRegression()

In [14]:
logistic.fit(X_train, y_train)

In [16]:
y_pred=logistic.predict(X_test)
print(y_test)

[0 1 1 1 0 1 0 0 0 0 0 1 1 1 1 0 1 1 1 1 0 0 1 1 0 1 1 0 0 1 1 1 0 0 1 0 1
 1 1 1 0 1 0 0 0 1 1 1 1 1 0 1 1 0 1 1 1 0 1 0 1 1 0 1 1 1 1 0 1 1 0 1 1 0
 1 0 1 1 1 1 1 0 1 0 1 0 0 0 0 1 1 1 0 0 1 1 0 0 1 0 1 0 1 0 0 0 0 1 1 1 0
 1 1 1 1 0 1 1 0 1 1 1 0 1 1 1 1 1 1 1 1 0 1 1 1 0 0 0 0 1 0 0 0 0 0 0 1 0
 0 0 1 0 0 1 0 0 1 0 0 1 0 1 0 0 0 0 1 0 1 1 0 0 0 0 1 1 0 1 1 1 1 1 1 1 0
 0 0 0 0 1 1 0 1 0 1 0 0 1 1 1 1 1 0 1 0 0 1 1 1 1 0 0 0 0 0 1 0 1 0 1 1 0
 0 1 1 1 0 1 1 0 0 0 0 0 1 0 0 0 1 1 0 1 1 1 1 0 1 0 1 1 1 1 0 0 0 1 1 1 1
 0 1 1 1 1 0 0 0 1 1 0 0 1 0 0 0 0 1 1 0 1 1 0 1 0 1 1 1 0 0 1 0 1 1 0 0 1
 1 1 1 0]


In [18]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [19]:
score=accuracy_score(y_test, y_pred)
print(score)

cm=confusion_matrix(y_test,y_pred)
print(cm)

print(classification_report(y_test, y_pred))

0.8466666666666667
[[118  17]
 [ 29 136]]
              precision    recall  f1-score   support

           0       0.80      0.87      0.84       135
           1       0.89      0.82      0.86       165

    accuracy                           0.85       300
   macro avg       0.85      0.85      0.85       300
weighted avg       0.85      0.85      0.85       300



## Hyperparameter Tuning and Cross Validation

In [88]:
model=LogisticRegression()
penalty=['l2']
c_values=[100,10, 1.0, 0.1, 0.01]
solver=['lbfgs', 'sag',]

In [89]:
params=dict(penalty=penalty, C=c_values, solver=solver)

In [90]:
params

{'penalty': ['l2'], 'C': [100, 10, 1.0, 0.1, 0.01], 'solver': ['lbfgs', 'sag']}

In [91]:

from sklearn.model_selection import StratifiedKFold
cv=StratifiedKFold()

In [92]:
## GridSearchCV

from sklearn.model_selection import GridSearchCV
grid=GridSearchCV(estimator=model_new, param_grid=params, scoring='accuracy', cv=cv, n_jobs=-1)

In [93]:
grid

In [104]:
grid.fit(X_train, y_train)

In [105]:
grid.best_params_

{'C': 0.01, 'penalty': 'l2', 'solver': 'lbfgs'}

In [106]:
grid.best_score_

0.8785714285714287

In [107]:
y_pred=grid.predict(X_test)

In [108]:
score=accuracy_score(y_test, y_pred)
print(score)

cm=confusion_matrix(y_test,y_pred)
print(cm)

print(classification_report(y_test, y_pred))

0.8533333333333334
[[124  11]
 [ 33 132]]
              precision    recall  f1-score   support

           0       0.79      0.92      0.85       135
           1       0.92      0.80      0.86       165

    accuracy                           0.85       300
   macro avg       0.86      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300



## Randomized SearchCV

In [95]:
from sklearn.model_selection import RandomizedSearchCV

In [96]:
model=LogisticRegression()
randomcv=RandomizedSearchCV(estimator=model, param_distributions=params, cv=5, scoring='accuracy')

In [97]:
randomcv.fit(X_train, y_train)

In [98]:
randomcv.best_score_

0.8785714285714287

In [99]:
randomcv.best_params_

{'solver': 'lbfgs', 'penalty': 'l2', 'C': 0.01}

In [100]:
y_pred=randomcv.predict(X_test)

In [102]:
score=accuracy_score(y_pred,y_test)
print(score)
print(classification_report(y_pred, y_test))
print(confusion_matrix(y_pred, y_test))

0.8533333333333334
              precision    recall  f1-score   support

           0       0.92      0.79      0.85       157
           1       0.80      0.92      0.86       143

    accuracy                           0.85       300
   macro avg       0.86      0.86      0.85       300
weighted avg       0.86      0.85      0.85       300

[[124  33]
 [ 11 132]]
