# Logistic Regression Implementation

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings

# Install a blanket 'ignore' filter: every warning category is silenced from here on.
# NOTE(review): this also hides any warnings raised by the cells below
# (e.g. deprecation or failed-fit notices) — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [2]:
from sklearn.datasets import make_classification

In [3]:
# Synthetic two-class dataset: 1000 samples x 10 features, seeded so it is repeatable.
x, y = make_classification(n_samples=1000, n_features=10, n_classes=2, random_state=15)

In [4]:
# Sanity check: display the dimensions of the feature matrix and the label vector.
x.shape, y.shape

((1000, 10), (1000,))

In [5]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows as a test set; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=15,
)

In [6]:
# Display the dimensions of the train and test feature matrices produced by the split.
x_train.shape, x_test.shape

((700, 10), (300, 10))

In [7]:
# Display the dimensions of the train and test label vectors produced by the split.
y_train.shape, y_test.shape

((700,), (300,))

In [8]:
# Model Training
from sklearn.linear_model import LogisticRegression

# Baseline classifier: every constructor argument is left at its library default.
log_model = LogisticRegression()

In [9]:
# Fit the baseline model on the training split only; the test split stays unseen.
log_model.fit(x_train, y_train)

In [10]:
# Predicted class labels for the held-out test features.
y_pred = log_model.predict(x_test)

In [11]:
# Display the full array of predicted test labels.
y_pred

array([0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1,
       1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0,
       1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 1,
       0, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0,
       1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0,
       1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 1, 1, 0,
       0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0,
       1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0,
       1, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 1,
       0, 1, 1, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0,
       0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0,
       1, 1, 0, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0,
       0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1,
       1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1])

In [12]:
# Display the per-class probability estimates for each test row (one column per class).
log_model.predict_proba(x_test)

array([[9.84351360e-01, 1.56486398e-02],
       [1.15138114e-04, 9.99884862e-01],
       [8.63156979e-01, 1.36843021e-01],
       [3.72582077e-02, 9.62741792e-01],
       [9.94441645e-01, 5.55835487e-03],
       [9.75927551e-01, 2.40724492e-02],
       [3.75079094e-02, 9.62492091e-01],
       [9.83299549e-01, 1.67004507e-02],
       [1.73640652e-02, 9.82635935e-01],
       [7.19483094e-01, 2.80516906e-01],
       [8.78317943e-01, 1.21682057e-01],
       [4.34277637e-01, 5.65722363e-01],
       [1.60611151e-01, 8.39388849e-01],
       [9.09248488e-04, 9.99090752e-01],
       [9.82296917e-01, 1.77030834e-02],
       [4.93531136e-03, 9.95064689e-01],
       [8.29721582e-01, 1.70278418e-01],
       [6.59724959e-01, 3.40275041e-01],
       [2.33259882e-02, 9.76674012e-01],
       [9.97667641e-01, 2.33235936e-03],
       [3.16791080e-03, 9.96832089e-01],
       [2.08996033e-03, 9.97910040e-01],
       [2.87253330e-03, 9.97127467e-01],
       [8.93965633e-01, 1.06034367e-01],
       [8.259758

In [13]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

In [14]:
# Score the baseline predictions against the true test labels, then print
# accuracy, confusion matrix and the per-class report in that order.
score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(
    f"Score: {score}\n",
    f"Confusion Matrix:\n{cm}\n",
    classification_report(y_test, y_pred),
    sep="\n",
)

Score: 0.92

Confusion Matrix:
[[134  11]
 [ 13 142]]

              precision    recall  f1-score   support

           0       0.91      0.92      0.92       145
           1       0.93      0.92      0.92       155

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



## Hyperparameter Tuning and Cross Validation

In [15]:
# Candidate hyperparameter values for the search, then the estimator to be tuned.
solver = ['newton-cg', 'lbfgs', 'liblinear', 'sag', 'saga']
c_values = [100, 10, 1.0, 0.1, 0.01]
penalty = ['l1', 'l2', 'elasticnet']
model_log_opt = LogisticRegression()

In [16]:
# Build the search space as a list of sub-grids, one per penalty, so that every
# candidate is a penalty/solver pairing LogisticRegression accepts. A single flat
# dict would cross every penalty with every solver, and the incompatible pairings
# (e.g. 'l1' with 'lbfgs', or 'elasticnet' with anything but 'saga') fail to fit
# and are scored as NaN by the search instead of being evaluated.
_SOLVERS_BY_PENALTY = {
    'l1': {'liblinear', 'saga'},
    'l2': {'newton-cg', 'newton-cholesky', 'lbfgs', 'liblinear', 'sag', 'saga'},
    'elasticnet': {'saga'},
}

params = []
for _penalty in penalty:
    # Keep only the solvers from the candidate list that support this penalty.
    _solvers = [s for s in solver if s in _SOLVERS_BY_PENALTY[_penalty]]
    _sub_grid = dict(penalty=[_penalty], C=c_values, solver=_solvers)
    if _penalty == 'elasticnet':
        # elasticnet requires an explicit L1/L2 mixing ratio; without it the fit raises.
        _sub_grid['l1_ratio'] = [0.25, 0.5, 0.75]
    params.append(_sub_grid)

In [17]:
# GridSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
# Stratified k-fold splitter (default settings) used as the CV scheme of an
# exhaustive, accuracy-scored search over `params`, using all available cores.
cv = StratifiedKFold()
grid_model = GridSearchCV(
    estimator=model_log_opt,
    param_grid=params,
    scoring='accuracy',
    cv=cv,
    n_jobs=-1,
)

In [18]:
# Display the configured (not yet fitted) search object.
grid_model

In [19]:
# Run the cross-validated grid search on the training split only.
grid_model.fit(x_train, y_train)



In [21]:
# Hyperparameter combination that achieved the best cross-validated score.
grid_model.best_params_

{'C': 0.1, 'penalty': 'l2', 'solver': 'liblinear'}

In [22]:
# Mean cross-validated accuracy of the best candidate (computed on training folds, not the test set).
grid_model.best_score_

np.float64(0.9228571428571429)

In [24]:
# Predict test labels through the fitted search object.
# Note: this rebinds y_pred, replacing the baseline model's predictions.
y_pred = grid_model.predict(x_test)

In [25]:
# Score the grid-search predictions on the test set, then print
# accuracy, confusion matrix and the per-class report in that order.
score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(
    f"Score: {score}\n",
    f"Confusion Matrix:\n{cm}\n",
    classification_report(y_test, y_pred),
    sep="\n",
)

Score: 0.9233333333333333

Confusion Matrix:
[[137   8]
 [ 15 140]]

              precision    recall  f1-score   support

           0       0.90      0.94      0.92       145
           1       0.95      0.90      0.92       155

    accuracy                           0.92       300
   macro avg       0.92      0.92      0.92       300
weighted avg       0.92      0.92      0.92       300



## Randomized Search CV

In [26]:
from sklearn.model_selection import RandomizedSearchCV

In [27]:
# Fresh, untuned estimator for the randomized search.
model_log_opt = LogisticRegression()

# Randomized search samples a subset of candidates from `params` (n_iter is left
# at its default) and scores each with 5-fold CV on accuracy.
# random_state pins which candidates are drawn, so best_params_ / best_score_
# are repeatable across kernel restarts instead of changing on every run.
randomcv = RandomizedSearchCV(
    estimator=model_log_opt,
    param_distributions=params,
    cv=5,
    scoring='accuracy',
    random_state=15,
)

In [28]:
# Run the randomized hyperparameter search on the training split only.
randomcv.fit(x_train, y_train)

In [29]:
# Best hyperparameter combination among the candidates the randomized search sampled.
randomcv.best_params_

{'solver': 'liblinear', 'penalty': 'l1', 'C': 1.0}

In [30]:
# Mean cross-validated accuracy of the best sampled candidate.
randomcv.best_score_

np.float64(0.9214285714285714)

## Logistic Regression for Multiclass Classification Problem

In [32]:
# Synthetic three-class dataset: 1000 samples x 10 features, 3 of them informative.
# Note: this rebinds x and y, replacing the binary dataset used above.
x, y = make_classification(
    n_samples=1000, n_features=10, n_informative=3, n_classes=3, random_state=15
)

In [34]:
from sklearn.model_selection import train_test_split
# 70/30 train/test split of the three-class data, seeded so it is repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.3,
    random_state=42,
)

In [35]:
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier

# One-vs-rest multiclass model: one binary logistic regression is fitted per class.
# The `multi_class='ovr'` constructor argument is deprecated in scikit-learn 1.5+
# and scheduled for removal; wrapping the estimator in OneVsRestClassifier is the
# documented replacement and keeps the same one-vs-rest strategy.
log_model = OneVsRestClassifier(LogisticRegression())
log_model.fit(x_train, y_train)

In [36]:
# Predicted class labels for the held-out test features of the three-class problem.
y_pred = log_model.predict(x_test)

In [37]:
# Display the full array of predicted test labels.
y_pred

array([2, 1, 2, 1, 1, 0, 0, 0, 2, 0, 2, 1, 2, 2, 2, 2, 2, 0, 0, 2, 2, 1,
       1, 1, 1, 0, 0, 0, 2, 1, 0, 2, 2, 1, 2, 0, 0, 2, 2, 1, 2, 2, 2, 1,
       2, 0, 1, 2, 0, 1, 0, 0, 0, 1, 1, 2, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0,
       2, 1, 0, 1, 0, 1, 2, 1, 2, 2, 1, 0, 1, 0, 1, 0, 1, 2, 2, 0, 1, 2,
       2, 1, 1, 2, 2, 0, 0, 0, 2, 2, 0, 1, 2, 1, 2, 1, 0, 2, 0, 2, 0, 1,
       2, 1, 2, 2, 1, 1, 1, 1, 2, 0, 2, 0, 1, 2, 0, 0, 2, 2, 2, 1, 2, 0,
       2, 2, 0, 0, 0, 2, 0, 2, 0, 1, 2, 1, 1, 2, 0, 0, 1, 1, 2, 2, 2, 1,
       2, 0, 2, 2, 2, 1, 0, 2, 0, 0, 2, 0, 2, 0, 0, 1, 2, 0, 1, 1, 1, 1,
       0, 2, 1, 0, 0, 1, 2, 2, 2, 2, 2, 0, 1, 1, 2, 2, 1, 2, 2, 2, 2, 1,
       0, 0, 1, 2, 2, 0, 0, 2, 1, 2, 1, 0, 0, 2, 1, 1, 1, 2, 2, 1, 2, 1,
       0, 1, 0, 0, 1, 0, 2, 1, 0, 2, 2, 1, 1, 1, 2, 1, 1, 0, 1, 1, 0, 0,
       0, 0, 2, 0, 0, 2, 2, 2, 2, 0, 2, 0, 1, 2, 2, 2, 1, 0, 0, 1, 0, 2,
       1, 2, 0, 0, 0, 2, 2, 1, 2, 0, 1, 1, 0, 0, 0, 1, 0, 2, 2, 0, 2, 0,
       0, 0, 1, 1, 2, 0, 1, 2, 2, 0, 1, 2, 0, 2])

In [38]:
# Score the multiclass predictions on the test set, then print
# accuracy, confusion matrix and the per-class report in that order.
score = accuracy_score(y_test, y_pred)
cm = confusion_matrix(y_test, y_pred)
print(
    f"Score: {score}\n",
    f"Confusion Matrix:\n{cm}\n",
    classification_report(y_test, y_pred),
    sep="\n",
)

Score: 0.79

Confusion Matrix:
[[84 10  8]
 [ 3 74 25]
 [10  7 79]]

              precision    recall  f1-score   support

           0       0.87      0.82      0.84       102
           1       0.81      0.73      0.77       102
           2       0.71      0.82      0.76        96

    accuracy                           0.79       300
   macro avg       0.79      0.79      0.79       300
weighted avg       0.80      0.79      0.79       300

