``Naive Logistic Regression``

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression as LogReg
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Single seed for the notebook: it seeds NumPy's global RNG here and is also
# passed as `random_state` to train_test_split further down.
RAND = 9876
np.random.seed(RAND)

In [2]:
# Synthetic classification dataset: 300 samples with 10 features each.
# NOTE(review): no random_state is passed, so reproducibility presumably relies
# on the np.random.seed(RAND) call in the first cell — confirm, and consider
# passing random_state explicitly so re-running this cell alone is deterministic.
X, y = make_classification(n_samples=300, n_features=10)
X.shape, y.shape

((300, 10), (300,))

In [3]:
# Sum of the label vector; with 0/1 labels this is the positive-class count
# (class-balance check against the 300 samples).
y.sum()

151

In [4]:
# Hold out 15% of the samples for testing; the split is seeded with RAND and
# stratified on y.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.15,
    random_state=RAND,
    stratify=y,
)
X_train.shape, X_test.shape

((255, 10), (45, 10))

In [5]:
class My_LogisticRegression:
    """Binary logistic regression trained with full-batch gradient descent.

    No regularization and no convergence check: the model simply runs
    ``n_iters`` gradient steps of size ``lr`` starting from all-zero
    parameters.

    Parameters
    ----------
    lr : float, default=0.01
        Step size applied to every gradient update.
    n_iters : int, default=1000
        Number of gradient-descent iterations performed by ``fit``.

    Attributes
    ----------
    wts : numpy.ndarray of shape (n_features,) or None
        Learned feature weights; ``None`` until ``fit`` is called.
    bias : numpy.ndarray of shape (1,) or None
        Learned intercept; ``None`` until ``fit`` is called.
    is_fit : bool
        ``True`` once ``fit`` has completed.
    """

    def __init__(self, lr=0.01, n_iters=1000):
        self.lr = lr
        self.n_iters = n_iters
        self.wts = None
        self.bias = None
        self.is_fit = False

    def fit(self, X, y):
        """Fit weights and bias by gradient descent on the log-loss.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training features.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Binary targets encoded as 0/1.

        Returns
        -------
        self : My_LogisticRegression
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` disagree on the sample count.
        """
        X = np.asarray(X, dtype=float)
        # Flatten y so a column vector cannot silently broadcast against the
        # 1-D prediction vector when the residual is computed.
        y = np.asarray(y, dtype=float).ravel()

        n_samples, n_fts = X.shape
        if n_samples == 0:
            raise ValueError('cannot fit on an empty dataset')
        if y.shape[0] != n_samples:
            raise ValueError(
                'X and y have inconsistent sample counts: '
                f'{n_samples} != {y.shape[0]}'
            )

        # zero initialization of weights and bias
        self.wts = np.zeros(n_fts)
        self.bias = np.zeros(1)

        scale = 1 / n_samples  # loop-invariant averaging factor

        # fixed number of gradient steps (no early stopping)
        for _ in range(self.n_iters):
            y_pred = self._sigmoid(np.dot(X, self.wts) + self.bias)
            residual = y_pred - y

            # gradients of the mean log-loss w.r.t. weights and bias
            dw = scale * np.dot(residual, X)
            db = scale * np.sum(residual)

            # update weights and bias
            self.wts -= dw * self.lr
            self.bias -= db * self.lr

        self.is_fit = True
        return self

    def predict(self, X):
        """Predict 0/1 class labels for ``X``.

        A sample is labelled 1 when its predicted probability is strictly
        greater than 0.5, otherwise 0.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of dtype int64 and shape (n_samples,)

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if not self.is_fit:
            raise RuntimeError('first train the algorithm')
        probs = self._sigmoid(np.dot(X, self.wts) + self.bias)
        return np.array(probs > 0.5, dtype='int64')

    def predcit(self, X):
        """Misspelled alias of ``predict``, kept so existing callers keep working."""
        return self.predict(X)

    def _sigmoid(self, x):
        """Numerically stable logistic function 1 / (1 + exp(-x)).

        Only exp(-|x|) is ever evaluated, which lies in (0, 1], so large
        magnitude inputs cannot overflow.
        """
        x = np.asarray(x, dtype=float)
        z = np.exp(-np.abs(x))
        return np.where(x >= 0, 1 / (1 + z), z / (1 + z))
        

In [6]:
# Train the hand-written model with its default lr / n_iters on the training
# split, then score accuracy on the held-out test split.
# (`predcit` is the method name as spelled in the class definition.)
log_reg = My_LogisticRegression()
log_reg.fit(X_train, y_train)
preds = log_reg.predcit(X_test)
accuracy_score(y_test, preds)

0.9111111111111111

In [7]:
# Baseline: scikit-learn's LogisticRegression with default settings, trained
# and scored on the same split as the hand-written model.
# (LogReg is imported in the notebook's top import cell.)
sk_log_reg = LogReg()
sk_log_reg.fit(X_train, y_train)
sk_preds = sk_log_reg.predict(X_test)
accuracy_score(y_test, sk_preds)

0.8888888888888888


In [8]:
# Coefficients fitted by the scikit-learn baseline (one row, one column per feature).
sk_log_reg.coef_

array([[-0.00259108, -0.17322064, -0.90914312,  0.09057515, -0.30125718,
         1.53251295, -0.04055468,  0.2368014 , -0.31097296,  0.40054413]])

In [9]:
# Weights learned by My_LogisticRegression, for comparison with sk_log_reg.coef_.
log_reg.wts

array([-0.00269666, -0.12355265, -0.7250062 ,  0.0399096 , -0.20777501,
        1.19536054, -0.01096003,  0.15346569, -0.25217163,  0.22824192])

In [None]:
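# Mine is better?? Not conclusively: 0.911 vs 0.889 accuracy on 45 test samples
# is a one-sample difference (41 vs 40 correct). The two weight vectors agree in
# sign but mine are smaller in magnitude, so the models are not the same solution.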