### Logistic Regression from scratch

In [64]:
import numpy as np

class LogisticRegression:
    def __init__(self,learning_rate=0.02,num_iters=10):
        self.lr = learning_rate
        self.num_iters=num_iters
        self.weights = None
        self.bias = None

    def sigmoid(self,X):
        return 1 / (1 + np.exp(-X))

    # log loss is equivalent of maximum likelihood estimation, MLE
    def log_loss(self, y_true, y_pred):
        n = len(y_true)
        y1 = y_true * np.log(y_pred) 
        y2 = (1 - y_pred) * np.log(1 - y_pred)
        return -(y1+y2)/n

    
    def feed_forward(self,X):
        z = np.dot(X,self.weights) + self.bias
        s = self.sigmoid(z)
        return s
    
    def fit(self, X, y, verbose=True):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        self.bias = 0
        self.loss=[]
        self.epochs=[]
        for itr in range(self.num_iters):
            y_pred = self.feed_forward(X)
            loss = self.log_loss(y_true=y, y_pred=y_pred)
            self.loss.append(loss)
            self.epochs.append(itr)
            if verbose:
                print("loss : {}".format(loss))
            dz = y_pred - y
            # gradients
            dw = np.dot(X.T,dz) / n_samples
            db = np.sum(dz) / n_samples
            # update weights and bias
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self,X,threshold=0.5):
        y_hat = np.dot(X,self.weights) + self.bias
        y_predicted = self.sigmoid(y_hat)
        target_class = []
        for pred_value in y_predicted:
            if pred_value > threshold:
                target_class.append(1)
            else:
                target_class.append(0)
        
        return np.array(target_class)
            
        

In [68]:
# using the regression model
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split

X,y = make_classification(n_samples=1000, n_features=5, random_state=999)
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.30)

model = LogisticRegression()
model.fit(X_train, y_train, verbose=False)


In [69]:
result = model.predict(X_test)
print(result)

[1 0 0 1 1 1 0 0 0 1 1 0 0 0 0 1 0 0 0 1 1 1 1 0 0 0 1 0 1 0 1 1 0 1 0 0 0
 0 0 0 0 0 0 1 0 1 0 1 1 0 1 1 1 1 0 1 1 0 0 0 1 0 0 1 0 1 1 0 1 0 0 0 0 1
 1 1 1 0 0 0 1 0 1 0 0 0 1 0 0 1 1 1 0 1 1 1 1 0 0 1 0 0 0 0 1 1 1 1 1 0 1
 0 0 0 0 1 1 0 0 1 0 0 0 1 1 1 1 0 1 1 1 0 1 1 1 1 0 0 0 0 1 1 0 1 1 0 1 0
 1 0 1 1 0 0 1 0 0 1 1 0 0 1 0 1 1 0 0 0 1 0 0 1 0 1 1 0 1 0 1 0 1 1 0 1 0
 1 0 0 0 0 1 1 0 1 0 1 0 0 0 1 0 0 1 0 1 0 0 1 1 0 1 0 1 0 0 0 1 0 1 0 0 0
 1 0 0 0 0 1 0 1 0 0 0 1 0 1 0 0 0 1 0 0 1 1 0 1 1 0 1 1 0 0 0 0 0 0 1 0 1
 1 1 1 0 1 1 0 1 1 0 0 0 0 0 1 1 1 0 0 1 0 1 0 0 0 0 0 0 1 0 0 0 1 0 1 1 1
 0 1 0 1]


In [72]:
from sklearn.metrics import classification_report
print(classification_report(y_true=y_test, y_pred=result))

              precision    recall  f1-score   support

           0       0.85      0.92      0.89       152
           1       0.91      0.84      0.87       148

    accuracy                           0.88       300
   macro avg       0.88      0.88      0.88       300
weighted avg       0.88      0.88      0.88       300

