In [33]:
# importing necessary packages
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_breast_cancer # toy dataset
from sklearn.linear_model import LogisticRegression # to compare results with our custom model

Theory & Formula:

Line function : 

In [34]:
class MyLogisticRegressor:
    '''
    Implementing logistic regression algorithm from scratch
    '''
    def __init__(self, n_iter=1000, learning_rate=0.01):
        '''
        Initializing hyperparameters
        '''
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = None
        self.bias = None
        
    @staticmethod
    def hypothesis(x):
        '''
        Helper function to calculate the hypothesis of logistic regression.
        Formula : returns sigmoid values (F(x) = 1 / (1 + exp(-x)))
        '''
        return 1 / (1 + np.exp(-x))
    
    def fit(self, X, y):
        '''
        Function to train logistic regression model
        '''
        m = 1 / X.shape[0] # no of examples
        
        self.weights = np.zeros(X.shape[1]) # initializing weights for every feature
        self.bias = 0
        
        for i in range(self.n_iter):
            # getting initial probabilites from sigmoid function
            initial_probabilites = self.hypothesis(np.dot(X, self.weights) + self.bias)
            
            # calculating partial derivates
            dW = 1/m * (2 * np.dot(X.T, (initial_probabilites - y)))
            db = 1/m * (2 * np.sum(initial_probabilites - y))
            
            # updating weights
            self.weights -= self.learning_rate * dW
            self.bias -= self.learning_rate * db
            
        print(f'Final weights : \n {self.weights}')
        print(f'Final bias : {self.bias}')
        
    def predict_proba(self, X):
        '''
        Helper function to calculate the probabilities of classes
        '''
        # gather the weights and bias calculated in training
        probs = np.dot(X, self.weights) + self.bias
        
        return self.hypothesis(probs)
    
    def predict(self, X, threshold=0.5):
        '''
        Helper function to predict the values using threshold
        '''
        probs = self.predict_proba(X)
        return np.array([1  if p >= threshold else 0 for p in probs])
    
        
        
        

In [35]:
dataset = load_breast_cancer() # loading data

X = dataset.data # seperate features
y = dataset.target # seperate target variables

# creating training and test dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state=42)

print(f'Original dataset : {X_train.shape}, Train dataset : {X_test.shape}')

my_model = MyLogisticRegressor()
my_model.fit(X_train, y_train)
m_preds = my_model.predict(X_test)
print(f'My predictions : {m_preds}')

sk_model = LogisticRegression()
sk_model.fit(X_train, y_train)
s_preds = sk_model.predict(X_test)
print(f'SK predictions : {s_preds}')

print(f'Accuracy\n My model : {accuracy_score(y_test, m_preds)} \n Sk model : {accuracy_score(y_test, s_preds)}')

Original dataset : (455, 30), Train dataset : (114, 30)
Final weights : 
 [ 1.34490465e+06  1.75562155e+06  7.81335691e+06  3.21017424e+06
  1.21047523e+04 -6.17797985e+03 -2.45652263e+04 -1.05181911e+04
  2.27853612e+04  9.76368780e+03  5.34938222e+03  1.28933119e+05
 -3.16400425e+04 -3.33034412e+06  6.49167578e+02 -1.38989110e+03
 -2.72842104e+03 -3.37407238e+02  2.06095480e+03  2.14721615e+02
  1.41190801e+06  2.18128702e+06  7.88280657e+06 -4.62463957e+06
  1.47838584e+04 -2.71601944e+04 -5.29450078e+04 -1.23953942e+04
  2.91477591e+04  8.91410825e+03]
Final bias : 175998.54999999978
My predictions : [0 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 0 1 0 1 1 1 1 1 1 0 1 1 1 0 1 1 0
 1 0 0 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 0 1 1 0 0 1 1 1 0 0 1 1 0 0 1 1
 1 1 1 0 1 1 0 1 0 0 0 0 0 0 1 1 1 1 0 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 0 0
 1 1 0]


  return 1 / (1 + np.exp(-x))


SK predictions : [1 0 0 1 1 0 0 0 1 1 1 0 1 0 1 0 1 1 1 0 1 1 0 1 1 1 1 1 1 0 1 1 1 1 1 1 0
 1 0 1 1 0 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0 1 1 1 0 0 1 1 1 0 0 1 1 0 0 1 0
 1 1 1 1 1 1 0 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 0 0 1 0 0 1 0 0 1 1 1 0 1 1 0
 1 0 0]
Accuracy
 My model : 0.9473684210526315
 Sk model : 0.956140350877193


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(
