<a href="https://colab.research.google.com/github/rajprakrit/ML-Library-1/blob/master/LogisticRegression_ML_Lib.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [0]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

In [0]:
class LogisticRegression:
    """Multi-class logistic regression using the one-vs-all scheme.

    One independent binary logistic model is kept per class: for class ``j``
    the samples labelled ``j`` are treated as 1 and every other sample as 0.
    All models are trained together with batch gradient descent.

    Class labels are expected to be the integers ``0 .. n_class - 1``.

    Attributes:
        alpha: Learning rate used for each gradient step.
        n_iters: Number of gradient-descent iterations run by ``fit``.
        n_class: Number of classes, or ``None`` to infer it from the labels
            passed to ``fit``.
        weights: Array of shape ``(n_class, n_features)`` after ``fit``,
            ``None`` before.
        bias: Array of shape ``(n_class,)`` after ``fit``, ``None`` before.
    """

    def __init__(self, alpha=0.01, n_iters=1000, n_class=None):
        """Store the hyper-parameters; the model parameters are created by ``fit``.

        Args:
            alpha: Learning rate.
            n_iters: Number of gradient-descent iterations.
            n_class: Number of classes. When ``None``, ``fit`` uses
                ``max(y) + 1``.
        """
        self.alpha = alpha
        self.n_iters = n_iters
        self.n_class = n_class
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Train one binary logistic model per class with batch gradient descent.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like of ``n_samples`` integer labels in
                ``0 .. n_class - 1``.

        Raises:
            ValueError: If ``X`` has no samples or ``X`` and ``y`` disagree on
                the number of samples.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y).ravel()
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("cannot fit on an empty dataset")
        if y.shape[0] != n_samples:
            raise ValueError(
                "X and y have inconsistent lengths: "
                f"{n_samples} != {y.shape[0]}"
            )

        n_class = self.n_class if self.n_class is not None else int(y.max()) + 1

        # Parameters start at zero: one weight row and one bias per class.
        self.weights = np.zeros([n_class, n_features])
        self.bias = np.zeros([n_class])

        # One-vs-all targets, built once: column j is 1 where y == j, else 0.
        targets = (y[:, None] == np.arange(n_class)).astype(float)

        for _ in range(self.n_iters):
            # Column j of `error` depends only on class j's parameters, so
            # updating every class at once matches a class-by-class update.
            error = self.sigmoid(self.hypothesis(X)) - targets
            grad_w = (error.T @ X) / n_samples
            grad_b = error.sum(axis=0) / n_samples

            self.weights -= self.alpha * grad_w
            self.bias -= self.alpha * grad_b

    def hypothesis(self, X):
        """Return the linear scores ``X @ weights.T + bias`` for every class."""
        return (X @ self.weights.T) + self.bias

    def predict(self, X):
        """Predict, for each sample, the class whose sigmoid score is highest.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``; a single 1-D
                sample is treated as one row.

        Returns:
            A list with one predicted class index per sample.
        """
        X = np.atleast_2d(np.asarray(X, dtype=float))
        scores = self.sigmoid(self.hypothesis(X))
        return list(np.argmax(scores, axis=1))

    def sigmoid(self, x):
        """Return the logistic function ``1 / (1 + exp(-x))`` element-wise.

        The exponential is only ever taken of a non-positive number, so large
        magnitude inputs cannot overflow.
        """
        x = np.asarray(x, dtype=float)
        z = np.exp(-np.abs(x))  # always in (0, 1]
        out = np.where(x >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
        # Indexing with () turns a 0-d result back into a scalar and leaves
        # arrays untouched.
        return out[()]

    def _accuracy(self, X, y):
        """Return the percentage of samples in ``X`` predicted as labelled in ``y``."""
        # Convert both sides to arrays so the comparison is element-wise even
        # when `y` is a plain Python list.
        predicted = np.asarray(self.predict(X))
        actual = np.asarray(y).ravel()
        return np.mean(predicted == actual) * 100

    def train_test_split(self, X, y, ratio):
        """Split the data into a leading training part and a trailing test part.

        The data is not shuffled: the first ``int(n_samples * (1 - ratio))``
        rows form the training set and the remaining rows form the test set.

        Args:
            X: Array of samples; must expose ``shape`` and support slicing.
            y: Labels aligned with ``X``.
            ratio: Fraction of the data to hold out for testing, in ``[0, 1]``.

        Returns:
            The tuple ``(X_train, X_test, y_train, y_test)``.

        Raises:
            ValueError: If ``ratio`` is outside ``[0, 1]``.
        """
        if not 0 <= ratio <= 1:
            raise ValueError("ratio must be between 0 and 1")

        count = int(X.shape[0] * (1 - ratio))
        return X[:count], X[count:], y[:count], y[count:]
        

In [0]:
mnist = datasets.load_digits()  # scikit-learn's bundled handwritten-digits dataset (despite the variable name, not the full MNIST set)

In [0]:
# Feature matrix and label vector taken from the loaded dataset.
X = mnist.data
y = mnist.target

In [0]:
# One-vs-all classifier: learning rate 0.01, 1000 iterations, 10 classes.
clf = LogisticRegression(alpha=0.01, n_iters=1000, n_class=10)

In [0]:
# Hold out the last 20% of the rows for testing; the first 80% are used for training.
X_train, X_test, y_train, y_test = clf.train_test_split(X, y, ratio=0.2)

In [0]:
clf.fit(X_train, y_train)  # train the one-vs-all model on the training split

In [0]:
predictions = clf.predict(X_test)  # predicted class index for every test sample

In [0]:
clf._accuracy(X_test, y_test)  # percentage of test samples predicted correctly by our model

90.83333333333333

USING SKLEARN TO TRAIN OUR MODEL

In [0]:
# NOTE: this rebinds the name LogisticRegression to scikit-learn's estimator,
# shadowing the custom class defined earlier in this notebook; re-running the
# earlier `clf = LogisticRegression(...)` cell after this one would build the
# scikit-learn estimator instead.
from sklearn.linear_model import LogisticRegression

In [0]:
clf1 = LogisticRegression()  # scikit-learn estimator (imported just above) with its default settings

In [0]:
clf1.fit(X_train, y_train)  # train the scikit-learn model on the same training split



LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='warn', n_jobs=None, penalty='l2',
                   random_state=None, solver='warn', tol=0.0001, verbose=0,
                   warm_start=False)

In [0]:
predictions = clf1.predict(X_test)  # overwrites the earlier `predictions` with the scikit-learn model's output

In [0]:
# Fraction of test samples whose predicted label equals the true label.
accuracy = np.mean(predictions == y_test)

In [0]:
accuracy  # display the fraction of correct test predictions

0.8972222222222223

In [0]:
accuracy * 100  # the same accuracy expressed as a percentage

89.72222222222223

**Test accuracy of our model is 90.83%, compared with 89.72% for scikit-learn's LogisticRegression on the same split.**