In [55]:
import numpy as np
#X: mxn, y:mx1, w:nx1
class LogisticRegression:
  def __init__(self, learning_rate = 1e-3, n_iterations = 10):
    self.learning_rate = learning_rate
    self.n_iterations = n_iterations
    self.w = None
  def sigmoid(self, x):
    return 1/(1+np.exp(-x))

  def y_hat(self, X):
    return self.sigmoid(np.dot(X,self.w))    

  def loss(self, yhat, y):
    J = y*np.log(yhat) + (1-y)*np.log(1-yhat)
    return np.mean(J)

  def gradient_descent(self, X, yhat, y):
    dw = (1 / self.m) * np.dot(X.T, (yhat - y))
    self.w -= self.learning_rate*dw
    return self.w

  def fit(self,X,y):
    y = y.reshape(-1,1)
    ones_arr = np.ones((X.shape[0], 1))
    X = np.append(X, ones_arr, axis = 1)
    self.m, self.n = X.shape
    # initial weight
    self.w = np.zeros((self.n, 1))
    # Training phase
    for i in range(self.n_iterations):
      # yhat
      yhat = self.y_hat(X)
      # loss
      J = self.loss(yhat, y)
      if i%(self.n_iterations//10) == 0:
        print(f'Loss at iteration {i+1} is {J}')
        
      # Gradient descent
      self.w = self.gradient_descent(X, yhat, y)

  def predict(self,X):
    ones_arr = np.ones((X.shape[0], 1))
    X = np.append(X, ones_arr, axis = 1)
    y_predict = self.y_hat(X)
    y_predict_class = [1 if j >= 0.5 else 0 for j in y_predict]
    return y_predict_class
  
  def accuracy(self, yhat, y):
    return np.sum(yhat == y)/len(yhat)

In [56]:
# Load dataset
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

bc = load_breast_cancer()
X,y = bc.data, bc.target
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size = 0.2, random_state = 1)

logistic_regression = LogisticRegression(learning_rate = 0.00001, n_iterations= 1000)
logistic_regression.fit(X_train, y_train)
predictions = logistic_regression.predict(X_val)
acc = logistic_regression.accuracy(predictions, y_val)
acc

Loss at iteration 1 is -0.6931471805599453
Loss at iteration 101 is -0.4584087546728591
Loss at iteration 201 is -0.355176736183702
Loss at iteration 301 is -0.32461390132401113
Loss at iteration 401 is -0.3032509775032446
Loss at iteration 501 is -0.2877120346760734
Loss at iteration 601 is -0.27603641087046776
Loss at iteration 701 is -0.2669627200994013
Loss at iteration 801 is -0.25969321731720313
Loss at iteration 901 is -0.25372127863885907


0.9122807017543859