In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [2]:
from sklearn.datasets import load_breast_cancer

# Load the dataset
dataset = load_breast_cancer()
X = dataset.data
y = dataset.target

In [3]:
sclar = StandardScaler()
X = sclar.fit_transform(X)

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [5]:
X_train.shape

(455, 30)

In [6]:
n_samples , n_features = X_train.shape
weights = np.zeros(n_features)
bias = 0
predicted = np.dot(X_train, weights) + bias


In [7]:
predicted.shape, X_train.T.shape

((455,), (30, 455))

In [8]:
np.dot(X_train.T, (predicted - y_train))

array([ 1.59144923e+02,  9.77017906e+01,  1.62038687e+02,  1.54530856e+02,
        9.41711244e+01,  1.32874915e+02,  1.49352534e+02,  1.72473390e+02,
        7.73814823e+01, -1.55561990e+00,  1.24482044e+02,  6.65479595e+00,
        1.20266023e+02,  1.19684100e+02, -8.05833726e+00,  5.55774918e+01,
        4.17513888e+01,  8.07171850e+01, -5.46732217e-02,  6.89635631e+00,
        1.69736497e+02,  1.07987554e+02,  1.70537234e+02,  1.60227019e+02,
        1.06117395e+02,  1.30298542e+02,  1.40088921e+02,  1.74172919e+02,
        9.68179146e+01,  6.92195499e+01])

In [9]:

class Logistic_Regression:
    def __init__(self, learning_rate = 0.01, epochs = 10000):
        self.lr = learning_rate
        self.epochs = epochs
        
    def sigmoid(self, x):
        return 1 / (1 + np.exp(-x))
    
    def loss(self, y, y_pred):
        epsilon = 1e-15
        return -1/len(y) * np.sum(y * np.log(y_pred + epsilon) + (1 - y) * np.log(1 - y_pred + epsilon))
    
    def Accuracy(self, y, y_pred):
        y_pred_label = [1 if prob > 0.5 else 0 for prob in y_pred]
        return np.sum(y == y_pred_label) / len(y)
    
    def fit(self, X, y):

        n_samples , n_features = X.shape

        self.weights = np.zeros(n_features)
        self.bias = 0

        for epoch in range(self.epochs):

            predicted = self.sigmoid(np.dot(X, self.weights) + self.bias)

            # Gradient Descent 

            dw = 1/(n_samples) * np.dot(X.T, (predicted - y))
            db = 1/(n_samples) * np.sum(predicted - y)

            self.weights -= self.lr * dw 
            self.bias -= self.lr * db

            if epoch % 100 == 0:
                print(f'Epoch {epoch} : Loss = {self.loss(predicted, y)} : Accuracy = {self.Accuracy(y, predicted)}')

        
    def predict(self, X):
        predicted = self.sigmoid(np.dot(X, self.weights) + self.bias)
        return np.array([1 if i > 0.5 else 0 for i in predicted])
    
    def evaluate(self, X, y):
        predicted = self.predict(X)
        return f'Loss {self.loss(predicted, y) } : Accuracy {self.Accuracy(y, predicted)}  '


In [10]:
model = Logistic_Regression(learning_rate=0.1, epochs=200)

In [11]:
model.fit(X_train, y_train)

Epoch 0 : Loss = 17.269388197455335 : Accuracy = 0.37142857142857144
Epoch 100 : Loss = 2.820605938536816 : Accuracy = 0.9824175824175824


In [12]:
model.evaluate(X_test, y_test)

'Loss 0.3029717227623735 : Accuracy 0.9912280701754386  '