In [2]:
import pandas as pd
import numpy as np
import sys
from sklearn import datasets
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [3]:
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def binary_cross_entropy(y_true, y_pred):
    epsilon = 1e-15  # Small constant to avoid log(0)
    y_pred = np.clip(y_pred, epsilon, 1 - epsilon)  # Clip values to avoid log(0) and log(1)
    return -np.mean(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))


In [46]:
class LogisticRegression:
    def __init__(self, lr):
        self.weights = None
        self.bias = 0
        self.lr = lr
    
    def optimize(self, X, y, y_pred):
        n = X.shape[0]
        error = y_pred-y
        
        dw = (1/n) *  np.dot(X.T, error)  
        db = (1/n) * np.sum(error) 
        
        self.weights = self.weights - self.lr*dw
        self.bias = self.bias - self.lr*db
    
    def predict(self, X, test=False):
        y_pred = np.dot(X, self.weights) + self.bias
        y_pred = sigmoid(y_pred)
        if test :
            y_pred = [0 if y<=0.5 else 1 for y in y_pred]
        return y_pred 
    
    def train(self, X, y, epochs):
        n_samples, n_features = X.shape
        self.weights = np.zeros(n_features)
        for epoch in range(epochs):
            y_pred = self.predict(X)
                                               
            ## Calculating error using Cross entropy 
            error = binary_cross_entropy(y, y_pred)

            ## Gradient descent OPtimation to Update WEights
            self.optimize(X, y, y_pred)
            
            if epoch % 10 == 0:
                sys.stdout.write(
                    "\n" +
                    "I:" + str(epoch) +
                    " Train-Err:" + str(error / float(len(X_train)))[0:5] + 
                    "\n"
                )
        return y_pred

In [47]:
data = datasets.load_breast_cancer()
features = pd.DataFrame(data['data'])
target = pd.DataFrame(data['target'])

X_train, X_test, y_train, y_test = train_test_split(features, target, test_size=0.2, random_state=42)


# # Reshape y_train from (80,) to (80, 1)
y_train = np.array(y_train) ; y_train = y_train.reshape(-1)
y_test = np.array(y_test) ; y_test = y_test.reshape(-1)

X_train = np.array(X_train)
X_test = np.array(X_test)

y_train.shape

(455,)

In [48]:
model = LogisticRegression(lr=0.01)

y_pred = model.train(X_train, y_train, epochs=100)
y_test_pred = model.predict(X_test)



I:0 Train-Err:0.001

I:10 Train-Err:0.047

I:20 Train-Err:0.047

I:30 Train-Err:0.047

I:40 Train-Err:0.047

I:50 Train-Err:0.047

I:60 Train-Err:0.047

I:70 Train-Err:0.047

I:80 Train-Err:0.047

I:90 Train-Err:0.047


  return 1.0 / (1.0 + np.exp(-z))
