# Introduction to Machine Learning

## Logistic Regression

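Logistic regression is a statistical method for binary classification: it passes a linear combination of the input features through the sigmoid function to estimate the probability that a sample belongs to the positive class.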

In [14]:
# implementation of logistic regression

import numpy as np
from tqdm import trange

class LogisticRegression:
    """Binary logistic-regression classifier trained with full-batch gradient descent.

    Parameters
    ----------
    lr : float, default=0.001
        Step size applied to every gradient update.
    epochs : int, default=1000
        Number of gradient-descent iterations run by :meth:`fit`.

    Attributes
    ----------
    weights : numpy.ndarray or None
        One coefficient per feature; ``None`` until :meth:`fit` has run.
    bias : float or None
        Intercept term; ``None`` until :meth:`fit` has run.
    """

    def __init__(self, lr=0.001, epochs=1000):
        self.lr = lr
        self.epochs = epochs
        # Model parameters are created by fit().
        self.weights = None
        self.bias = None

    def sigmoid(self, z):
        """Return the element-wise logistic function ``1 / (1 + exp(-z))``."""
        # Clip first so np.exp cannot overflow for very large |z|.
        z = np.clip(z, -500, 500)
        return 1 / (1 + np.exp(-z))

    def compute_loss(self, y_true, y_pred):
        """Return the mean binary cross-entropy of ``y_pred`` against ``y_true``.

        Both arguments may be any array-like of equal length; ``y_pred`` holds
        probabilities and is clipped away from 0 and 1 so the logs stay finite.
        """
        eps = 1e-15
        # Coerce labels so plain Python lists support ``1 - y_true``.
        y_true = np.asarray(y_true)
        y_pred = np.clip(y_pred, eps, 1 - eps)
        loss = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
        return np.mean(loss)

    def compute_predictions(self, X):
        """Return ``sigmoid(X @ weights + bias)`` for every row of ``X``."""
        z = np.dot(X, self.weights) + self.bias
        return self.sigmoid(z)

    def compute_gradients(self, X, y_true, y_pred):
        """Return ``(dw, db)``, the loss gradients w.r.t. weights and bias.

        Raises
        ------
        ValueError
            If ``y_true`` and ``y_pred`` differ in length.
        """
        # Accept lists as well as arrays; ``X.T`` below needs an ndarray.
        X = np.asarray(X)
        y_true = np.asarray(y_true)
        y_pred = np.asarray(y_pred)
        if len(y_true) != len(y_pred):
            raise ValueError("y_true and y_pred must have the same length")

        errors = y_pred - y_true
        dw = np.dot(X.T, errors) / X.shape[0]
        db = np.mean(errors)
        return dw, db

    @staticmethod
    def accuracy(y_true, y_pred):
        """Return the fraction of positions where ``y_true`` equals ``y_pred``.

        Declared static so it can be called on the class or on an instance.
        """
        return np.mean(np.asarray(y_true) == np.asarray(y_pred))

    def fit(self, X, y):
        """Learn ``weights`` and ``bias`` from ``X`` (n_samples, n_features) and ``y``.

        Parameters start at zero and are updated once per epoch using the
        gradient over the whole training set. The loss is printed on every
        100th epoch and on the final one.
        """
        X = np.asarray(X)
        # Flatten so column-vector labels of shape (n, 1) do not broadcast
        # against the 1-D predictions.
        y = np.asarray(y).reshape(-1)
        self.weights = np.zeros(X.shape[1])
        self.bias = 0.0

        for epoch in trange(self.epochs, desc="Training Progress"):
            predictions = self.compute_predictions(X)
            # Loss is measured before this epoch's update is applied.
            loss = self.compute_loss(y, predictions)
            dw, db = self.compute_gradients(X, y, predictions)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

            if epoch % 100 == 0 or epoch == self.epochs - 1:
                print(f"[Epoch {epoch + 1}/{self.epochs}] Loss: {loss:.6f}")

    def predict(self, X, threshold=0.5):
        """Return 0/1 integer labels: 1 where the predicted probability >= ``threshold``."""
        predictions = self.compute_predictions(X)
        return (predictions >= threshold).astype(int)
    

### Testing Logistic Regression

In [15]:
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler 
from sklearn.linear_model import LogisticRegression as SklearnLogisticRegression

# Load the breast-cancer dataset bundled with scikit-learn as plain Python lists.
data = load_breast_cancer()
X_raw, y = data.data.tolist(), data.target.tolist()

# Hold out 20% of the samples for testing; the fixed seed makes the split repeatable.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply them
# to both splits so no test-set information leaks into training.
scaler = StandardScaler()
scaler.fit(X_train_raw)
X_train = scaler.transform(X_train_raw).tolist()
X_test = scaler.transform(X_test_raw).tolist()

# Train the from-scratch classifier defined above and predict the test labels.
clf = LogisticRegression()
clf.fit(X_train, y_train)
clf_pred = clf.predict(X_test)

Training Progress: 100%|█████████████████| 1000/1000 [00:00<00:00, 30659.89it/s]

[Epoch 1/1000] Loss: 0.693147
[Epoch 101/1000] Loss: 0.546664
[Epoch 201/1000] Loss: 0.461122
[Epoch 301/1000] Loss: 0.405350
[Epoch 401/1000] Loss: 0.365906
[Epoch 501/1000] Loss: 0.336352
[Epoch 601/1000] Loss: 0.313247
[Epoch 701/1000] Loss: 0.294592
[Epoch 801/1000] Loss: 0.279145
[Epoch 901/1000] Loss: 0.266094
[Epoch 1000/1000] Loss: 0.254988



