<a href="https://colab.research.google.com/github/rokmr/Machine-Learning/blob/main/src/colab/LogisticRegression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import datasets

import torch
from torch import nn

In [2]:
def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy

# Data

In [3]:
bc = datasets.load_breast_cancer()
X, y = bc.data, bc.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Numpy

In [4]:
import numpy as np


class LogisticRegression:
    def __init__(self, learning_rate=0.001, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        n_samples, n_features = X.shape

        # init parameters
        self.weights = np.zeros(n_features)
        self.bias = 0

        # gradient descent
        for _ in range(self.n_iters):
            # approximate y with linear combination of weights and x, plus bias
            linear_model = np.dot(X, self.weights) + self.bias
            # apply sigmoid function
            y_predicted = self._sigmoid(linear_model)

            # compute gradients
            dw = (1 / n_samples) * np.dot(X.T, (y_predicted - y))
            db = (1 / n_samples) * np.sum(y_predicted - y)
            # update parameters
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        linear_model = np.dot(X, self.weights) + self.bias
        y_predicted = self._sigmoid(linear_model)
        y_predicted_cls = [1 if i > 0.5 else 0 for i in y_predicted]
        return np.array(y_predicted_cls)

    def _sigmoid(self, x):
        return 1 / (1 + np.exp(-x))

In [5]:
regressor = LogisticRegression(learning_rate=0.001, n_iters=1000)
regressor.fit(X_train, y_train)
predictions = regressor.predict(X_test)
print("Logistic Regresion classification accuracy:", accuracy(y_test, predictions))

Logistic Regresion classification accuracy: 0.9473684210526315


  return 1 / (1 + np.exp(-x))


# PyTorch

In [6]:
torch.manual_seed(42)
class LogisticRegression(nn.Module):
    def __init__(self, input_features):
        super(LogisticRegression, self).__init__()
        self.layer1 = nn.Linear(input_features, 8)
        self.layer2 = nn.Linear(8, 1)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.bn = nn.BatchNorm1d(8)

    def forward(self, x):
        x = self.layer1(x)
        x = self.bn(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = torch.sigmoid(self.layer2(x))
        return x

In [7]:
X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, test_size=0.2, random_state=42)


EPOCHS = 1000
LR = 0.001
BATCH_SIZE = 32

input_features = X_train.shape[1]
model = LogisticRegression(input_features)
optimizer = torch.optim.Adam(params=model.parameters(), lr=LR)
loss_fn = nn.BCELoss()


X_train = torch.tensor(X_train).float()
y_train = torch.tensor(y_train).float().reshape(-1, 1)
train_dataset = torch.utils.data.TensorDataset(X_train, y_train)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)


best_loss = float('inf')
patience = 20    # inceasing it might overfit and dec might underfit
patience_counter = 0
best_model_state = None

for epoch in range(EPOCHS):
    epoch_loss = 0
    epoch_acc = 0
    batches = 0

    model.train()
    for batch_X, batch_y in train_loader:

        y_pred = model(batch_X)
        loss = loss_fn(y_pred, batch_y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()
        with torch.no_grad():
            batch_acc = ((y_pred > 0.5).float() == batch_y).float().mean()
            epoch_acc += batch_acc
        batches += 1

    epoch_loss /= batches
    epoch_acc /= batches

    if epoch % 100 == 0:
        model.eval()
        with torch.inference_mode():
            X_val_tensor = torch.tensor(X_val).float()
            val_pred = model(X_val_tensor)
            val_acc = ((val_pred > 0.5).float().squeeze() == torch.tensor(y_val).float()).float().mean()
            print(f'Epoch: [{epoch+1}/{EPOCHS}] || Loss: {epoch_loss:.4f} || Train Acc: {epoch_acc:.4f} || Val Acc: {val_acc:.4f}')

    if epoch_loss < best_loss:
        best_loss = epoch_loss
        patience_counter = 0
        best_model_state = model.state_dict()
    else:
        patience_counter += 1

    if patience_counter >= patience:
        print(f'Early stopping at epoch {epoch} || best loss: {best_loss:.4f}')
        model.load_state_dict(best_model_state)
        break

Epoch: [1/1000] || Loss: 0.7341 || Train Acc: 0.4306 || Val Acc: 0.7912
Epoch: [101/1000] || Loss: 0.2259 || Train Acc: 0.9505 || Val Acc: 0.9670
Early stopping at epoch 139 || best loss: 0.2010


In [8]:
model.eval()
with torch.no_grad():
    X_test_tensor = torch.tensor(X_test).float()
    predictions = model(X_test_tensor)
    predicted_labels = (predictions > 0.5).float().squeeze()
    acc = ((predicted_labels == torch.tensor(y_test).float()).float()).mean().item()
    print("\nFinal Test Accuracy:", acc)


Final Test Accuracy: 0.9736841917037964
