# Logistic Regression from Scratch with PyTorch 

This tutorial demonstrates how to implement logistic regression from scratch using PyTorch.  
We use the Breast Cancer Wisconsin dataset to train and evaluate our model.


### 1. Import Required Libraries

In [44]:
import torch
import pandas as pd
import numpy as np
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler


### 2. Load and Preprocess the Dataset

We begin by loading the Breast Cancer dataset, converting it into a DataFrame, and preparing it for training. This includes:
- Splitting into features and labels
- Splitting into train/test sets
- Standardizing the feature values
- Converting everything into PyTorch tensors


In [47]:
# Load the dataset
data = load_breast_cancer()

In [48]:
# Create a DataFrame from the data
df = pd.DataFrame(data=data.data, columns=data.feature_names)

# Add the target column
df['target'] = data.target

# Optional: Display the first few rows
df.head()

Unnamed: 0,mean radius,mean texture,mean perimeter,mean area,mean smoothness,mean compactness,mean concavity,mean concave points,mean symmetry,mean fractal dimension,...,worst texture,worst perimeter,worst area,worst smoothness,worst compactness,worst concavity,worst concave points,worst symmetry,worst fractal dimension,target
0,17.99,10.38,122.8,1001.0,0.1184,0.2776,0.3001,0.1471,0.2419,0.07871,...,17.33,184.6,2019.0,0.1622,0.6656,0.7119,0.2654,0.4601,0.1189,0
1,20.57,17.77,132.9,1326.0,0.08474,0.07864,0.0869,0.07017,0.1812,0.05667,...,23.41,158.8,1956.0,0.1238,0.1866,0.2416,0.186,0.275,0.08902,0
2,19.69,21.25,130.0,1203.0,0.1096,0.1599,0.1974,0.1279,0.2069,0.05999,...,25.53,152.5,1709.0,0.1444,0.4245,0.4504,0.243,0.3613,0.08758,0
3,11.42,20.38,77.58,386.1,0.1425,0.2839,0.2414,0.1052,0.2597,0.09744,...,26.5,98.87,567.7,0.2098,0.8663,0.6869,0.2575,0.6638,0.173,0
4,20.29,14.34,135.1,1297.0,0.1003,0.1328,0.198,0.1043,0.1809,0.05883,...,16.67,152.2,1575.0,0.1374,0.205,0.4,0.1625,0.2364,0.07678,0


In [49]:
df.shape

(569, 31)

In [57]:
# Separate features and target
X = df.drop('target', axis=1)
y = df['target']

# Split the dataset: 80% train, 20% test (you can adjust the test_size as needed)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Optional: Display the shapes of the splits
print(X_train.shape, X_test.shape, y_train.shape, y_test.shape)

(455, 30) (114, 30) (455,) (114,)


In [58]:
print(type(X_train), type(X_test), type(y_train), type(y_test))

<class 'pandas.core.frame.DataFrame'> <class 'pandas.core.frame.DataFrame'> <class 'pandas.core.series.Series'> <class 'pandas.core.series.Series'>


In [59]:
# Standardize features (mean=0, std=1)
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.fit_transform(X_test)

In [60]:
# Convert data to PyTorch tensors
X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.to_numpy(), dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test.to_numpy(), dtype=torch.float32)

In [61]:
print(type(X_train_tensor), type(X_test_tensor), type(y_train_tensor), type(y_test_tensor))

<class 'torch.Tensor'> <class 'torch.Tensor'> <class 'torch.Tensor'> <class 'torch.Tensor'>


In [65]:
print(X_train_tensor.dtype, X_test_tensor.dtype, y_train_tensor.dtype, y_test_tensor.dtype)

torch.float32 torch.float32 torch.float32 torch.float32


### 3. Define the Model Class

We define a class `MySimpleNN` that performs logistic regression using PyTorch tensors.

Key components:
- Manual weight and bias initialization
- Sigmoid activation
- Binary cross-entropy loss calculation


In [80]:
class MySimpleNN:
    def __init__(self, X):
        # Initialize weights and bias with correct shape
        self.weights = torch.rand(X.shape[1], 1, dtype=torch.float32, requires_grad=True)
        self.bias = torch.zeros(1, dtype=torch.float32, requires_grad=True)

    def forward(self, X):
        # Linear transformation + sigmoid activation
        z = torch.matmul(X, self.weights) + self.bias
        # Sigmoid Activation Function
        y_pred = torch.sigmoid(z)
        return y_pred

    def loss_function(self, y_pred, y_true):
        # Clamp predictions to avoid log(0)
        epsilon = 1e-7
        y_pred = torch.clamp(y_pred, epsilon, 1 - epsilon)

        # Binary cross-entropy loss
        loss = -(y_true * torch.log(y_pred) + (1 - y_true) * torch.log(1 - y_pred)).mean()
        return loss


### 4. Training Loop

We train the model using a simple gradient descent approach.

Each step includes:
- Forward pass
- Loss calculation
- Backward pass
- Manual weight update

In [81]:
# Hyperparameters
learning_rate = 0.1
epochs = 25

# Initialize the model
model = MySimpleNN(X_train_tensor)

# Training process
for epoch in range(epochs):
    # Forward pass
    y_pred = model.forward(X_train_tensor)

    # Compute loss
    loss = model.loss_function(y_pred, y_train_tensor)

    # Backward pass
    loss.backward()

    # Update weights and bias manually
    with torch.no_grad():
        model.weights -= learning_rate * model.weights.grad
        model.bias -= learning_rate * model.bias.grad

        # Zero gradients for next iteration
        model.weights.grad.zero_()
        model.bias.grad.zero_()

    # Print loss per epoch
    print(f"Epoch {epoch + 1}, Loss: {loss.item():.4f}")


Epoch 1, Loss: 3.5468
Epoch 2, Loss: 3.3806
Epoch 3, Loss: 3.2106
Epoch 4, Loss: 3.0389
Epoch 5, Loss: 2.8615
Epoch 6, Loss: 2.6850
Epoch 7, Loss: 2.5089
Epoch 8, Loss: 2.3333
Epoch 9, Loss: 2.1586
Epoch 10, Loss: 1.9879
Epoch 11, Loss: 1.8198
Epoch 12, Loss: 1.6570
Epoch 13, Loss: 1.5031
Epoch 14, Loss: 1.3627
Epoch 15, Loss: 1.2357
Epoch 16, Loss: 1.1263
Epoch 17, Loss: 1.0350
Epoch 18, Loss: 0.9615
Epoch 19, Loss: 0.9043
Epoch 20, Loss: 0.8611
Epoch 21, Loss: 0.8290
Epoch 22, Loss: 0.8052
Epoch 23, Loss: 0.7875
Epoch 24, Loss: 0.7739
Epoch 25, Loss: 0.7632


In [82]:
model.bias

tensor([0.3923], requires_grad=True)

### 5. Model Evaluation

We now evaluate the model on the test set.

We'll:
- Use the model to generate predictions
- Apply a confidence threshold
- Compute accuracy

In [88]:
with torch.no_grad():
  # Predict on test data
  y_pred = model.forward(X_test_tensor)

  # Convert probabilities to binary predictions
  y_pred_label = (y_pred > 0.5).float()  # using 0.9 threshold for high confidence

  # Calculate accuracy
  accuracy = (y_pred_label == y_test_tensor).float().mean()
  print(f"Test Accuracy: {accuracy.item() * 100:.2f}%")


Test Accuracy: 53.88%


## Conclusion
We successfully built and trained a logistic regression model **from scratch** using PyTorch.

- We manually managed weights, bias, and gradients.
- We used the binary cross-entropy loss function.
- We achieved a decent accuracy even with a high-confidence threshold.
