Implementing mini-batch gradient descent in a neural network using PyTorch



In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.datasets import load_breast_cancer
from torch.utils.data import TensorDataset, DataLoader
from sklearn.metrics import confusion_matrix, f1_score
torch.manual_seed(0)

def preprocess_data(X, y, batch_size, test_size=0.2, random_state=42):
    """Split, standardize and wrap features/labels in mini-batch DataLoaders.

    The data is split into train and test partitions, the features are
    standardized, and both partitions are converted to float32 tensors and
    wrapped in ``DataLoader`` objects. The shapes of the intermediate arrays
    and tensors are printed along the way.

    Args:
        X: Feature array with one row per sample.
        y: 1-D label array with one entry per sample.
        batch_size: Number of samples per mini-batch, used for both loaders.
        test_size: Forwarded to ``train_test_split`` (default 0.2).
        random_state: Seed forwarded to ``train_test_split`` (default 42).

    Returns:
        A ``(train_loader, test_loader)`` tuple. The training loader
        reshuffles its samples on every pass; the test loader keeps a
        fixed order.
    """
    # Split the data into training and test sets
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=test_size, random_state=random_state
    )
    # Trailing comments show the shapes seen for the 569-sample, 30-feature
    # breast-cancer data with the default 80/20 split.
    print(f'X_train shape: {X_train.shape}')  # e.g. (455, 30)
    print(f'y_train shape: {y_train.shape}')  # e.g. (455,)
    print(f'X_test shape: {X_test.shape}')  # e.g. (114, 30)
    print(f'y_test shape: {y_test.shape}')  # e.g. (114,)

    # Normalize the features. The scaler is fitted on the training partition
    # only and then re-used on the test partition, so no test-set statistics
    # leak into training.
    scaler = StandardScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Convert to PyTorch tensors. Labels become a column vector so their
    # shape lines up with a model that emits one value per sample.
    X_train_tensor = torch.tensor(X_train, dtype=torch.float32)
    y_train_tensor = torch.tensor(y_train, dtype=torch.float32).view(len(y_train), 1)
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32)
    y_test_tensor = torch.tensor(y_test, dtype=torch.float32).view(len(y_test), 1)
    print(f'X_train_tensor shape: {X_train_tensor.shape}')  # e.g. torch.Size([455, 30])
    print(f'y_train_tensor shape: {y_train_tensor.shape}')  # e.g. torch.Size([455, 1])
    print(f'X_test_tensor shape: {X_test_tensor.shape}')  # e.g. torch.Size([114, 30])
    print(f'y_test_tensor shape: {y_test_tensor.shape}')  # e.g. torch.Size([114, 1])

    # Create DataLoaders for mini-batch gradient descent. Only the training
    # data is shuffled; evaluation order does not need to vary.
    train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

    test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    return train_loader, test_loader

# Define the neural network model
class Net(nn.Module):
    """Fully connected network: two ReLU hidden layers and a sigmoid output.

    Args:
        input_dim: Number of input features per sample.
        hidden_dim: Width of both hidden layers.
        output_dim: Number of output units; each is squashed into (0, 1).
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        # Sub-modules are registered in forward order. The linear layers are
        # created in the same sequence as before so that seeded weight
        # initialisation and the state_dict keys stay the same.
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, hidden_dim)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_dim, output_dim)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        """Return sigmoid activations for a batch of inputs ``x``."""
        first_hidden = self.relu(self.fc1(x))
        second_hidden = self.relu2(self.fc2(first_hidden))
        return self.sigmoid(self.fc3(second_hidden))

# Training loop with mini-batch gradient descent
def train_model(net, train_loader, criterion, optimizer, epochs):
    """Train ``net`` with mini-batch gradient descent, printing the loss per epoch.

    Args:
        net: Model to optimize; it is switched to training mode.
        train_loader: Iterable yielding ``(inputs, targets)`` mini-batches.
        criterion: Loss function called as ``criterion(outputs, targets)``;
            assumed to return the mean loss over the mini-batch.
        optimizer: Optimizer whose ``zero_grad``/``step`` update ``net``.
        epochs: Number of full passes over ``train_loader``.

    Returns:
        A list holding the average per-sample loss of each epoch, in order.
        An epoch in which the loader yields no samples is recorded as NaN.
    """
    net.train()
    history = []
    for epoch in range(epochs):
        running_loss = 0.0
        samples_seen = 0
        for inputs, targets in train_loader:
            batch_len = inputs.size(0)
            optimizer.zero_grad()  # clear gradients left over from the previous step
            outputs = net(inputs)  # forward pass
            loss = criterion(outputs, targets)  # average loss over this mini-batch
            loss.backward()  # gradient calculation
            optimizer.step()  # weight update
            # Re-weight the batch mean by the batch size so that a smaller
            # final batch does not skew the epoch average.
            running_loss += loss.item() * batch_len
            samples_seen += batch_len

        # Divide by the samples actually processed rather than the dataset
        # size: the two differ when the loader drops or subsamples data, and
        # this also avoids a ZeroDivisionError on an empty loader.
        epoch_loss = running_loss / samples_seen if samples_seen else float('nan')
        history.append(epoch_loss)
        print(f'Epoch {epoch+1}/{epochs}, Loss: {epoch_loss:.4f}')
    return history

def evaluate_model(net, test_loader):
    """Print accuracy, confusion matrix and F1 score of ``net`` on ``test_loader``.

    Outputs of at least 0.5 are counted as class 1, everything else as class 0.

    Args:
        net: Model to evaluate; it is switched to evaluation mode. Assumed to
            emit one value per sample.
        test_loader: Iterable yielding ``(inputs, targets)`` mini-batches.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    net.eval()  # switch to evaluation mode
    correct = 0
    y_true, y_pred = [], []
    with torch.no_grad():  # no gradients are needed for evaluation
        for inputs, targets in test_loader:
            outputs = net(inputs)
            # Flatten both sides before comparing: a (B,) vs (B, 1) mismatch
            # would otherwise broadcast to (B, B) and inflate the count, and
            # the metric functions receive flat label lists.
            flat_pred = (outputs >= 0.5).float().reshape(-1)
            flat_true = targets.reshape(-1)
            correct += (flat_pred == flat_true).sum().item()
            y_true.extend(flat_true.tolist())
            y_pred.extend(flat_pred.tolist())

    total = len(y_true)
    if total == 0:
        raise ValueError('test_loader yielded no samples to evaluate')

    accuracy = correct / total
    print(f'Accuracy on the test set: {accuracy:.4f}')
    print(f'Confusion matrix: {confusion_matrix(y_true, y_pred)}')
    print(f'F1 score: {f1_score(y_true, y_pred)}')


# Hyperparameters, named so the experiment can be tuned in one place.
BATCH_SIZE = 32
HIDDEN_DIM = 16
LEARNING_RATE = 0.01
EPOCHS = 30

# Load and preprocess the data
data = load_breast_cancer()
X, y = data.data, data.target
train_loader, test_loader = preprocess_data(X, y, batch_size=BATCH_SIZE)

# The input width follows the feature count; the single output unit is the
# sigmoid activation that BCELoss consumes below.
net = Net(input_dim=X.shape[1], hidden_dim=HIDDEN_DIM, output_dim=1)

# Define loss function and optimizer
criterion = nn.BCELoss()  # Binary Cross Entropy Loss
optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE)

train_model(net, train_loader, criterion, optimizer, epochs=EPOCHS)
evaluate_model(net, test_loader)

X_train shape: (455, 30)
y_train shape: (455,)
X_test shape: (114, 30)
y_test shape: (114,)
y_train unique values: tensor([0., 1.])
X_train_tensor shape: torch.Size([455, 30])
y_train_tensor shape: torch.Size([455, 1])
X_test_tensor shape: torch.Size([114, 30])
y_test_tensor shape: torch.Size([114, 1])
Epoch 1/30, Loss: 0.7320
Epoch 2/30, Loss: 0.7167
Epoch 3/30, Loss: 0.7033
Epoch 4/30, Loss: 0.6907
Epoch 5/30, Loss: 0.6788
Epoch 6/30, Loss: 0.6676
Epoch 7/30, Loss: 0.6565
Epoch 8/30, Loss: 0.6449
Epoch 9/30, Loss: 0.6335
Epoch 10/30, Loss: 0.6218
Epoch 11/30, Loss: 0.6091
Epoch 12/30, Loss: 0.5960
Epoch 13/30, Loss: 0.5819
Epoch 14/30, Loss: 0.5670
Epoch 15/30, Loss: 0.5512
Epoch 16/30, Loss: 0.5347
Epoch 17/30, Loss: 0.5173
Epoch 18/30, Loss: 0.4994
Epoch 19/30, Loss: 0.4815
Epoch 20/30, Loss: 0.4634
Epoch 21/30, Loss: 0.4452
Epoch 22/30, Loss: 0.4266
Epoch 23/30, Loss: 0.4080
Epoch 24/30, Loss: 0.3897
Epoch 25/30, Loss: 0.3716
Epoch 26/30, Loss: 0.3539
Epoch 27/30, Loss: 0.3367
Epo