<a href="https://colab.research.google.com/github/smritiguleria/demo_wscubepython/blob/main/MINI_BATCH_gradient_descent.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import torch
from torch import nn
from torchvision.datasets import MNIST
from torch.utils.data import DataLoader
from torchvision import transforms
import time
import datetime
import os

In [5]:
# Define your neural network architecture with batch normalization
class MLP(nn.Module):
    """Fully connected classifier with batch normalization after each hidden layer.

    The network is ``Flatten -> [Linear -> BatchNorm1d -> ReLU] * len(hidden_sizes) -> Linear``.
    With the default arguments it is the 784 -> 64 -> 32 -> 10 network used for
    28x28 single-channel images and 10 output classes.

    Args:
        in_features: Number of values per sample after flattening.
        hidden_sizes: Width of each hidden layer, in order.
        num_classes: Number of output logits.
    """

    def __init__(self, in_features=28 * 28, hidden_sizes=(64, 32), num_classes=10):
        super().__init__()
        # Flatten comes first so callers may pass image-shaped batches directly.
        modules = [nn.Flatten()]
        width = in_features
        for hidden in hidden_sizes:
            # Linear -> BatchNorm -> ReLU, created in the same order as the
            # hand-written stack so the indices inside ``self.layers`` (and
            # therefore the state_dict keys) are unchanged for the defaults.
            modules += [nn.Linear(width, hidden), nn.BatchNorm1d(hidden), nn.ReLU()]
            width = hidden
        # Final projection to one logit per class; no activation is applied here.
        modules.append(nn.Linear(width, num_classes))
        self.layers = nn.Sequential(*modules)

    def forward(self, x):
        """Return the raw class logits produced by ``self.layers`` for the batch ``x``."""
        return self.layers(x)

In [6]:
if __name__ == '__main__':
    # Set random seed for reproducibility
    torch.manual_seed(47)

    # Preprocessing pipeline applied to every image; currently it only converts
    # each image to a tensor.
    transform = transforms.Compose([
        transforms.ToTensor()
    ])

    # Load the MNIST training split, downloading it into the current working
    # directory if needed. The pipeline defined above is passed in so that any
    # step added to it actually takes effect (it was previously built but unused).
    train_data = MNIST(os.getcwd(), train=True, download=True, transform=transform)

    # Mini-batches of 64 samples, reshuffled every epoch.
    train_loader = DataLoader(train_data, batch_size=64, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to /content/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 115354147.81it/s]


Extracting /content/MNIST/raw/train-images-idx3-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to /content/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 30489729.13it/s]


Extracting /content/MNIST/raw/train-labels-idx1-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to /content/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 28039064.74it/s]


Extracting /content/MNIST/raw/t10k-images-idx3-ubyte.gz to /content/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to /content/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 14238063.35it/s]


Extracting /content/MNIST/raw/t10k-labels-idx1-ubyte.gz to /content/MNIST/raw



In [7]:
# Network to be trained.
mlp = MLP()

# Classification objective: cross-entropy between the logits and the integer labels.
loss_function = nn.CrossEntropyLoss()

# Adam over every parameter of the network, learning rate 0.001.
optimizer = torch.optim.Adam(params=mlp.parameters(), lr=0.001)

In [8]:
NUM_EPOCHS = 3    # number of full passes over train_loader
LOG_EVERY = 100   # number of mini-batches averaged into each progress line

# The model contains BatchNorm layers, which behave differently in training and
# evaluation mode. Select training mode explicitly so this cell still trains
# correctly when re-run after any step that switched the model to eval mode.
mlp.train()

# perf_counter is a monotonic clock intended for measuring durations; unlike
# time.time() it is not affected by system clock adjustments.
start_time = time.perf_counter()

# Training loop
for epoch in range(NUM_EPOCHS):
    print(f'Starting epoch {epoch + 1}')
    running_loss = 0.0
    # Count batches from 1 so the counter can be printed and tested directly.
    for batch_idx, (inputs, labels) in enumerate(train_loader, start=1):
        optimizer.zero_grad()                   # Clear gradients left over from the previous step
        outputs = mlp(inputs)                   # Forward pass; the model's nn.Flatten layer reshapes the images
        loss = loss_function(outputs, labels)   # Compute the loss
        loss.backward()                         # Backpropagation
        optimizer.step()                        # Update the parameters

        running_loss += loss.item()
        if batch_idx % LOG_EVERY == 0:
            # Report the mean loss over the last LOG_EVERY mini-batches, then
            # start a new window. Batches after the final full window of an
            # epoch are not reported.
            print(f'Epoch {epoch + 1}, Mini-batch {batch_idx}, Loss: {running_loss / LOG_EVERY}')
            running_loss = 0.0
print('Training finished')

end_time = time.perf_counter()  # Record end time
print('Training process has been completed. ')
training_time = end_time - start_time  # elapsed seconds as a float

# timedelta renders the elapsed seconds as H:MM:SS.ffffff
print('Training time:', str(datetime.timedelta(seconds=training_time)))

Starting epoch 1
Epoch 1, Mini-batch 100, Loss: 1.1071095156669617
Epoch 1, Mini-batch 200, Loss: 0.48408970385789873
Epoch 1, Mini-batch 300, Loss: 0.3104418051242828
Epoch 1, Mini-batch 400, Loss: 0.26336906000971794
Epoch 1, Mini-batch 500, Loss: 0.22288601607084274
Epoch 1, Mini-batch 600, Loss: 0.20098184302449226
Epoch 1, Mini-batch 700, Loss: 0.18423103533685206
Epoch 1, Mini-batch 800, Loss: 0.16405216380953788
Epoch 1, Mini-batch 900, Loss: 0.14631170395761728
Starting epoch 2
Epoch 2, Mini-batch 100, Loss: 0.12182405546307563
Epoch 2, Mini-batch 200, Loss: 0.1157226226851344
Epoch 2, Mini-batch 300, Loss: 0.12297858588397503
Epoch 2, Mini-batch 400, Loss: 0.12761864580214025
Epoch 2, Mini-batch 500, Loss: 0.12739025708287954
Epoch 2, Mini-batch 600, Loss: 0.10691166184842586
Epoch 2, Mini-batch 700, Loss: 0.12470327839255332
Epoch 2, Mini-batch 800, Loss: 0.10864155779592694
Epoch 2, Mini-batch 900, Loss: 0.10706586236134172
Starting epoch 3
Epoch 3, Mini-batch 100, Loss: 0.0