In [1]:
import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [4]:
transform = transforms.ToTensor()

train_dataset = datasets.MNIST(root ='./data',train = True,download= True,transform = transform)
test_dataset = datasets.MNIST(root ='./data',train = False,download= True,transform = transform)

In [5]:
batch_size = 64
train_dataloader = DataLoader(train_dataset,batch_size = batch_size,shuffle = True)
test_dataloader = DataLoader(test_dataset,batch_size = batch_size,shuffle=False)

In [None]:
print(f"Training samples: {len(train_dataset)}")
print(f"Test samples: {len(test_dataset)}")
print(f"Batch size: {batch_size}")


Training samples: 60000
Test samples: 10000
Batch size: 64


In [14]:
X, y = next(iter(train_dataloader))
print("Image tensor shape:", X.shape)
print("Label tensor shape:", y.shape)
print(f"\nLabels in this batch: {y[:10].tolist()}...")


Image tensor shape: torch.Size([64, 1, 28, 28])
Label tensor shape: torch.Size([64])

Labels in this batch: [9, 1, 4, 4, 5, 4, 0, 9, 2, 7]...


In [15]:
class SimpleMLP(nn.Module):
    def __init__(self):
        super(SimpleMLP,self).__init__()
        self.flatten = nn.Flatten()
        self.linear1 = nn.Linear(784, 128)
        self.relu1 = nn.ReLU()
        self.linear2 = nn.Linear(128,64)
        self.relu2 = nn.ReLU()
        self.linear3 = nn.Linear(64,10)
    
    def forward(self,x):
        x = self.flatten(x)
        x = self.linear1(x)
        x = self.relu1(x)
        x = self.linear2(x)
        x = self.relu2(x)
        x = self.linear3(x)
        return x
model = SimpleMLP()    

In [18]:
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(),lr =0.001)

In [21]:
def train(dataloader, model,loss_fn, optimizer,epochs = 5):
    model.train()
    for epoch in range(epochs):
        total_loss= 0.0
        num_batches = 0

        for batch_idx,(X,y) in enumerate(dataloader):
            pred = model(X)

            loss = loss_fn(pred,y)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            total_loss += loss.item()
            num_batches+=1

            if (batch_idx + 1) % 100 == 0:
                avg_loss = total_loss/num_batches
                print(f'Epoch {epoch + 1}/{epochs}, Batch {batch_idx + 1}/{len(dataloader)}, Loss: {avg_loss:.4f}')
    avg_loss = total_loss / num_batches
    print(f'Epoch {epoch + 1}/{epochs} completed. Average Loss: {avg_loss:.4f}\n')
# Train the model
print("Starting training...\n")
train(train_dataloader, model, loss_fn, optimizer, epochs=5)
print("Training completed!")           

Starting training...

Epoch 1/5, Batch 100/938, Loss: 1.0121
Epoch 1/5, Batch 200/938, Loss: 0.7086
Epoch 1/5, Batch 300/938, Loss: 0.5796
Epoch 1/5, Batch 400/938, Loss: 0.5098
Epoch 1/5, Batch 500/938, Loss: 0.4613
Epoch 1/5, Batch 600/938, Loss: 0.4278
Epoch 1/5, Batch 700/938, Loss: 0.4000
Epoch 1/5, Batch 800/938, Loss: 0.3774
Epoch 1/5, Batch 900/938, Loss: 0.3571
Epoch 2/5, Batch 100/938, Loss: 0.1791
Epoch 2/5, Batch 200/938, Loss: 0.1662
Epoch 2/5, Batch 300/938, Loss: 0.1639
Epoch 2/5, Batch 400/938, Loss: 0.1612
Epoch 2/5, Batch 500/938, Loss: 0.1594
Epoch 2/5, Batch 600/938, Loss: 0.1551
Epoch 2/5, Batch 700/938, Loss: 0.1507
Epoch 2/5, Batch 800/938, Loss: 0.1475
Epoch 2/5, Batch 900/938, Loss: 0.1447
Epoch 3/5, Batch 100/938, Loss: 0.1082
Epoch 3/5, Batch 200/938, Loss: 0.1016
Epoch 3/5, Batch 300/938, Loss: 0.1010
Epoch 3/5, Batch 400/938, Loss: 0.0995
Epoch 3/5, Batch 500/938, Loss: 0.0990
Epoch 3/5, Batch 600/938, Loss: 0.0976
Epoch 3/5, Batch 700/938, Loss: 0.0970
Epo