In [1]:
import torch

In [20]:
# Step1: Collect Training data and validation data
X_train = torch.randn(1000,784)
y_train = torch.randint(0,10,(1000,))
X_val = torch.randn(200,784)
y_val = torch.randint(0,10,(200,))

In [21]:
print(X_train)
print(y_train)

tensor([[ 0.5651,  0.3965,  0.2860,  ..., -0.1750, -1.0937,  0.7724],
        [ 0.4301, -0.2022, -0.1151,  ..., -0.3143,  0.6914,  0.7373],
        [ 0.5783, -0.8691, -0.2475,  ..., -0.5898, -0.6033, -0.2576],
        ...,
        [-2.2472, -0.7151,  2.1062,  ...,  0.5957, -0.6554, -0.7459],
        [-0.8953, -0.5404, -0.4277,  ..., -0.5333,  1.5980, -0.5130],
        [ 0.0940, -0.4892,  0.1914,  ...,  1.0593, -0.1431,  0.5592]])
tensor([9, 0, 4, 7, 1, 8, 5, 2, 9, 3, 4, 9, 9, 4, 8, 9, 4, 1, 8, 1, 2, 8, 2, 6,
        7, 8, 0, 0, 4, 7, 1, 0, 8, 0, 8, 7, 1, 7, 7, 5, 7, 3, 0, 1, 6, 0, 2, 8,
        3, 2, 3, 8, 3, 9, 4, 1, 1, 0, 2, 8, 3, 5, 6, 1, 2, 0, 3, 8, 9, 1, 2, 1,
        6, 4, 8, 2, 8, 3, 6, 2, 9, 5, 8, 3, 9, 3, 6, 6, 3, 0, 9, 8, 6, 8, 5, 0,
        3, 4, 0, 3, 0, 3, 1, 3, 5, 0, 6, 2, 8, 1, 4, 9, 2, 8, 8, 6, 3, 8, 1, 8,
        3, 6, 6, 0, 2, 0, 5, 0, 8, 3, 2, 8, 8, 0, 1, 0, 5, 4, 6, 2, 8, 6, 0, 4,
        0, 1, 9, 7, 3, 9, 2, 8, 7, 8, 2, 7, 6, 7, 9, 3, 4, 1, 4, 8, 9, 8, 9, 8,
      

In [22]:
# Data Loaders
from torch.utils.data import DataLoader,TensorDataset 

In [23]:
# Step 2: Get the datasets
train_dataset = TensorDataset(X_train,y_train)
val_dataset = TensorDataset(X_val,y_val)

In [26]:
# Step 3: 
# Get the dataloaders
# while crating the DataLoaders mention batch size
train_dataloader = DataLoader(train_dataset,batch_size=32) 
val_dataloader = DataLoader(val_dataset,batch_size=32)

In [33]:
train_dataloader

<torch.utils.data.dataloader.DataLoader at 0x10b5b8f30>

In [27]:
# Create directories
import os
os.makedirs('checkpoints', exist_ok=True)

In [31]:
# Step 4
# Model Creation
import torch.nn as nn

class SimpleModel(nn.Module):
    def __init__(self):
        super().__init__()
        ##First Layer
        self.fc1 = nn.Linear(784,128)
        self.relu1 = nn.ReLU()
        ##Second Layer
        self.fc2 = nn.Linear(128,64)
        self.relu2= nn.ReLU()
        ## Final
        self.fc3 = nn.Linear(64,10)
    def forward(self,x):
        x = self.fc1(x)
        x = self.relu1(x)
        x = self.fc2(x)
        x = self.relu2(x)
        x = self.fc3(x)
        return x

model = SimpleModel()      

In [32]:
# Step 5
# Define Infra
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(),lr=0.001)
num_epochs = 10

In [39]:
# Step 6
# Training loop with check pointing
best_val_loss = float('inf')
for epoch in range(0,num_epochs):
    # training
    model.train()
    train_loss = 0
    for batch_data,batch_target in train_dataloader:
        output = model(batch_data)
        loss = criterion(output,batch_target) # Compute loss
        optimiser.zero_grad() # Zero gradients from previous iteration
        loss.backward() # calcluate new gradients via back propagation
        optimiser.step() # updates gradients
        train_loss+=loss.item()
        
    train_loss/=len(train_dataloader)      
    # validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_data,batch_target in val_dataloader:
            output = model(batch_data)
            loss = criterion(output,batch_target)
            val_loss+=loss.item()
        val_loss/=len(val_dataloader)
    print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}")

    # Save best checkpoint
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimiser.state_dict(),
            'loss': val_loss,
        }
        torch.save(checkpoint, 'checkpoints/best_model.pt')
        print(f"  ✓ Best model saved!")

Epoch 1: Train Loss=0.5657, Val Loss=2.8903
  ✓ Best model saved!
Epoch 2: Train Loss=0.1044, Val Loss=3.2346
Epoch 3: Train Loss=0.0320, Val Loss=3.4321
Epoch 4: Train Loss=0.0172, Val Loss=3.5524
Epoch 5: Train Loss=0.0115, Val Loss=3.6473
Epoch 6: Train Loss=0.0084, Val Loss=3.7267
Epoch 7: Train Loss=0.0065, Val Loss=3.7954
Epoch 8: Train Loss=0.0052, Val Loss=3.8559
Epoch 9: Train Loss=0.0042, Val Loss=3.9106
Epoch 10: Train Loss=0.0035, Val Loss=3.9605


In [44]:
# Step 7 
# Inference
# Load best checkpoint
checkpoint = torch.load('checkpoints/best_model.pt')
model.load_state_dict(checkpoint['model_state_dict'])

# Perform inference
model.eval()
test_data = torch.randn(10, 784)  # 10 new samples

with torch.no_grad():
    output = model(test_data)
    predictions = torch.argmax(output, dim=1)
    probabilities = torch.softmax(output, dim=1)

print("Predictions:", predictions)
print("Confidence:", probabilities.max(dim=1)[0])

Predictions: tensor([7, 3, 8, 2, 6, 3, 9, 5, 0, 6])
Confidence: tensor([0.6436, 0.3926, 0.2961, 0.7322, 0.4617, 0.4327, 0.2413, 0.4795, 0.7745,
        0.3017])


In [46]:
# Step 8
# Resume the checkpoint
checkpoint = torch.load('checkpoints/best_model.pt')
model.load_state_dict(checkpoint['model_state_dict'])
optimiser.load_state_dict(checkpoint['optimizer_state_dict'])
start_epoch = checkpoint['epoch'] + 1
# Continue training
for epoch in range(start_epoch, num_epochs + 5):
    # Training loop continues...
    pass