In [32]:
from pathlib import Path

import torch

In [None]:
# Seed the global RNG so the synthetic data below (and any later random draw,
# such as weight initialisation) is identical on every Restart & Run All.
SEED = 42
torch.manual_seed(SEED)

# Dimensions of the synthetic dataset.
NUM_CLASSES = 10
IMAGE_SHAPE = (3, 32, 32)  # channels, height, width
N_TRAIN, N_VAL = 500, 100

# Images are uniform noise in [0, 1); labels are integers in [0, NUM_CLASSES).
# Labels are drawn independently of the images, so there is nothing to
# generalise: validation loss is expected to stay near ln(10) ~= 2.30.
X_train = torch.rand(N_TRAIN, *IMAGE_SHAPE)
y_train = torch.randint(0, NUM_CLASSES, (N_TRAIN,))
X_val = torch.rand(N_VAL, *IMAGE_SHAPE)
y_val = torch.randint(0, NUM_CLASSES, (N_VAL,))

In [34]:
from torch.utils.data import DataLoader, TensorDataset

In [35]:
# Wrap each (features, labels) pair so that indexing a dataset yields one
# (image, label) sample.
train_dataset, val_dataset = (
    TensorDataset(features, labels)
    for features, labels in ((X_train, y_train), (X_val, y_val))
)

In [36]:
# Mini-batch size shared by both loaders.
BATCH_SIZE = 32

# Reshuffle the training samples every epoch so the optimiser does not see the
# exact same batches in the exact same order each time. Validation is
# evaluation-only, so its order is kept fixed.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [37]:
# Step 4: Define Model
import torch.nn as nn

class ConvolutionModel(nn.Module):
    """Small two-block CNN classifier for 32x32 images.

    Each block is Conv2d(3x3, stride 1, no padding) -> ReLU -> MaxPool2d(2x2,
    stride 2); the blocks are followed by a two-layer fully connected head.

    Shape trace for an input of shape (N, in_channels, 32, 32):
        layer1    -> (N, 16, 30, 30)   # (32 - 3) / 1 + 1 = 30
        pooling1  -> (N, 16, 15, 15)   # 30 / 2 = 15
        layer2    -> (N, 32, 13, 13)   # (15 - 3) / 1 + 1 = 13
        pooling2  -> (N, 32, 6, 6)     # floor(13 / 2) = 6
        flatten   -> (N, 1152)         # 32 * 6 * 6
        fc1       -> (N, 128)
        fc2       -> (N, num_classes)

    The input size of ``fc1`` is fixed for 32x32 images; other spatial sizes
    will fail at ``fc1`` with a shape mismatch.

    Args:
        in_channels: number of channels in the input images (default 3).
        num_classes: number of output logits, one per class (default 10).
    """

    def __init__(self, in_channels: int = 3, num_classes: int = 10) -> None:
        super().__init__()
        # Block 1: (in_channels, 32, 32) -> (16, 30, 30) -> (16, 15, 15)
        self.layer1 = nn.Conv2d(in_channels=in_channels, out_channels=16, kernel_size=3)
        self.relu1 = nn.ReLU()
        self.pooling1 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Block 2: (16, 15, 15) -> (32, 13, 13) -> (32, 6, 6)
        self.layer2 = nn.Conv2d(in_channels=16, out_channels=32, kernel_size=3)
        self.relu2 = nn.ReLU()
        self.pooling2 = nn.MaxPool2d(kernel_size=2, stride=2)
        # Classifier head: 32 * 6 * 6 = 1152 flattened features -> 128 -> num_classes
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(32 * 6 * 6, 128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return raw (un-normalised) class logits of shape (N, num_classes)."""
        x = self.pooling1(self.relu1(self.layer1(x)))
        x = self.pooling2(self.relu2(self.layer2(x)))
        x = self.flatten(x)
        x = self.relu(self.fc1(x))
        return self.fc2(x)


model = ConvolutionModel()

In [38]:
# Step 5
# Define Infra
import torch.optim as optim

# Training configuration: epoch budget, loss function and optimiser.
num_epochs = 5
criterion = nn.CrossEntropyLoss()  # applied to the model's raw logits and integer labels
optimiser = optim.Adam(params=model.parameters(), lr=1e-3)

In [39]:
# Step 6
# Training loop with checkpointing
CHECKPOINT_PATH = 'checkpoints/best_model.pt'
# torch.save does not create missing parent directories, so make sure the
# target folder exists before the first checkpoint is written.
Path(CHECKPOINT_PATH).parent.mkdir(parents=True, exist_ok=True)

best_val_loss = float('inf')
for epoch in range(num_epochs):
    # --- training ---
    model.train()
    train_loss = 0
    for batch_data, batch_target in train_dataloader:
        output = model(batch_data)
        loss = criterion(output, batch_target)  # compute loss for this batch
        optimiser.zero_grad()  # clear gradients left over from the previous step
        loss.backward()        # calculate new gradients via back-propagation
        optimiser.step()       # update the parameters using those gradients
        train_loss += loss.item()

    # NOTE: this is a mean of per-batch means, so a smaller final batch is
    # weighted the same as a full one.
    train_loss /= len(train_dataloader)

    # --- validation ---
    model.eval()
    val_loss = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for batch_data, batch_target in val_dataloader:
            output = model(batch_data)
            loss = criterion(output, batch_target)
            val_loss += loss.item()
        val_loss /= len(val_dataloader)
    print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}")

    # Save a checkpoint whenever the validation loss improves on the best so far.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        checkpoint = {
            'epoch': epoch,  # zero-based index of the epoch just completed
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimiser.state_dict(),
            'loss': val_loss,
        }
        torch.save(checkpoint, CHECKPOINT_PATH)
        print("  ✓ Best model saved!")

Epoch 1: Train Loss=2.3067, Val Loss=2.3044
  ✓ Best model saved!
Epoch 2: Train Loss=2.2931, Val Loss=2.3174
Epoch 3: Train Loss=2.2908, Val Loss=2.3100
Epoch 4: Train Loss=2.2852, Val Loss=2.3120
Epoch 5: Train Loss=2.2794, Val Loss=2.3085
Epoch 6: Train Loss=2.2709, Val Loss=2.3095
Epoch 7: Train Loss=2.2580, Val Loss=2.3073
Epoch 8: Train Loss=2.2448, Val Loss=2.3131
Epoch 9: Train Loss=2.2169, Val Loss=2.3128
Epoch 10: Train Loss=2.1803, Val Loss=2.3226


In [40]:
# Step 7
# Inference
# Load best checkpoint. torch.load unpickles the file, so only load
# checkpoints that come from a trusted source.
checkpoint = torch.load('checkpoints/best_model.pt')
model.load_state_dict(checkpoint['model_state_dict'])

# Perform inference
model.eval()
# Draw the new samples with torch.rand, i.e. from the same U[0, 1) range the
# training images were generated with; torch.randn would feed the model
# N(0, 1) inputs (including negative values) it was never trained on.
test_data = torch.rand(10, 3, 32, 32)  # 10 new samples

with torch.no_grad():
    output = model(test_data)                      # raw logits, one row per sample
    predictions = torch.argmax(output, dim=1)      # index of the largest logit per row
    probabilities = torch.softmax(output, dim=1)   # normalise logits to probabilities

print("Predictions:", predictions)
# .values is the per-row maximum probability (the named form of max(...)[0]).
print("Confidence:", probabilities.max(dim=1).values)

Predictions: tensor([3, 3, 3, 3, 3, 3, 3, 3, 3, 3])
Confidence: tensor([0.1267, 0.1337, 0.1297, 0.1298, 0.1324, 0.1315, 0.1310, 0.1269, 0.1286,
        0.1319])


In [41]:
# Step 8
# Resume training from the saved checkpoint
checkpoint = torch.load('checkpoints/best_model.pt')
model.load_state_dict(checkpoint['model_state_dict'])
optimiser.load_state_dict(checkpoint['optimizer_state_dict'])
# The file holds the *best* epoch, not the last one run, so training restarts
# from the epoch right after the best one.
start_epoch = checkpoint['epoch'] + 1
# Restore the best score from the checkpoint itself instead of relying on a
# variable left behind in the kernel by the earlier training cell.
best_val_loss = checkpoint['loss']

# Continue training
for epoch in range(start_epoch, num_epochs + 5):
    # --- training ---
    model.train()
    train_loss = 0
    for batch_data, batch_target in train_dataloader:
        output = model(batch_data)
        loss = criterion(output, batch_target)  # compute loss for this batch
        optimiser.zero_grad()  # clear gradients left over from the previous step
        loss.backward()        # calculate new gradients via back-propagation
        optimiser.step()       # update the parameters using those gradients
        train_loss += loss.item()

    # NOTE: this is a mean of per-batch means, so a smaller final batch is
    # weighted the same as a full one.
    train_loss /= len(train_dataloader)

    # --- validation ---
    model.eval()
    val_loss = 0
    with torch.no_grad():  # no gradients needed for evaluation
        for batch_data, batch_target in val_dataloader:
            output = model(batch_data)
            loss = criterion(output, batch_target)
            val_loss += loss.item()
        val_loss /= len(val_dataloader)
    print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}")

    # Save a checkpoint whenever the validation loss improves on the best so far.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        checkpoint = {
            'epoch': epoch,  # zero-based index of the epoch just completed
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimiser.state_dict(),
            'loss': val_loss,
        }
        torch.save(checkpoint, 'checkpoints/best_model.pt')
        print("  ✓ Best model saved!")

Epoch 2: Train Loss=2.2931, Val Loss=2.3174
Epoch 3: Train Loss=2.2908, Val Loss=2.3100
Epoch 4: Train Loss=2.2852, Val Loss=2.3120
Epoch 5: Train Loss=2.2794, Val Loss=2.3085
Epoch 6: Train Loss=2.2709, Val Loss=2.3095
Epoch 7: Train Loss=2.2580, Val Loss=2.3073
Epoch 8: Train Loss=2.2448, Val Loss=2.3131
Epoch 9: Train Loss=2.2169, Val Loss=2.3128
Epoch 10: Train Loss=2.1803, Val Loss=2.3226
Epoch 11: Train Loss=2.1221, Val Loss=2.3248
Epoch 12: Train Loss=2.0416, Val Loss=2.3384
Epoch 13: Train Loss=1.9317, Val Loss=2.3378
Epoch 14: Train Loss=1.7826, Val Loss=2.3610
Epoch 15: Train Loss=1.6073, Val Loss=2.3861
