In [2]:
import torch

In [6]:
# Step1: Collect Training data and validation data
X_train = torch.rand(1000,3,32,32)
y_train = torch.randint(0,10,(1000,))
X_val = torch.rand(200,3,32,32)
y_val = torch.randint(0,10,(200,))

In [5]:
X_train

tensor([[[[0.1577, 0.7610, 0.7220,  ..., 0.6958, 0.8868, 0.4827],
          [0.3863, 0.1921, 0.8070,  ..., 0.1451, 0.1982, 0.5475],
          [0.2786, 0.0818, 0.1634,  ..., 0.3086, 0.4927, 0.5772],
          ...,
          [0.3411, 0.7345, 0.9046,  ..., 0.2871, 0.6153, 0.2594],
          [0.7008, 0.2920, 0.3096,  ..., 0.5381, 0.9472, 0.4198],
          [0.4530, 0.0112, 0.5556,  ..., 0.1709, 0.5627, 0.0843]],

         [[0.0325, 0.5672, 0.1361,  ..., 0.3558, 0.0178, 0.6700],
          [0.0367, 0.0112, 0.8780,  ..., 0.1471, 0.1294, 0.4985],
          [0.0575, 0.3445, 0.5683,  ..., 0.4550, 0.8189, 0.7471],
          ...,
          [0.2902, 0.2995, 0.1767,  ..., 0.7758, 0.2998, 0.1569],
          [0.5353, 0.5754, 0.3412,  ..., 0.3014, 0.5056, 0.8275],
          [0.6743, 0.2188, 0.5403,  ..., 0.8860, 0.5547, 0.9805]],

         [[0.1775, 0.4070, 0.4259,  ..., 0.0476, 0.7629, 0.2812],
          [0.6454, 0.4023, 0.7123,  ..., 0.7276, 0.4605, 0.2548],
          [0.6084, 0.1394, 0.6550,  ..., 0

In [10]:
# Step 2: Dataset
from torch.utils.data import DataLoader,TensorDataset

In [11]:
train_dataset = TensorDataset(X_train,y_train)
val_dataset = TensorDataset(X_val,y_val)

In [12]:
# Step 3: DataLoader
train_loader = DataLoader(train_dataset,batch_size=32)
val_loader = DataLoader(val_dataset,batch_size=32)

In [31]:
# Step 4: Define Model
import torch.nn as nn

class ConvolutionModel(nn.Module):
    def __init__(self):
        super().__init__()
        # layer 1
        self.layer1 = nn.Conv2d(in_channels = 3, out_channels = 16, kernel_size = 3) # (32-3)/3+1=10  -> 16X30X30
        self.relu1 = nn.ReLU()  # ->16X30X30
        self.pooling1 = nn.MaxPool2d(kernel_size = 2,stride=2) # -> 16X29X29
        # layer 2
        self.layer2 = nn.Conv2d(in_channels = 16, out_channels = 32, kernel_size = 3) # -> 32X28X28
        self.relu2 = nn.ReLU()
        self.pooling2 = nn.MaxPool2d(kernel_size = 2,stride =2) #->32X26X26
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(1152,128)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(128,10)

    def forward(self,x):
        x = self.layer1(x)
        x = self.relu1(x)
        x = self.pooling1(x)
        x = self.layer2(x)
        x = self.relu2(x)
        x = self.pooling2(x)
        x = self.flatten(x)
        x= self.fc1(x)
        x = self.relu(x)
        x = self.fc2(x)
        return x
model = ConvolutionModel()     

In [32]:
# Step 5
# Define Infra
import torch.optim as optim

criterion = nn.CrossEntropyLoss()
optimiser = optim.Adam(model.parameters(),lr=0.001)
num_epochs = 10

In [33]:
# Step 6
# Training loop with check pointing
best_val_loss = float('inf')
for epoch in range(0,num_epochs):
    # training
    model.train()
    train_loss = 0
    for batch_data,batch_target in train_loader:
        output = model(batch_data)
        loss = criterion(output,batch_target) # Compute loss
        optimiser.zero_grad() # Zero gradients from previous iteration
        loss.backward() # calcluate new gradients via back propagation
        optimiser.step() # updates gradients
        train_loss+=loss.item()
        
    train_loss/=len(train_loader)      
    # validation
    model.eval()
    val_loss = 0
    with torch.no_grad():
        for batch_data,batch_target in val_loader:
            output = model(batch_data)
            loss = criterion(output,batch_target)
            val_loss+=loss.item()
        val_loss/=len(val_loader)
    print(f"Epoch {epoch+1}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}")

    # Save best checkpoint
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        checkpoint = {
            'epoch': epoch,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimiser.state_dict(),
            'loss': val_loss,
        }
        torch.save(checkpoint, 'checkpoints/best_model.pt')
        print(f"  ✓ Best model saved!")

Epoch 1: Train Loss=2.3092, Val Loss=2.3058
  ✓ Best model saved!
Epoch 2: Train Loss=2.3037, Val Loss=2.3035
  ✓ Best model saved!
Epoch 3: Train Loss=2.3035, Val Loss=2.3032
  ✓ Best model saved!
Epoch 4: Train Loss=2.3011, Val Loss=2.3018
  ✓ Best model saved!
Epoch 5: Train Loss=2.3035, Val Loss=2.3027
Epoch 6: Train Loss=2.2970, Val Loss=2.3048
Epoch 7: Train Loss=2.2914, Val Loss=2.3084
Epoch 8: Train Loss=2.2757, Val Loss=2.3167
Epoch 9: Train Loss=2.2476, Val Loss=2.3315
Epoch 10: Train Loss=2.1922, Val Loss=2.3507


In [16]:
# Convolution formula
# outputsize = (input_size-kernel_size+2*Padding)/stride +1
import math

input_size= 32
kernel_size= 3
padding = 0
stride =1
output_size= math.floor(((input_size-kernel_size+2*padding)/stride))+1
print(output_size)


30


In [21]:
# Max pooling
input_size= 30
kernel_size= 2
stride =2
output_size = math.floor((input_size-kernel_size)/stride)+1
print(output_size)

15


In [22]:
input_size= 15
kernel_size= 3
padding = 0
stride =1
output_size= math.floor(((input_size-kernel_size+2*padding)/stride))+1
print(output_size)


13


In [23]:
input_size= 13
kernel_size= 2
stride =2
output_size = math.floor((input_size-kernel_size)/stride)+1
print(output_size)

6
